夜莺v7添加云之家通知渠道工作记录
极不推荐生产环境使用下述告警推送方式!!!整套监控系统由前任建立,无文档情况下接手,本文仅为工作记录。
一、环境梳理
1、首先,数据源来自Prometheus。监听端口7746,启动参数/monitor/prometheus/prometheus --web.listen-address=:7746 --config.file=/monitor/prometheus/prometheus.yml --web.enable-remote-write-receiver --web.enable-admin-api
[root@monitor alarmweb]# netstat -tunlp |grep 7746
tcp6 0 0 :::7746 :::* LISTEN 4359/prometheus
[root@monitor alarmweb]# ps aux |grep 4359
root 4359 2.0 2.0 3640152 329784 ? Sl 2025 2647:18 /monitor/prometheus/prometheus --web.listen-address=:7746 --config.file=/monitor/prometheus/prometheus.yml --web.enable-remote-write-receiver --web.enable-admin-api
可以看到Prometheus的工作目录在/monitor/prometheus
2、其次,夜莺本体监听端口80,启动参数/monitor/n9e/n9e -configs /monitor/n9e/etc
[root@monitor alarmweb]# netstat -tunlp |grep 80
tcp 0 0 0.0.0.0:18000 0.0.0.0:* LISTEN 5649/python
tcp 0 0 0.0.0.0:18001 0.0.0.0:* LISTEN 24294/python
tcp6 0 0 :::80 :::* LISTEN 2672/n9e
[root@monitor alarmweb]# ps aux |grep 2672
root 2672 1.4 0.6 812840 104068 ? Sl 2025 6025:14 /monitor/n9e/n9e -configs /monitor/n9e/etc
root 27479 0.0 0.0 112840 2308 pts/1 S+ 14:16 0:00 grep --color=auto 2672
可以看到夜莺的工作目录在/monitor/n9e
3、从夜莺管理平台上看到,所有的告警事件都会抛给一个回调地址:http://monitor.baiyyy.com:18000/webhook/event

查看一下是什么进程在监听18000端口。
[root@monitor alarmweb]# netstat -tunlp |grep 18000
tcp 0 0 0.0.0.0:18000 0.0.0.0:* LISTEN 5649/python
[root@monitor alarmweb]# ps aux |grep 5649
root 5649 0.0 0.8 2307712 136588 ? S 2025 56:43 python /zscript/alarmweb/webmain.py
root 28636 0.0 0.0 112840 2356 pts/1 S+ 14:21 0:00 grep --color=auto 5649
打开/zscript/alarmweb/webmain.py,内容如下:
import requests
import json
import sys
from flask import Flask,request
import time
# Flask application object; the @app.route handlers in this script register on it.
app = Flask(__name__)
# Send a plain-text message through a WeCom (enterprise WeChat) group robot.
def send_message(wkey, alm):
    """Post ``alm`` as a text message to the WeCom robot identified by ``wkey``.

    Args:
        wkey: Value of the ``key`` query parameter of the robot webhook URL.
            Treat it as a secret: anyone holding it can post via the robot.
        alm: Message text to send.

    Returns:
        The HTTP status code of the webhook response, as a string.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    wx_url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key='+wkey+'&type'
    data = {"msgtype": "text", "text": {"content": alm}}
    # requests applies no timeout by default; without one a stalled
    # connection would block this webhook handler indefinitely.
    r = requests.post(url=wx_url, data=json.dumps(data), timeout=10)
    return str(r.status_code)
# @app.route("/")
# def hello_world():
# return "<p>基础架构告警</p>"
@app.route("/webhook/event", methods=['POST'])
def event():
    """Receive an alert event callback and forward it to a WeCom robot.

    The request body is parsed as JSON. The raw payload is appended to
    /zscript/alarmweb/jsondata and a one-line summary to
    /zscript/alarmweb/alarmlog. The robot key is read from the ``note``
    field of the first entry in ``notify_groups_obj``.

    Returns:
        The status-code string returned by ``send_message``, or a 400
        response when the payload carries no notify group.
    """
    json_data = json.loads(request.data)
    eid = str(json_data.get("id"))
    # Guard the [0] lookup: a missing or empty list used to raise here.
    notify_groups = json_data.get("notify_groups_obj") or []
    if not notify_groups:
        return "No notify groups found", 400
    group_wkey = str(notify_groups[0].get("note"))
    recovered = str(json_data.get("is_recovered"))
    severity = str(json_data.get("severity"))  # alert severity level
    rule_name = json_data.get("rule_name")  # alert rule name
    # The payload field is "trigger_time". The previous key "trigger_tim"
    # never matched, so get() returned None and time.localtime(None)
    # silently reported the current time instead of the trigger time.
    trigger_tim = time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime(json_data.get("trigger_time"))
    )
    trigger_value = json_data.get("trigger_value")  # value that fired the alert
    tags = str(json_data.get("tags"))  # alert tags

    with open("/zscript/alarmweb/jsondata", 'a') as f:
        f.write(json.dumps(json_data, indent=4))
    # format() tolerates a missing rule_name (None) and keeps the event id
    # separated from the recovered flag; utf-8 because rule names may be
    # non-ASCII.
    with open("/zscript/alarmweb/alarmlog", 'a', encoding='utf-8') as f:
        f.write('{} {} {} {}\n'.format(rule_name, trigger_tim, eid, recovered))

    if recovered == 'True':
        tt = '------恢复通知------'
    else:
        tt = '------告警通知------'
    message = (
        '{} {} \n告警级别: {} \n告警规则: {} \n告警时间: {} '
        '\n告警触发值: {} \n告警详情: {}\n'
    ).format(tt, eid, severity, rule_name, trigger_tim, trigger_value, tags)
    rcode = send_message(wkey=group_wkey, alm=message)
    return rcode
if __name__ == "__main__":
    # Script entry point: serve on port 18000, bound to all interfaces.
    app.run(port=18000, host="0.0.0.0")
至此,夜莺的报警逻辑大概清晰,即夜莺产生告警事件之后,会推送给配置好的回调地址。 Flask 拉起的 Webhook 服务用来接收夜莺的告警回调,然后通过企业微信机器人发送告警/恢复通知。
4、那么是如何做到消息分组发送的呢?
查看/zscript/alarmweb/webmain.py,发现以下代码片段
def send_message(wkey,alm):
wx_url = 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key='+wkey+'&type'
data = {"msgtype": "text", "text": {"content": alm}}
这段代码用于构造企业微信机器人的文本消息,其中机器人的webhook地址由wkey变量拼接而成。继续查看代码:
rcode= send_message(wkey=group_wkey,alm=message)
def event():
json_data = json.loads(request.data)
eid = str(json_data.get("id"))
group_wkey=str(json_data.get("notify_groups_obj")[0].get("note"))
recovered =str(json_data.get("is_recovered"))
所以,wkey的值是由group_wkey引入的,group_wkey的值来自json_data.get("notify_groups_obj")[0].get("note"),所以wkey的值也是从夜莺产生告警事件中推送过来的。
查看一下官方文档,看看夜莺推送过来的数据结构:
{
"id": 16,
"cate": "prometheus",
"cluster": "xxx",
"datasource_id": 1,
"group_id": 1,
"group_name": "Default Busi Group",
"hash": "0188b06deaa5eb24832548d599090f2b",
"rule_id": 4,
"rule_name": "测试回调地址",
"rule_note": "",
"rule_prod": "metric",
"rule_algo": "",
"severity": 2,
"prom_for_duration": 0,
"prom_ql": "system_load_norm_5 \u003e 0",
"rule_config": {
"queries": [
{
"keys": {
"labelKey": "",
"valueKey": ""
},
"prom_ql": "system_load_norm_5 \u003e 0",
"severity": 2
}
]
},
"prom_eval_interval": 15,
"callbacks": [
"http://10.211.55.3:4321"
],
"runbook_url": "",
"notify_recovered": 1,
"notify_channels": [
"email"
],
"notify_groups": [
"2"
],
"notify_groups_obj": [
{
"id": 2,
"name": "测试邮件告警的团队",
"note": "",
"create_at": 1708921626,
"create_by": "root",
"update_at": 1708948109,
"update_by": "root"
}
],
"target_ident": "ulric-flashcat.local",
"target_note": "",
"trigger_time": 1708999492,
"trigger_value": "0.7229",
"trigger_values": "",
"tags": [
"__name__=system_load_norm_5",
"ident=ulric-flashcat.local",
"rulename=测试回调地址"
],
"tags_map": {
"__name__": "system_load_norm_5",
"ident": "ulric-flashcat.local",
"rulename": "测试回调地址"
},
"annotations": {
},
"is_recovered": false,
"notify_users_obj": [
{
"id": 3,
"username": "n9e-wecom-robot",
"nickname": "夜莺V7群机器人",
"phone": "",
"email": "",
"portrait": "",
"roles": [
"Guest"
],
"contacts": {
"wecom_robot_token": "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=x"
},
"maintainer": 0,
"create_at": 1708945529,
"create_by": "root",
"update_at": 1708945529,
"update_by": "root",
"admin": false
},
{
"id": 4,
"username": "n9e-ding-robot",
"nickname": "钉钉机器人",
"phone": "",
"email": "",
"portrait": "",
"roles": [
"Guest"
],
"contacts": {
"dingtalk_robot_token": "https://oapi.dingtalk.com/robot/send?access_token=x"
},
"maintainer": 0,
"create_at": 1708948099,
"create_by": "root",
"update_at": 1708948099,
"update_by": "root",
"admin": false
},
{
"id": 1,
"username": "root",
"nickname": "超管",
"phone": "",
"email": "",
"portrait": "",
"roles": [
"Admin"
],
"contacts": {
},
"maintainer": 0,
"create_at": 1708920315,
"create_by": "system",
"update_at": 1708920315,
"update_by": "system",
"admin": true
},
{
"id": 2,
"username": "qinxiaohui",
"nickname": "秦晓辉",
"phone": "",
"email": "qinxiaohui@flashcat.cloud",
"portrait": "",
"roles": [
"Standard"
],
"contacts": {
},
"maintainer": 0,
"create_at": 1708921503,
"create_by": "root",
"update_at": 1708921503,
"update_by": "root",
"admin": false
}
],
"last_eval_time": 1708999492,
"last_sent_time": 1708999492,
"notify_cur_number": 1,
"first_trigger_time": 1708999492,
"extra_config": null,
"status": 0,
"claimant": "",
"sub_rule_id": 0,
"extra_info": null
}
其中包含了以下字段
"notify_groups_obj": [
{
"id": 2,
"name": "测试邮件告警的团队",
"note": "",
"create_at": 1708921626,
"create_by": "root",
"update_at": 1708948109,
"update_by": "root"
}
],
正是我们需要的json_data.get("notify_groups_obj")[0].get("note"),不过官方提供的回调 JSON示例中,note为空,这个note代表什么呢?
我们打开夜莺的“人员组织–团队列表”,选择一个已有的团队查看一下。

可以看到,团队中有name信息:服务器告警,有更新人信息:root,还有更新时间信息:2026-02-04 17:18:17,正好对应JSON示例中的name、update_by、update_at。截图中还有一个备注,很有可能就是JSON示例中的note字段了。
这边只是结合已知信息进行的猜测,如需落实实际情况,可以获取一下夜莺回调过来的详细数据对比一下就可以了。
至此,消息分组的实现逻辑也已然清晰:在夜莺告警规则中选择指定的告警接收组,并在告警接收组的备注信息中填写对应机器人的token;Flask 拉起的 Webhook 服务从回调数据中取出该token并拼接成机器人的webhook地址,然后发送告警/恢复通知。
二、创建云之家告警渠道
云之家机器人与企业微信机器人一样,提供一个webhook地址用来发送消息。所以找了个AI仿写一个Python脚本。
import requests
import json
import sys
from flask import Flask, request
import time
# Flask application object; the @app.route handlers in this script register on it.
app = Flask(__name__)
# Send a message through a Yunzhijia group robot.
def send_yunzhijia_message(webhook_url, alarm_msg):
    """Post ``alarm_msg`` to a Yunzhijia robot webhook.

    Args:
        webhook_url: Full webhook URL of the robot, token included.
        alarm_msg: Text placed in the ``content`` field of the JSON body.

    Returns:
        ``"200"`` when the HTTP status is 200 and the response JSON has a
        truthy-equal ``success`` flag; the response's ``errorCode`` (as a
        string) when it does not; the HTTP status code string for non-200
        replies; ``"500"`` when any exception occurs.
    """
    payload = {"content": alarm_msg}
    try:
        response = requests.post(url=webhook_url, json=payload, timeout=10)

        # Non-200 replies: log the status and body, then hand back the code.
        if response.status_code != 200:
            print(f"HTTP请求失败,状态码: {response.status_code}")
            print(f"响应内容: {response.text}")
            return str(response.status_code)

        result = response.json()
        if result.get('success') == True:
            return "200"

        print(f"云之家机器人返回错误: {result}")
        return str(result.get('errorCode', '未知错误'))
    except Exception as e:
        print(f"发送云之家消息异常: {e}")
        return "500"
@app.route("/webhook/event", methods=['POST'])
def event():
    """Receive an alert event callback and forward it to a Yunzhijia robot.

    The raw payload is appended to /zscript/alarmweb/jsondata and a one-line
    summary to /zscript/alarmweb/alarmlog. The robot token is read from the
    ``note`` field of the first entry in ``notify_groups_obj``.

    Returns:
        The result string from ``send_yunzhijia_message``; a 400 response
        when the body is not a JSON object or carries no notify group; or
        the string ``"500"`` when an unexpected exception occurs.
    """
    try:
        # silent=True makes a malformed body yield None instead of raising,
        # so the 400 branch below is actually reachable.
        json_data = request.get_json(silent=True)
        if not isinstance(json_data, dict):
            return "Invalid JSON data", 400

        eid = str(json_data.get("id", ""))
        notify_groups = json_data.get("notify_groups_obj", [])
        if not notify_groups:
            return "No notify groups found", 400
        group_wkey = str(notify_groups[0].get("note", ""))
        recovered = str(json_data.get("is_recovered", ""))
        severity = str(json_data.get("severity", ""))  # alert severity level
        rule_name = json_data.get("rule_name", "")  # alert rule name

        # Prefer "trigger_time"; fall back to the misspelled "trigger_tim"
        # key that the older script read.
        trigger_time_value = json_data.get("trigger_time", json_data.get("trigger_tim", 0))
        trigger_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(trigger_time_value))
        trigger_value = json_data.get("trigger_value", "")  # value that fired the alert
        tags = str(json_data.get("tags", ""))  # alert tags

        # Append the raw payload and a one-line summary to the log files.
        with open("/zscript/alarmweb/jsondata", 'a', encoding='utf-8') as f:
            f.write(json.dumps(json_data, indent=4, ensure_ascii=False))
        with open("/zscript/alarmweb/alarmlog", 'a', encoding='utf-8') as f:
            f.write(f"{rule_name} {trigger_time} {eid} {recovered}\n")

        # Parse the severity once. A missing or non-numeric value falls back
        # to 3, so the @ALL decision below can no longer raise ValueError
        # and drop the alert.
        severity_level = int(severity) if severity.isdigit() else 3
        is_recovery = recovered == 'True'

        if is_recovery:
            title = '🚀 恢复通知'
        elif severity_level == 1:
            title = '🔥 紧急告警'
        elif severity_level == 2:
            title = '⚠️ 重要告警'
        else:
            title = '📢 一般告警'

        message = (
            f"{title}\n"
            f"事件ID: {eid}\n"
            f"告警级别: {severity}\n"
            f"告警规则: {rule_name}\n"
            f"告警时间: {trigger_time}\n"
            f"触发数值: {trigger_value}\n"
            f"告警标签: {tags}\n"
        )

        # Append @ALL to firing alerts whose severity level is 2 or lower.
        if not is_recovery and severity_level <= 2:
            message += "\n@ALL"

        yunzhijia_webhook_url = f"https://www.yunzhijia.com/gateway/robot/webhook/send?yzjtype=0&yzjtoken={group_wkey}"
        rcode = send_yunzhijia_message(webhook_url=yunzhijia_webhook_url, alarm_msg=message)
        return rcode
    except Exception as e:
        # Top-level boundary of the handler: log the traceback and answer
        # with an error string rather than letting the exception propagate.
        print(f"处理事件异常: {e}")
        import traceback
        traceback.print_exc()
        return "500"
@app.route("/health", methods=['GET'])
def health_check():
    """Health endpoint: answer GET /health with a fixed status string."""
    status_text = "云之家机器人服务运行正常"
    return status_text
if __name__ == "__main__":
    # Script entry point: serve on port 18001, bound to all interfaces.
    app.run(port=18001, host="0.0.0.0")
查看效果:
Done!
DAMO开发者矩阵,由阿里巴巴达摩院和中国互联网协会联合发起,致力于探讨最前沿的技术趋势与应用成果,搭建高质量的交流与分享平台,推动技术创新与产业应用链接,围绕“人工智能与新型计算”构建开放共享的开发者生态。
更多推荐



所有评论(0)