1
0
Fork 0

reduce alerting time for 5XX error

The alert keeps firing for too long, even after the service has recovered.
We'll probably need to do something more appropriate than an error budget, but it'll do for now.
This commit is contained in:
ibizaman 2024-09-07 07:30:17 +02:00 committed by Pierre Penninckx
parent 192d8ad97e
commit adbeef3289

View file

@ -18,7 +18,7 @@
"uid": "cd6cc53e-840c-484d-85f7-96fede324006" "uid": "cd6cc53e-840c-484d-85f7-96fede324006"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [7d])))", "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [1h])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [1h])))",
"intervalMs": 1000, "intervalMs": 1000,
"legendFormat": "{{server_name}}", "legendFormat": "{{server_name}}",
"maxDataPoints": 43200, "maxDataPoints": 43200,
@ -120,7 +120,7 @@
"__panelId__": "9", "__panelId__": "9",
"description": "", "description": "",
"runbook_url": "", "runbook_url": "",
"summary": "The error budget for a service for the last 7 days is under 99%" "summary": "The error budget for a service for the last 1 hour is under 99%"
}, },
"labels": { "labels": {
"": "", "": "",