reduce alerting time for 5XX error
The alert keeps firing for too long, even after the service has recovered. We'll probably need something more appropriate than an error budget, but it'll do for now.
This commit is contained in:
parent
192d8ad97e
commit
adbeef3289
1 changed file with 2 additions and 2 deletions
|
@@ -18,7 +18,7 @@
|
|||
"uid": "cd6cc53e-840c-484d-85f7-96fede324006"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [7d])))",
|
||||
"expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [1h])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [1h])))",
|
||||
"intervalMs": 1000,
|
||||
"legendFormat": "{{server_name}}",
|
||||
"maxDataPoints": 43200,
|
||||
|
@@ -120,7 +120,7 @@
|
|||
"__panelId__": "9",
|
||||
"description": "",
|
||||
"runbook_url": "",
|
||||
"summary": "The error budget for a service for the last 7 days is under 99%"
|
||||
"summary": "The error budget for a service for the last 1 hour is under 99%"
|
||||
},
|
||||
"labels": {
|
||||
"": "",
|
||||
|
|
Loading…
Reference in a new issue