From adbeef32897cf13cfdd114ccb7527bed55fbe4c7 Mon Sep 17 00:00:00 2001 From: ibizaman Date: Sat, 7 Sep 2024 07:30:17 +0200 Subject: [PATCH] reduce alerting time for 5XX error It's alerting for too long, even after it recovered. We'll probably need to do something more appropriate than an error budget but it'll do for now. --- modules/blocks/monitoring/rules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/blocks/monitoring/rules.json b/modules/blocks/monitoring/rules.json index 6519d5f..192fbc6 100644 --- a/modules/blocks/monitoring/rules.json +++ b/modules/blocks/monitoring/rules.json @@ -18,7 +18,7 @@ "uid": "cd6cc53e-840c-484d-85f7-96fede324006" }, "editorMode": "code", - "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [7d])))", + "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [1h])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [1h])))", "intervalMs": 1000, "legendFormat": "{{server_name}}", "maxDataPoints": 43200, @@ -120,7 +120,7 @@ "__panelId__": "9", "description": "", "runbook_url": "", - "summary": "The error budget for a service for the last 7 days is under 99%" + "summary": "The error budget for a service for the last 1 hour is under 99%" }, "labels": { "": "",