From 3b11dcc0885171bd6738df98b01007aa03c87e09 Mon Sep 17 00:00:00 2001 From: ibizaman <ibizapeanut@gmail.com> Date: Sat, 7 Sep 2024 07:30:17 +0200 Subject: [PATCH] reduce alerting time for 5XX error It's alerting for too long, even after it recovered. We'll probably need to do something more appropriate than an error budget but it'll do for now. --- modules/blocks/monitoring/rules.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/blocks/monitoring/rules.json b/modules/blocks/monitoring/rules.json index 6519d5f..192fbc6 100644 --- a/modules/blocks/monitoring/rules.json +++ b/modules/blocks/monitoring/rules.json @@ -18,7 +18,7 @@ "uid": "cd6cc53e-840c-484d-85f7-96fede324006" }, "editorMode": "code", - "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [7d])))", + "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [1h])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [1h])))", "intervalMs": 1000, "legendFormat": "{{server_name}}", "maxDataPoints": 43200, @@ -120,7 +120,7 @@ "__panelId__": "9", "description": "", "runbook_url": "", - "summary": "The error budget for a service for the last 7 days is under 99%" + "summary": "The error budget for a service for the last 1 hour is under 99%" }, "labels": { "": "",