From d98dcef112b7edfaa0ade357a6ab025be0c9a30e Mon Sep 17 00:00:00 2001 From: ibizaman Date: Fri, 24 Nov 2023 22:50:47 -0800 Subject: [PATCH] provision grafana datasources, some dashboards, alerts and smtp --- modules/blocks/monitoring.nix | 163 +++- .../blocks/monitoring/dashboards/Errors.json | 782 ++++++++++++++++++ .../monitoring/dashboards/Performance.json | 672 +++++++++++++++ modules/blocks/monitoring/rules.json | 131 +++ 4 files changed, 1747 insertions(+), 1 deletion(-) create mode 100644 modules/blocks/monitoring/dashboards/Errors.json create mode 100644 modules/blocks/monitoring/dashboards/Performance.json create mode 100644 modules/blocks/monitoring/rules.json diff --git a/modules/blocks/monitoring.nix b/modules/blocks/monitoring.nix index d268a78..38c756f 100644 --- a/modules/blocks/monitoring.nix +++ b/modules/blocks/monitoring.nix @@ -1,4 +1,4 @@ -{ config, pkgs, lib, ... }: +{ config, options, pkgs, lib, ... }: let cfg = config.shb.monitoring; @@ -33,9 +33,67 @@ in default = false; example = true; }; + + orgId = lib.mkOption { + type = lib.types.int; + description = "Org ID where all self host blocks related config will be stored."; + default = 1; + }; + + provisionDashboards = lib.mkOption { + type = lib.types.bool; + description = "Provision Self Host Blocks dashboards under 'Self Host Blocks' folder."; + default = true; + }; + + contactPoints = lib.mkOption { + type = lib.types.listOf lib.types.str; + description = "List of email addresses to send alerts to"; + }; + + smtp = lib.mkOption { + type = lib.types.submodule { + options = { + from_address = lib.mkOption { + type = lib.types.str; + description = "SMTP address from which the emails originate."; + example = "grafana@mydomain.com"; + }; + from_name = lib.mkOption { + type = lib.types.str; + description = "SMTP name from which the emails originate."; + default = "Grafana"; + }; + host = lib.mkOption { + type = lib.types.str; + description = "SMTP host to send the emails to."; + }; + 
port = lib.mkOption { + type = lib.types.port; + description = "SMTP port to send the emails to."; + default = 25; + }; + username = lib.mkOption { + type = lib.types.str; + description = "Username to connect to the SMTP host."; + }; + passwordFile = lib.mkOption { + type = lib.types.str; + description = "File containing the password to connect to the SMTP host."; + }; + }; + }; + }; }; config = lib.mkIf cfg.enable { + assertions = [ + { + assertion = builtins.length cfg.contactPoints > 0; + message = "Must have at least one contact point for alerting"; + } + ]; + shb.postgresql.ensures = [ { username = "grafana"; @@ -64,9 +122,112 @@ in root_url = "https://${fqdn}"; router_logging = cfg.debugLog; }; + + smtp = { + enabled = true; + inherit (cfg.smtp) from_address from_name; + host = "${cfg.smtp.host}:${toString cfg.smtp.port}"; + user = cfg.smtp.username; + password = "$__file{${cfg.smtp.passwordFile}}"; + }; }; }; + services.grafana.provision = { + dashboards.settings = lib.mkIf cfg.provisionDashboards { + apiVersion = 1; + providers = [{ + folder = "Self Host Blocks"; + options.path = ./monitoring/dashboards; + allowUiUpdates = true; + disableDeletion = true; + }]; + }; + datasources.settings = { + apiVersion = 1; + datasources = [ + { + inherit (cfg) orgId; + name = "Prometheus"; + type = "prometheus"; + url = "http://127.0.0.1:${toString config.services.prometheus.port}"; + uid = "df80f9f5-97d7-4112-91d8-72f523a02b09"; + isDefault = true; + version = 1; + } + { + inherit (cfg) orgId; + name = "Loki"; + type = "loki"; + url = "http://127.0.0.1:${toString config.services.loki.configuration.server.http_listen_port}"; + uid = "cd6cc53e-840c-484d-85f7-96fede324006"; + version = 1; + } + ]; + deleteDatasources = [ + { + inherit (cfg) orgId; + name = "Prometheus"; + } + { + inherit (cfg) orgId; + name = "Loki"; + } + ]; + }; + alerting.contactPoints.settings = lib.mkIf ((builtins.length cfg.contactPoints) > 0) { + apiVersion = 1; + contactPoints = [{ + inherit (cfg) 
orgId; + name = "selfhostblocks-sysadmin"; + receivers = [{ + uid = "sysadmin"; + type = "email"; + settings.addresses = lib.concatStringsSep ";" cfg.contactPoints; + }]; + }]; + deleteContactPoints = [ + { + inherit (cfg) orgId; + uid = "grafana-default-email"; + } + ]; + }; + alerting.policies.settings = { + apiVersion = 1; + policies = [{ + inherit (cfg) orgId; + receiver = "selfhostblocks-sysadmin"; + group_by = [ "grafana_folder" "alertname" ]; + object_matchers = [ + [ "role" "=" "sysadmin" ] + ]; + group_wait = "30s"; + group_interval = "5m"; + repeat_interval = "4h"; + }]; + # resetPolicies is intentionally left unset: it seems to be applied after the policies + # above are set, effectively rolling back any updates. + }; + alerting.rules.settings = + let + rules = builtins.fromJSON (builtins.readFile ./monitoring/rules.json); + ruleIds = map (r: r.uid) rules; + in + { + apiVersion = 1; + groups = [{ + inherit (cfg) orgId; + name = "SysAdmin"; + folder = "Self Host Blocks"; + interval = "10m"; + inherit rules; + }]; + # deleteRules is intentionally left unset: it seems to be applied after the rules above are + # created, rolling them back. NOTE(review): ruleIds is unused as a result - confirm intent. 
+ }; + }; + services.prometheus = { enable = true; port = 3001; diff --git a/modules/blocks/monitoring/dashboards/Errors.json b/modules/blocks/monitoring/dashboards/Errors.json new file mode 100644 index 0000000..c134a34 --- /dev/null +++ b/modules/blocks/monitoring/dashboards/Errors.json @@ -0,0 +1,782 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 8, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-red", + "value": null + }, + { + "color": "transparent", + "value": 0.99 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "links": [ + { + "title": "explore", + "url": 
"https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "sum by(server_name) (rate({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \"[[server_name]].*\" [$__auto]))", + "legendFormat": "{{server_name}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Rate of Requests", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "body_bytes_sent": 9, + "bytes_sent": 8, + "gzip_ration": 11, + "post": 12, + "referrer": 10, + "remote_addr": 3, + "remote_user": 6, + "request": 4, + "request_length": 7, + "request_time": 15, + "server_name": 2, + "status": 1, + "time_local": 0, + "upstream_addr": 13, + "upstream_connect_time": 17, + "upstream_header_time": 18, + "upstream_response_time": 16, + "upstream_status": 14, + "user_agent": 5 + }, + 
"renameByName": {} + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-red", + "value": null + }, + { + "color": "transparent", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 9, + "links": [ + { + "title": "explore", + "url": "https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min" + ], + "displayMode": "table", + 
"placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \"[[server_name]].*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \"[[server_name]].*\" [7d])))", + "legendFormat": "{{server_name}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "5XX Requests Error Budgets", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "body_bytes_sent": 9, + "bytes_sent": 8, + "gzip_ration": 11, + "post": 12, + "referrer": 10, + "remote_addr": 3, + "remote_user": 6, + "request": 4, + "request_length": 7, + "request_time": 15, + "server_name": 2, + "status": 1, + "time_local": 0, + "upstream_addr": 13, + "upstream_connect_time": 17, + "upstream_header_time": 18, + "upstream_response_time": 16, + "upstream_status": 14, + "user_agent": 5 + }, + "renameByName": {} + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-red", + "value": null + }, + { + "color": "transparent", + "value": 0.99 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 10, + "links": [ + { + "title": "explore", + "url": "https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1235]..\" | server_name =~ \"[[server_name]].*\" [7d])) / sum by(server_name) 
(count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \"[[server_name]].*\" [7d])))", + "legendFormat": "{{server_name}}", + "queryType": "range", + "refId": "A" + } + ], + "title": "4XX Requests Error Budgets", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "body_bytes_sent": 9, + "bytes_sent": 8, + "gzip_ration": 11, + "post": 12, + "referrer": 10, + "remote_addr": 3, + "remote_user": 6, + "request": 4, + "request_length": 7, + "request_time": 15, + "server_name": 2, + "status": 1, + "time_local": 0, + "upstream_addr": 13, + "upstream_connect_time": 17, + "upstream_header_time": 18, + "upstream_response_time": 16, + "upstream_status": 14, + "user_agent": 5 + }, + "renameByName": {} + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 8, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=~\"[[service]].*\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Log Errors", + "type": "logs" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 7, + "links": [ + { + "title": "explore", + "url": "https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"5..\" | server_name =~ \"[[server_name]].*\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "5XX Requests Errors", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "body_bytes_sent": 9, + "bytes_sent": 8, + "gzip_ration": 11, + "post": 12, + "referrer": 10, + "remote_addr": 3, + "remote_user": 6, + "request": 4, + "request_length": 7, + "request_time": 15, + "server_name": 2, + "status": 1, + 
"time_local": 0, + "upstream_addr": 13, + "upstream_connect_time": 17, + "upstream_header_time": 18, + "upstream_response_time": 16, + "upstream_status": 14, + "user_agent": 5 + }, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 11, + "links": [ + { + "title": "explore", + "url": "https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != 
\"JSONParserErr\" | status =~ \"4..\" | server_name =~ \"[[server_name]].*\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "4XX Requests Errors", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "body_bytes_sent": 9, + "bytes_sent": 8, + "gzip_ration": 11, + "post": 12, + "referrer": 10, + "remote_addr": 3, + "remote_user": 6, + "request": 4, + "request_length": 7, + "request_time": 15, + "server_name": 2, + "status": 1, + "time_local": 0, + "upstream_addr": 13, + "upstream_connect_time": 17, + "upstream_header_time": 18, + "upstream_response_time": 16, + "upstream_status": 14, + "user_agent": 5 + }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "jellyfin" + ], + "value": [ + "jellyfin" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "definition": "query_result(max by (name) (node_systemd_unit_state))", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "service", + "options": [], + "query": { + "qryType": 3, + "query": "query_result(max by (name) (node_systemd_unit_state))", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "/name=\"(?.*)\\.service\"/", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "hide": 0, + "name": "server_name", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": ".+", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Errors", + "uid": "d66242cf-71e8-417c-8ef7-51b0741545df", + "version": 16, + 
"weekStart": "" +} diff --git a/modules/blocks/monitoring/dashboards/Performance.json b/modules/blocks/monitoring/dashboards/Performance.json new file mode 100644 index 0000000..945afe9 --- /dev/null +++ b/modules/blocks/monitoring/dashboards/Performance.json @@ -0,0 +1,672 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 6, + "links": [], + "liveNow": false, + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 12, + "title": "Node", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 14, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + 
"mode": "multi", + "sort": "none" + } + }, + "repeat": "cpu", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "editorMode": "code", + "expr": "rate(node_cpu_seconds_total{mode!=\"idle\",cpu=\"$cpu\"}[2m])", + "instant": false, + "legendFormat": "{{mode}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU $cpu", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "editorMode": "code", + "expr": "rate(process_cpu_seconds_total[2m])", + "instant": false, + "legendFormat": "{{job}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU per process", + "type": "timeseries" + 
}, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 4, + "panels": [], + "title": "Network Requests", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": false, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 3, + "links": [ + { + "title": "explore", + "url": "https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=\"nginx.service\"} | pattern \"<_> <_> <line>\" | line_format \"{{.line}}\" | json | status =~ \"[12]..\" | __error__ != \"JSONParserErr\" | request_time > 500", + 
"queryType": "range", + "refId": "A" + } + ], + "title": "Network Requests Above 1s", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "Line" + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "variance" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | request_time > 500", + "legendFormat": "", + "queryType": "range", + "refId": "A" + } + ], + "title": "Slow Queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "keepTime": true, + "replace": true, + "source": "labels" + 
} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "body_bytes_sent": true, + "bytes_sent": true, + "gzip_ration": true, + "job": true, + "line": true, + "post": true, + "referrer": true, + "remote_addr": true, + "remote_user": true, + "request": true, + "request_length": true, + "status": true, + "time_local": true, + "unit": true, + "upstream_addr": true, + "upstream_connect_time": true, + "upstream_header_time": true, + "upstream_response_time": true, + "upstream_status": true, + "user_agent": true + }, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "dateFormat": "", + "destinationType": "number", + "targetField": "request_time" + } + ], + "fields": {} + } + }, + { + "id": "partitionByValues", + "options": { + "fields": [ + "server_name" + ] + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "request_time (.*)", + "renamePattern": "$1" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 7, + "panels": [], + "title": "Databases", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "duration_ms" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "unit" + }, + "properties": [ + { + "id": "custom.width", + "value": 150 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 6, + "links": [ + { + "title": "explore", + "url": 
"https://grafana.tiserbox.com/explore?panes=%7B%22HWt%22:%7B%22datasource%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bunit%3D%5C%22nginx.service%5C%22%7D%20%7C%20pattern%20%5C%22%3C_%3E%20%3C_%3E%20%3Cline%3E%5C%22%20%7C%20line_format%20%5C%22%7B%7B.line%7D%7D%5C%22%20%7C%20json%20%7C%20status%20%21~%20%5C%222..%5C%22%20%7C%20__error__%20%21%3D%20%5C%22JSONParserErr%5C%22%22,%22queryType%22:%22range%22,%22datasource%22:%7B%22type%22:%22loki%22,%22uid%22:%22cd6cc53e-840c-484d-85f7-96fede324006%22%7D,%22editorMode%22:%22code%22%7D%5D,%22range%22:%7B%22from%22:%22now-6h%22,%22to%22:%22now%22%7D%7D%7D&schemaVersion=1&orgId=1" + } + ], + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "{unit=\"postgresql.service\"} | regexp \".*duration: (?P[0-9.]+) ms (?P.*)\" | duration_ms > 500 | __error__ != \"LabelFilterErr\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Slow DB Queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "replace": true, + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "job": true + }, + "indexByName": { + "duration_ms": 0, + "job": 1, + "statement": 3, + "unit": 2 + }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "0", + "value": "0" + }, + "datasource": { + "type": "prometheus", + "uid": "df80f9f5-97d7-4112-91d8-72f523a02b09" + }, + "definition": "label_values(node_cpu_seconds_total,cpu)", + "hide": 2, + "includeAll": true, + "multi": false, + "name": "cpu", + "options": [], + "query": 
{ + "qryType": 1, + "query": "label_values(node_cpu_seconds_total,cpu)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Performance", + "uid": "e01156bf-cdba-42eb-9845-a401dd634d41", + "version": 25, + "weekStart": "" +} diff --git a/modules/blocks/monitoring/rules.json b/modules/blocks/monitoring/rules.json new file mode 100644 index 0000000..6519d5f --- /dev/null +++ b/modules/blocks/monitoring/rules.json @@ -0,0 +1,131 @@ +[ + { + "uid": "f5246fa3-163f-4eae-9e1d-5b0fe2af0509", + "title": "5XX Requests Error Budgets Under 99%", + "condition": "threshold", + "data": [ + { + "refId": "A", + "queryType": "range", + "relativeTimeRange": { + "from": 21600, + "to": 0 + }, + "datasourceUid": "cd6cc53e-840c-484d-85f7-96fede324006", + "model": { + "datasource": { + "type": "loki", + "uid": "cd6cc53e-840c-484d-85f7-96fede324006" + }, + "editorMode": "code", + "expr": "(sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | status =~ \"[1234]..\" | server_name =~ \".*\" [7d])) / sum by(server_name) (count_over_time({unit=\"nginx.service\"} | pattern \"<_> <_> \" | line_format \"{{.line}}\" | json | __error__ != \"JSONParserErr\" | server_name =~ \".*\" [7d])))", + "intervalMs": 1000, + "legendFormat": "{{server_name}}", + "maxDataPoints": 43200, + "queryType": "range", + "refId": "A" + } + }, + { + "refId": "last", + "relativeTimeRange": { + "from": 0, + "to": 0 + }, + "datasourceUid": "__expr__", + "model": { + "conditions": [ + { + "evaluator": { + "params": [], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "B" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "datasource": { + 
"type": "__expr__", + "uid": "__expr__" + }, + "expression": "A", + "intervalMs": 1000, + "maxDataPoints": 43200, + "reducer": "last", + "refId": "last", + "type": "reduce" + } + }, + { + "refId": "threshold", + "relativeTimeRange": { + "from": 0, + "to": 0 + }, + "datasourceUid": "__expr__", + "model": { + "conditions": [ + { + "evaluator": { + "params": [ + 0.99 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "C" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "datasource": { + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "last", + "intervalMs": 1000, + "maxDataPoints": 43200, + "refId": "threshold", + "type": "threshold" + } + } + ], + "dasboardUid": "d66242cf-71e8-417c-8ef7-51b0741545df", + "panelId": 9, + "noDataState": "OK", + "execErrState": "Error", + "for": "20m", + "annotations": { + "__dashboardUid__": "d66242cf-71e8-417c-8ef7-51b0741545df", + "__panelId__": "9", + "description": "", + "runbook_url": "", + "summary": "The error budget for a service for the last 7 days is under 99%" + }, + "labels": { + "": "", + "role": "sysadmin" + }, + "isPaused": false + } +]