diff --git a/docs/blocks/monitoring.md b/docs/blocks/monitoring.md deleted file mode 100644 index 7a26ebb..0000000 --- a/docs/blocks/monitoring.md +++ /dev/null @@ -1,118 +0,0 @@ -# Monitoring Block - -This block sets up the monitoring stack for Self Host Blocks. It is composed of: - -- Grafana as the dashboard frontend. -- Prometheus as the database for metrics. -- Loki as the database for logs. - -## Configuration - -```nix -shb.monitoring = { - enable = true; - subdomain = "grafana"; - inherit domain; - contactPoints = [ "me@example.com" ]; - adminPasswordFile = config.sops.secrets."monitoring/admin_password".path; - secretKeyFile = config.sops.secrets."monitoring/secret_key".path; -}; - -sops.secrets."monitoring/admin_password" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -sops.secrets."monitoring/secret_key" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -``` - -With that, Grafana, Prometheus, Loki and Promtail are setup! You can access `Grafana` at -`grafana.example.com` with user `admin` and password ``. 
- -I recommend adding a STMP server configuration so you receive alerts by email: - -```nix -shb.monitoring.smtp = { - from_address = "grafana@$example.com"; - from_name = "Grafana"; - host = "smtp.mailgun.org"; - port = 587; - username = "postmaster@mg.example.com"; - passwordFile = config.sops.secrets."monitoring/smtp".path; -}; - -sops.secrets."monitoring/secret_key" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -``` - -Since all logs are now stored in Loki, you can probably reduce the systemd journal retention -time with: - -```nix -# See https://www.freedesktop.org/software/systemd/man/journald.conf.html#SystemMaxUse= -services.journald.extraConfig = '' -SystemMaxUse=2G -SystemKeepFree=4G -SystemMaxFileSize=100M -MaxFileSec=day -''; -``` - -## Provisioning - -Self Host Blocks will create automatically the following resources: - -- For Grafana: - - datasources - - dashboards - - contact points - - notification policies - - alerts -- For Prometheus, the following exporters and related scrapers: - - node - - smartctl - - nginx -- For Loki, the following exporters and related scrapers: - - systemd - -Those resources are namespaced as appropriate under the Self Host Blocks namespace: - -![](../assets/monitoring_grafana_folder.png) - -## Errors Dashboard - -This dashboard is meant to be the first stop to understand why a service is misbehaving. - -![](../assets/monitoring_grafana_dashboards_Errors_1.png) -![](../assets/monitoring_grafana_dashboards_Errors_2.png) - -The yellow and red dashed vertical bars correspond to the [Requests Error Budget -Alert](#requests-error-budget-alert) firing. - -## Performance Dashboard - -This dashboard is meant to be the first stop to understand why a service is performing poorly. 
- -![](../assets/monitoring_grafana_dashboards_Performance_1.png) -![](../assets/monitoring_grafana_dashboards_Performance_2.png) - -## Requests Error Budget Alert - -This alert will fire when the ratio between number of requests getting a 5XX response from a service -and the total requests to that service exceeds 1%. - -![](../assets/monitoring_grafana_alert_rules_5xx_1.png) -![](../assets/monitoring_grafana_alert_rules_5xx_2.png) diff --git a/docs/blocks/monitoring/alerts-requests-error-budger.md b/docs/blocks/monitoring/alerts-requests-error-budger.md new file mode 100644 index 0000000..fd199e0 --- /dev/null +++ b/docs/blocks/monitoring/alerts-requests-error-budger.md @@ -0,0 +1,7 @@ +# Requests Error Budget Alert {#blocks-monitoring-budget-alerts} + +This alert will fire when the ratio between number of requests getting a 5XX response from a service +and the total requests to that service exceeds 1%. + +![](./assets/alert_rules_5xx_1.png) +![](./assets/alert_rules_5xx_2.png) diff --git a/docs/assets/monitoring_grafana_alert_rules_5xx_1.png b/docs/blocks/monitoring/assets/alert_rules_5xx_1.png similarity index 100% rename from docs/assets/monitoring_grafana_alert_rules_5xx_1.png rename to docs/blocks/monitoring/assets/alert_rules_5xx_1.png diff --git a/docs/assets/monitoring_grafana_alert_rules_5xx_2.png b/docs/blocks/monitoring/assets/alert_rules_5xx_2.png similarity index 100% rename from docs/assets/monitoring_grafana_alert_rules_5xx_2.png rename to docs/blocks/monitoring/assets/alert_rules_5xx_2.png diff --git a/docs/assets/monitoring_grafana_dashboards_Errors_1.png b/docs/blocks/monitoring/assets/dashboards_Errors_1.png similarity index 100% rename from docs/assets/monitoring_grafana_dashboards_Errors_1.png rename to docs/blocks/monitoring/assets/dashboards_Errors_1.png diff --git a/docs/assets/monitoring_grafana_dashboards_Errors_2.png b/docs/blocks/monitoring/assets/dashboards_Errors_2.png similarity index 100% rename from 
docs/assets/monitoring_grafana_dashboards_Errors_2.png rename to docs/blocks/monitoring/assets/dashboards_Errors_2.png diff --git a/docs/assets/monitoring_grafana_dashboards_Performance_1.png b/docs/blocks/monitoring/assets/dashboards_Performance_1.png similarity index 100% rename from docs/assets/monitoring_grafana_dashboards_Performance_1.png rename to docs/blocks/monitoring/assets/dashboards_Performance_1.png diff --git a/docs/assets/monitoring_grafana_dashboards_Performance_2.png b/docs/blocks/monitoring/assets/dashboards_Performance_2.png similarity index 100% rename from docs/assets/monitoring_grafana_dashboards_Performance_2.png rename to docs/blocks/monitoring/assets/dashboards_Performance_2.png diff --git a/docs/assets/monitoring_grafana_folder.png b/docs/blocks/monitoring/assets/folder.png similarity index 100% rename from docs/assets/monitoring_grafana_folder.png rename to docs/blocks/monitoring/assets/folder.png diff --git a/docs/blocks/monitoring/configuration.md b/docs/blocks/monitoring/configuration.md new file mode 100644 index 0000000..357eba6 --- /dev/null +++ b/docs/blocks/monitoring/configuration.md @@ -0,0 +1,64 @@ +# Configuration {#blocks-monitoring-configuration} + +```nix +shb.monitoring = { + enable = true; + subdomain = "grafana"; + inherit domain; + contactPoints = [ "me@example.com" ]; + adminPasswordFile = config.sops.secrets."monitoring/admin_password".path; + secretKeyFile = config.sops.secrets."monitoring/secret_key".path; +}; + +sops.secrets."monitoring/admin_password" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; +sops.secrets."monitoring/secret_key" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; +``` + +With that, Grafana, Prometheus, Loki and Promtail are set up! You can access `Grafana` at +`grafana.example.com` with user `admin` and the password stored in the file referenced by `adminPasswordFile`. 
+ +I recommend adding an SMTP server configuration so you receive alerts by email: + +```nix +shb.monitoring.smtp = { + from_address = "grafana@example.com"; + from_name = "Grafana"; + host = "smtp.mailgun.org"; + port = 587; + username = "postmaster@mg.example.com"; + passwordFile = config.sops.secrets."monitoring/smtp".path; +}; + +sops.secrets."monitoring/smtp" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; +``` + +Since all logs are now stored in Loki, you can probably reduce the systemd journal retention +time with: + +```nix +# See https://www.freedesktop.org/software/systemd/man/journald.conf.html#SystemMaxUse= +services.journald.extraConfig = '' +SystemMaxUse=2G +SystemKeepFree=4G +SystemMaxFileSize=100M +MaxFileSec=day +''; +``` diff --git a/docs/blocks/monitoring/dashboard-errors.md b/docs/blocks/monitoring/dashboard-errors.md new file mode 100644 index 0000000..20438da --- /dev/null +++ b/docs/blocks/monitoring/dashboard-errors.md @@ -0,0 +1,9 @@ +# Errors Dashboard {#blocks-monitoring-error-dashboard} + +This dashboard is meant to be the first stop to understand why a service is misbehaving. + +![](./assets/dashboards_Errors_1.png) +![](./assets/dashboards_Errors_2.png) + +The yellow and red dashed vertical bars correspond to the [Requests Error Budget +Alert](#blocks-monitoring-budget-alerts) firing. diff --git a/docs/blocks/monitoring/dashboard-performance.md b/docs/blocks/monitoring/dashboard-performance.md new file mode 100644 index 0000000..e2e6557 --- /dev/null +++ b/docs/blocks/monitoring/dashboard-performance.md @@ -0,0 +1,6 @@ +# Performance Dashboard {#blocks-monitoring-performance-dashboard} + +This dashboard is meant to be the first stop to understand why a service is performing poorly. 
+ +![Performance Dashboard Top Part](./assets/dashboards_Performance_1.png) +![Performance Dashboard Bottom Part](./assets/dashboards_Performance_2.png) diff --git a/docs/blocks/monitoring/default.md b/docs/blocks/monitoring/default.md new file mode 100644 index 0000000..80b9e6f --- /dev/null +++ b/docs/blocks/monitoring/default.md @@ -0,0 +1,17 @@ +# Monitoring Block {#blocks-monitoring} + +Defined in [`/modules/blocks/monitoring.nix`](@REPO@/modules/blocks/monitoring.nix). + +This block sets up the monitoring stack for Self Host Blocks. It is composed of: + +- Grafana as the dashboard frontend. +- Prometheus as the database for metrics. +- Loki as the database for logs. + +```{=include=} parts +configuration.md +provisioning.md +dashboard-errors.md +dashboard-performance.md +alerts-requests-error-budger.md +``` diff --git a/docs/blocks/monitoring/provisioning.md b/docs/blocks/monitoring/provisioning.md new file mode 100644 index 0000000..6a0a4e2 --- /dev/null +++ b/docs/blocks/monitoring/provisioning.md @@ -0,0 +1,20 @@ +# Provisioning {#blocks-monitoring-provisioning} + +Self Host Blocks will automatically create the following resources: + +- For Grafana: + - datasources + - dashboards + - contact points + - notification policies + - alerts +- For Prometheus, the following exporters and related scrapers: + - node + - smartctl + - nginx +- For Loki, the following exporters and related scrapers: + - systemd + +Those resources are namespaced as appropriate under the Self Host Blocks namespace: + +![](./assets/folder.png) diff --git a/docs/manual.md b/docs/manual.md index 9260884..506c909 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -7,6 +7,10 @@ preface.md ``` +```{=include=} parts html:into-file=//blocks-monitoring.html +blocks/monitoring/default.md +``` + ```{=include=} appendix html:into-file=//options.html options.md ```