From 5a3e072f3494a5e23b3e23cf706935d35928a3d5 Mon Sep 17 00:00:00 2001 From: ibizaman Date: Thu, 7 Dec 2023 10:04:48 -0800 Subject: [PATCH] use chapters instead of parts to allow sub-headings --- .../alerts-requests-error-budger.md | 7 -- docs/blocks/monitoring/configuration.md | 64 ---------- docs/blocks/monitoring/dashboard-errors.md | 9 -- .../monitoring/dashboard-performance.md | 6 - docs/blocks/monitoring/default.md | 115 +++++++++++++++++- docs/blocks/monitoring/provisioning.md | 20 --- docs/default.nix | 12 +- docs/manual.md | 10 +- docs/preface.md | 2 + 9 files changed, 118 insertions(+), 127 deletions(-) delete mode 100644 docs/blocks/monitoring/alerts-requests-error-budger.md delete mode 100644 docs/blocks/monitoring/configuration.md delete mode 100644 docs/blocks/monitoring/dashboard-errors.md delete mode 100644 docs/blocks/monitoring/dashboard-performance.md delete mode 100644 docs/blocks/monitoring/provisioning.md diff --git a/docs/blocks/monitoring/alerts-requests-error-budger.md b/docs/blocks/monitoring/alerts-requests-error-budger.md deleted file mode 100644 index fd199e0..0000000 --- a/docs/blocks/monitoring/alerts-requests-error-budger.md +++ /dev/null @@ -1,7 +0,0 @@ -# Requests Error Budget Alert {#blocks-monitoring-budget-alerts} - -This alert will fire when the ratio between number of requests getting a 5XX response from a service -and the total requests to that service exceeds 1%. 
- -![](./assets/alert_rules_5xx_1.png) -![](./assets/alert_rules_5xx_2.png) diff --git a/docs/blocks/monitoring/configuration.md b/docs/blocks/monitoring/configuration.md deleted file mode 100644 index 357eba6..0000000 --- a/docs/blocks/monitoring/configuration.md +++ /dev/null @@ -1,64 +0,0 @@ -# Configuration {#blocks-monitoring-configuration} - -```nix -shb.monitoring = { - enable = true; - subdomain = "grafana"; - inherit domain; - contactPoints = [ "me@example.com" ]; - adminPasswordFile = config.sops.secrets."monitoring/admin_password".path; - secretKeyFile = config.sops.secrets."monitoring/secret_key".path; -}; - -sops.secrets."monitoring/admin_password" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -sops.secrets."monitoring/secret_key" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -``` - -With that, Grafana, Prometheus, Loki and Promtail are setup! You can access `Grafana` at -`grafana.example.com` with user `admin` and password ``. 
- -I recommend adding a STMP server configuration so you receive alerts by email: - -```nix -shb.monitoring.smtp = { - from_address = "grafana@$example.com"; - from_name = "Grafana"; - host = "smtp.mailgun.org"; - port = 587; - username = "postmaster@mg.example.com"; - passwordFile = config.sops.secrets."monitoring/smtp".path; -}; - -sops.secrets."monitoring/secret_key" = { - sopsFile = ./secrets.yaml; - mode = "0400"; - owner = "grafana"; - group = "grafana"; - restartUnits = [ "grafana.service" ]; -}; -``` - -Since all logs are now stored in Loki, you can probably reduce the systemd journal retention -time with: - -```nix -# See https://www.freedesktop.org/software/systemd/man/journald.conf.html#SystemMaxUse= -services.journald.extraConfig = '' -SystemMaxUse=2G -SystemKeepFree=4G -SystemMaxFileSize=100M -MaxFileSec=day -''; -``` diff --git a/docs/blocks/monitoring/dashboard-errors.md b/docs/blocks/monitoring/dashboard-errors.md deleted file mode 100644 index 20438da..0000000 --- a/docs/blocks/monitoring/dashboard-errors.md +++ /dev/null @@ -1,9 +0,0 @@ -# Errors Dashboard {#blocks-monitoring-error-dashboard} - -This dashboard is meant to be the first stop to understand why a service is misbehaving. - -![](./assets/dashboards_Errors_1.png) -![](./assets/dashboards_Errors_2.png) - -The yellow and red dashed vertical bars correspond to the [Requests Error Budget -Alert](#blocks-monitoring-budget-alerts) firing. diff --git a/docs/blocks/monitoring/dashboard-performance.md b/docs/blocks/monitoring/dashboard-performance.md deleted file mode 100644 index e2e6557..0000000 --- a/docs/blocks/monitoring/dashboard-performance.md +++ /dev/null @@ -1,6 +0,0 @@ -# Performance Dashboard {#blocks-monitoring-performance-dashboard} - -This dashboard is meant to be the first stop to understand why a service is performing poorly. 
- -![Performance Dashboard Top Part](./assets/dashboards_Performance_1.png) -![Performance Dashboard Bottom Part](./assets/dashboards_Performance_2.png) diff --git a/docs/blocks/monitoring/default.md b/docs/blocks/monitoring/default.md index 80b9e6f..b23dd5c 100644 --- a/docs/blocks/monitoring/default.md +++ b/docs/blocks/monitoring/default.md @@ -8,10 +8,113 @@ This block sets up the monitoring stack for Self Host Blocks. It is composed of: - Prometheus as the database for metrics. - Loki as the database for logs. -```{=include=} parts -configuration.md -provisioning.md -dashboard-errors.md -dashboard-performance.md -alerts-requests-error-budger.md +## Configuration {#blocks-monitoring-configuration} + +```nix +shb.monitoring = { + enable = true; + subdomain = "grafana"; + inherit domain; + contactPoints = [ "me@example.com" ]; + adminPasswordFile = config.sops.secrets."monitoring/admin_password".path; + secretKeyFile = config.sops.secrets."monitoring/secret_key".path; +}; + +sops.secrets."monitoring/admin_password" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; +sops.secrets."monitoring/secret_key" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; ``` + +With that, Grafana, Prometheus, Loki and Promtail are setup! You can access `Grafana` at +`grafana.example.com` with user `admin` and password ``. 
+ +I recommend adding a SMTP server configuration so you receive alerts by email: + +```nix +shb.monitoring.smtp = { + from_address = "grafana@example.com"; + from_name = "Grafana"; + host = "smtp.mailgun.org"; + port = 587; + username = "postmaster@mg.example.com"; + passwordFile = config.sops.secrets."monitoring/smtp".path; +}; + +sops.secrets."monitoring/smtp" = { + sopsFile = ./secrets.yaml; + mode = "0400"; + owner = "grafana"; + group = "grafana"; + restartUnits = [ "grafana.service" ]; +}; +``` + +Since all logs are now stored in Loki, you can probably reduce the systemd journal retention +time with: + +```nix +# See https://www.freedesktop.org/software/systemd/man/journald.conf.html#SystemMaxUse= +services.journald.extraConfig = '' +SystemMaxUse=2G +SystemKeepFree=4G +SystemMaxFileSize=100M +MaxFileSec=day +''; +``` + +## Provisioning {#blocks-monitoring-provisioning} + +Self Host Blocks will automatically create the following resources: + +- For Grafana: + - datasources + - dashboards + - contact points + - notification policies + - alerts +- For Prometheus, the following exporters and related scrapers: + - node + - smartctl + - nginx +- For Loki, the following exporters and related scrapers: + - systemd + +Those resources are namespaced as appropriate under the Self Host Blocks namespace: + +![](./assets/folder.png) + +## Errors Dashboard {#blocks-monitoring-error-dashboard} + +This dashboard is meant to be the first stop to understand why a service is misbehaving. + +![](./assets/dashboards_Errors_1.png) +![](./assets/dashboards_Errors_2.png) + +The yellow and red dashed vertical bars correspond to the [Requests Error Budget +Alert](#blocks-monitoring-budget-alerts) firing. + +## Performance Dashboard {#blocks-monitoring-performance-dashboard} + +This dashboard is meant to be the first stop to understand why a service is performing poorly. 
+ +![Performance Dashboard Top Part](./assets/dashboards_Performance_1.png) +![Performance Dashboard Bottom Part](./assets/dashboards_Performance_2.png) + +## Requests Error Budget Alert {#blocks-monitoring-budget-alerts} + +This alert will fire when the ratio between number of requests getting a 5XX response from a service +and the total requests to that service exceeds 1%. + +![](./assets/alert_rules_5xx_1.png) +![](./assets/alert_rules_5xx_2.png) diff --git a/docs/blocks/monitoring/provisioning.md b/docs/blocks/monitoring/provisioning.md deleted file mode 100644 index 6a0a4e2..0000000 --- a/docs/blocks/monitoring/provisioning.md +++ /dev/null @@ -1,20 +0,0 @@ -# Provisioning {#blocks-monitoring-provisioning} - -Self Host Blocks will create automatically the following resources: - -- For Grafana: - - datasources - - dashboards - - contact points - - notification policies - - alerts -- For Prometheus, the following exporters and related scrapers: - - node - - smartctl - - nginx -- For Loki, the following exporters and related scrapers: - - systemd - -Those resources are namespaced as appropriate under the Self Host Blocks namespace: - -[](./assets/folder.png) diff --git a/docs/default.nix b/docs/default.nix index d72d57e..4b91844 100644 --- a/docs/default.nix +++ b/docs/default.nix @@ -96,12 +96,12 @@ in stdenv.mkDerivation { ${optionsDocs.optionsJSON}/share/doc/nixos/options.json find . 
-name "*.md" -print0 | \ - while IFS= read -r -d ''' f; do - substituteInPlace "''${f}" \ - --replace \ - '@REPO@' \ - "${lib.debug.traceVal ghRoot}" - done + while IFS= read -r -d ''' f; do + substituteInPlace "''${f}" \ + --replace \ + '@REPO@' \ + "${ghRoot}" + done nixos-render-docs manual html \ --manpage-urls ${manpage-urls} \ diff --git a/docs/manual.md b/docs/manual.md index 506c909..786a76c 100644 --- a/docs/manual.md +++ b/docs/manual.md @@ -7,18 +7,10 @@ preface.md ``` -```{=include=} parts html:into-file=//blocks-monitoring.html +```{=include=} chapters html:into-file=//blocks-monitoring.html blocks/monitoring/default.md ``` ```{=include=} appendix html:into-file=//options.html options.md ``` - - - - - - - - diff --git a/docs/preface.md b/docs/preface.md index 5665f85..e95a099 100644 --- a/docs/preface.md +++ b/docs/preface.md @@ -1,6 +1,8 @@ # Preface {#preface} +::: {.note} This document is the complete manual for Self Host Blocks, the building blocks for self-hosting with battery included. +::: Self Host Blocks is hosted on [GitHub](https://github.com/ibizaman/selfhostblocks). If you encounter problems or bugs then please report them on the [issue