1
0
Fork 0
selfhostblocks/modules/blocks/monitoring.nix
Pierre Penninckx 222dfa755c
Revert to loki v2 (#230)
This is needed because v3 requires manual intervention to upgrade
otherwise Loki refuses to start. So until there's a fix, reverting is
the easiest fix.
2024-04-21 23:07:32 -07:00

520 lines
16 KiB
Nix

{ config, options, pkgs, lib, ... }:
let
cfg = config.shb.monitoring;
contracts = pkgs.callPackage ../contracts {};
fqdn = "${cfg.subdomain}.${cfg.domain}";
in
{
options.shb.monitoring = {
enable = lib.mkEnableOption "selfhostblocks.monitoring";
subdomain = lib.mkOption {
type = lib.types.str;
description = "Subdomain under which home-assistant will be served.";
example = "grafana";
};
domain = lib.mkOption {
type = lib.types.str;
description = "domain under which home-assistant will be served.";
example = "mydomain.com";
};
ssl = lib.mkOption {
description = "Path to SSL files";
type = lib.types.nullOr contracts.ssl.certs;
default = null;
};
grafanaPort = lib.mkOption {
type = lib.types.port;
description = "Port where Grafana listens to HTTP requests.";
default = 3000;
};
prometheusPort = lib.mkOption {
type = lib.types.port;
description = "Port where Prometheus listens to HTTP requests.";
default = 3001;
};
lokiPort = lib.mkOption {
type = lib.types.port;
description = "Port where Loki listens to HTTP requests.";
default = 3002;
};
lokiMajorVersion = lib.mkOption {
type = lib.types.enum [ 2 3 ];
description = ''
Switching from version 2 to 3 requires manual intervention
https://grafana.com/docs/loki/latest/setup/upgrade/#main--unreleased. So this let's the user
upgrade at their own pace.
'';
default = 2;
};
debugLog = lib.mkOption {
type = lib.types.bool;
description = "Set to true to enable debug logging of the infrastructure serving Grafana.";
default = false;
example = true;
};
orgId = lib.mkOption {
type = lib.types.int;
description = "Org ID where all self host blocks related config will be stored.";
default = 1;
};
provisionDashboards = lib.mkOption {
type = lib.types.bool;
description = "Provision Self Host Blocks dashboards under 'Self Host Blocks' folder.";
default = true;
};
contactPoints = lib.mkOption {
type = lib.types.listOf lib.types.str;
description = "List of email addresses to send alerts to";
default = [];
};
adminPasswordFile = lib.mkOption {
type = lib.types.path;
description = "File containing the initial admin password.";
};
secretKeyFile = lib.mkOption {
type = lib.types.path;
description = "File containing the secret key used for signing.";
};
smtp = lib.mkOption {
description = "SMTP options.";
default = null;
type = lib.types.nullOr (lib.types.submodule {
options = {
from_address = lib.mkOption {
type = lib.types.str;
description = "SMTP address from which the emails originate.";
example = "vaultwarden@mydomain.com";
};
from_name = lib.mkOption {
type = lib.types.str;
description = "SMTP name from which the emails originate.";
default = "Vaultwarden";
};
host = lib.mkOption {
type = lib.types.str;
description = "SMTP host to send the emails to.";
};
port = lib.mkOption {
type = lib.types.port;
description = "SMTP port to send the emails to.";
default = 25;
};
username = lib.mkOption {
type = lib.types.str;
description = "Username to connect to the SMTP host.";
};
passwordFile = lib.mkOption {
type = lib.types.str;
description = "File containing the password to connect to the SMTP host.";
};
};
});
};
};
config = lib.mkIf cfg.enable {
assertions = [
{
assertion = (!(isNull cfg.smtp)) -> builtins.length cfg.contactPoints > 0;
message = "Must have at least one contact point for alerting";
}
];
shb.postgresql.ensures = [
{
username = "grafana";
database = "grafana";
}
];
services.grafana = {
enable = true;
settings = {
database = {
host = "/run/postgresql";
user = "grafana";
name = "grafana";
type = "postgres";
# Uses peer auth for local users, so we don't need a password.
# Here's the syntax anyway for future refence:
# password = "$__file{/run/secrets/homeassistant/dbpass}";
};
security = {
secret_key = "$__file{${cfg.secretKeyFile}}";
disable_initial_admin_creation = false; # Enable when LDAP support is configured.
admin_password = "$__file{${cfg.adminPasswordFile}}"; # Remove when LDAP support is configured.
};
server = {
http_addr = "127.0.0.1";
http_port = cfg.grafanaPort;
domain = fqdn;
root_url = "https://${fqdn}";
router_logging = cfg.debugLog;
};
smtp = lib.mkIf (!(isNull cfg.smtp)) {
enabled = true;
inherit (cfg.smtp) from_address from_name;
host = "${cfg.smtp.host}:${toString cfg.smtp.port}";
user = cfg.smtp.username;
password = "$__file{${cfg.smtp.passwordFile}}";
};
};
};
services.grafana.provision = {
dashboards.settings = lib.mkIf cfg.provisionDashboards {
apiVersion = 1;
providers = [{
folder = "Self Host Blocks";
options.path = ./monitoring/dashboards;
allowUiUpdates = true;
disableDeletion = true;
}];
};
datasources.settings = {
apiVersion = 1;
datasources = [
{
inherit (cfg) orgId;
name = "Prometheus";
type = "prometheus";
url = "http://127.0.0.1:${toString config.services.prometheus.port}";
uid = "df80f9f5-97d7-4112-91d8-72f523a02b09";
isDefault = true;
version = 1;
}
{
inherit (cfg) orgId;
name = "Loki";
type = "loki";
url = "http://127.0.0.1:${toString config.services.loki.configuration.server.http_listen_port}";
uid = "cd6cc53e-840c-484d-85f7-96fede324006";
version = 1;
}
];
deleteDatasources = [
{
inherit (cfg) orgId;
name = "Prometheus";
}
{
inherit (cfg) orgId;
name = "Loki";
}
];
};
alerting.contactPoints.settings = {
apiVersion = 1;
contactPoints = [{
inherit (cfg) orgId;
name = "grafana-default-email";
receivers = lib.optionals ((builtins.length cfg.contactPoints) > 0) [{
uid = "sysadmin";
type = "email";
settings.addresses = lib.concatStringsSep ";" cfg.contactPoints;
}];
}];
};
alerting.policies.settings = {
apiVersion = 1;
policies = [{
inherit (cfg) orgId;
receiver = "grafana-default-email";
group_by = [ "grafana_folder" "alertname" ];
group_wait = "30s";
group_interval = "5m";
repeat_interval = "4h";
}];
# resetPolicies seems to happen after setting the above policies, effectively rolling back
# any updates.
};
alerting.rules.settings =
let
rules = builtins.fromJSON (builtins.readFile ./monitoring/rules.json);
ruleIds = map (r: r.uid) rules;
in
{
apiVersion = 1;
groups = [{
inherit (cfg) orgId;
name = "SysAdmin";
folder = "Self Host Blocks";
interval = "10m";
inherit rules;
}];
# deleteRules seems to happen after creating the above rules, effectively rolling back
# any updates.
};
};
services.prometheus = {
enable = true;
port = cfg.prometheusPort;
};
services.loki = {
enable = true;
dataDir = "/var/lib/loki";
package = if cfg.lokiMajorVersion == 3 then pkgs.grafana-loki else
# Comes from https://github.com/NixOS/nixpkgs/commit/8f95320f39d7e4e4a29ee70b8718974295a619f4
(pkgs.grafana-loki.overrideAttrs (finalAttrs: previousAttrs: rec {
version = "2.9.6";
src = pkgs.fetchFromGitHub {
owner = "grafana";
repo = "loki";
rev = "v${version}";
hash = "sha256-79hK7axHf6soku5DvdXkE/0K4WKc4pnS9VMbVc1FS2I=";
};
ldflags = let t = "github.com/grafana/loki/pkg/util/build"; in [
"-s"
"-w"
"-X ${t}.Version=${version}"
"-X ${t}.BuildUser=nix@nixpkgs"
"-X ${t}.BuildDate=unknown"
"-X ${t}.Branch=unknown"
"-X ${t}.Revision=unknown"
];
}));
configuration = {
auth_enabled = false;
server.http_listen_port = cfg.lokiPort;
ingester = {
lifecycler = {
address = "127.0.0.1";
ring = {
kvstore.store = "inmemory";
replication_factor = 1;
};
final_sleep = "0s";
};
chunk_idle_period = "5m";
chunk_retain_period = "30s";
};
schema_config = {
configs = [
{
from = "2018-04-15";
store = "boltdb";
object_store = "filesystem";
schema = "v9";
index.prefix = "index_";
index.period = "168h";
}
];
};
storage_config = {
boltdb.directory = "/tmp/loki/index";
filesystem.directory = "/tmp/loki/chunks";
};
limits_config = {
enforce_metric_name = false;
reject_old_samples = true;
reject_old_samples_max_age = "168h";
};
chunk_store_config = {
max_look_back_period = 0;
};
table_manager = {
chunk_tables_provisioning = {
inactive_read_throughput = 0;
inactive_write_throughput = 0;
provisioned_read_throughput = 0;
provisioned_write_throughput = 0;
};
index_tables_provisioning = {
inactive_read_throughput = 0;
inactive_write_throughput = 0;
provisioned_read_throughput = 0;
provisioned_write_throughput = 0;
};
retention_deletes_enabled = false;
retention_period = 0;
};
};
};
services.promtail = {
enable = true;
configuration = {
server = {
http_listen_port = 9080;
grpc_listen_port = 0;
};
positions.filename = "/tmp/positions.yaml";
client.url = "http://localhost:${toString config.services.loki.configuration.server.http_listen_port}/api/prom/push";
scrape_configs = [
{
job_name = "systemd";
journal = {
json = false;
max_age = "12h";
path = "/var/log/journal";
# matches = "_TRANSPORT=kernel";
labels = {
job = "systemd-journal";
};
};
relabel_configs = [
{
source_labels = [ "__journal__systemd_unit" ];
target_label = "unit";
}
];
}
];
};
};
services.nginx = {
enable = true;
virtualHosts.${fqdn} = {
forceSSL = !(isNull cfg.ssl);
sslCertificate = lib.mkIf (!(isNull cfg.ssl)) cfg.ssl.paths.cert;
sslCertificateKey = lib.mkIf (!(isNull cfg.ssl)) cfg.ssl.paths.key;
locations."/" = {
proxyPass = "http://${toString config.services.grafana.settings.server.http_addr}:${toString config.services.grafana.settings.server.http_port}";
proxyWebsockets = true;
extraConfig = ''
proxy_set_header Host $host;
'';
};
};
};
services.prometheus.scrapeConfigs = [
{
job_name = "node";
static_configs = [{
targets = ["127.0.0.1:${toString config.services.prometheus.exporters.node.port}"];
}];
}
{
job_name = "netdata";
metrics_path = "/api/v1/allmetrics";
params.format = [ "prometheus" ];
honor_labels = true;
static_configs = [{
targets = [ "127.0.0.1:19999" ];
}];
}
{
job_name = "smartctl";
static_configs = [{
targets = ["127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}"];
}];
}
{
job_name = "prometheus_internal";
static_configs = [{
targets = ["127.0.0.1:${toString config.services.prometheus.port}"];
}];
}
] ++ (lib.lists.optional config.services.nginx.enable {
job_name = "nginx";
static_configs = [{
targets = ["127.0.0.1:${toString config.services.prometheus.exporters.nginx.port}"];
}];
# }) ++ (lib.optional (builtins.length (lib.attrNames config.services.redis.servers) > 0) {
# job_name = "redis";
# static_configs = [
# {
# targets = ["127.0.0.1:${toString config.services.prometheus.exporters.redis.port}"];
# }
# ];
# }) ++ (lib.optional (builtins.length (lib.attrNames config.services.openvpn.servers) > 0) {
# job_name = "openvpn";
# static_configs = [
# {
# targets = ["127.0.0.1:${toString config.services.prometheus.exporters.openvpn.port}"];
# }
# ];
}) ++ (lib.optional config.services.dnsmasq.enable {
job_name = "dnsmasq";
static_configs = [{
targets = ["127.0.0.1:${toString config.services.prometheus.exporters.dnsmasq.port}"];
}];
});
services.prometheus.exporters.nginx = lib.mkIf config.services.nginx.enable {
enable = true;
port = 9111;
listenAddress = "127.0.0.1";
scrapeUri = "http://localhost:80/nginx_status";
};
services.prometheus.exporters.node = {
enable = true;
# https://github.com/prometheus/node_exporter#collectors
enabledCollectors = ["ethtool"];
port = 9112;
listenAddress = "127.0.0.1";
};
services.prometheus.exporters.smartctl = {
enable = true;
port = 9115;
listenAddress = "127.0.0.1";
};
# services.prometheus.exporters.redis = lib.mkIf (builtins.length (lib.attrNames config.services.redis.servers) > 0) {
# enable = true;
# port = 9119;
# listenAddress = "127.0.0.1";
# };
# services.prometheus.exporters.openvpn = lib.mkIf (builtins.length (lib.attrNames config.services.openvpn.servers) > 0) {
# enable = true;
# port = 9121;
# listenAddress = "127.0.0.1";
# statusPaths = lib.mapAttrsToList (name: _config: "/tmp/openvpn/${name}.status") config.services.openvpn.servers;
# };
services.prometheus.exporters.dnsmasq = lib.mkIf config.services.dnsmasq.enable {
enable = true;
port = 9211;
listenAddress = "127.0.0.1";
};
services.nginx.statusPage = lib.mkDefault config.services.nginx.enable;
services.netdata = {
enable = true;
config = {
# web.mode = "none";
# web."bind to" = "127.0.0.1:19999";
global = {
"debug log" = "syslog";
"access log" = "syslog";
"error log" = "syslog";
};
};
};
};
}