From 3c48f56f6ea443fd64660a1c75ca5dd9b6652016 Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Tue, 20 Apr 2021 14:00:17 +0000 Subject: [PATCH] totoro: track NixOS channels in my local Prometheus --- ops/nixos/totoro/default.nix | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/ops/nixos/totoro/default.nix b/ops/nixos/totoro/default.nix index 4b0e12c641..2cf4756126 100644 --- a/ops/nixos/totoro/default.nix +++ b/ops/nixos/totoro/default.nix @@ -196,6 +196,7 @@ in { targets = ["localhost:${toString config.services.prometheus.alertmanager.port}"]; }]; }]; + globalConfig.scrape_interval = "15s"; scrapeConfigs = (builtins.attrValues depot.ops.nixos.systemExporters) ++ [{ job_name = "blade-oa/snmp"; metrics_path = "/snmp"; @@ -215,6 +216,20 @@ in { target_label = "__address__"; replacement = "totoro:${toString config.services.prometheus.exporters.snmp.port}"; }]; + } { + job_name = "nixos/prometheus"; + metrics_path = "/prometheus/federate"; + honor_labels = true; + params = { + "match[]" = [ + ''hydra_job_failed{current="1"}'' + ''hydra_job_completion_time{current="1"}'' + ]; + }; + scheme = "https"; + static_configs = [{ + targets = ["monitoring.nixos.org:443"]; + }]; }]; pushgateway.enable = true; @@ -257,6 +272,16 @@ in { annotations: summary: "Node exporter no longer scrapable" description: "{{ $labels.system }} is not reachable from totoro." + - name: nixos + rules: + - alert: NixOSChannelBad + expr: hydra_job_failed{} == 1 + for: 30m + labels: + severity: email + annotations: + summary: "NixOS Channel {{ $labels.channel }} failing" + description: "The channel {{ $labels.channel }} is failing - see https://hydra.nixos.org/job/{{ $labels.project }}/{{ $labels.jobset }}/tested" '' ];