From 683e6ffc21df0ec257edecf172838635dbd41021 Mon Sep 17 00:00:00 2001
From: Luke Granger-Brown
Date: Thu, 2 Sep 2021 18:35:18 +0000
Subject: [PATCH] totoro: add alert for BFD session failure

---
Notes:
    * humanizePercentage already renders a trailing "%", so the literal "%"
      that used to follow the template in both packet-loss descriptions is
      dropped from the new lines; otherwise the alert text ends in "%%".
    * The new MaldenRoadInternetConnectivityFailure rule fires when the
      summed "Up" state for a (instance, neighbor_address, device, system)
      group drops below 1.
    * NOTE(review): leading whitespace of the hunk lines was reconstructed;
      confirm it against the target file, or apply with
      `git apply --ignore-whitespace`.

 ops/nixos/totoro/default.nix | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ops/nixos/totoro/default.nix b/ops/nixos/totoro/default.nix
index 3de2e54a2b..e3cfbc5dde 100644
--- a/ops/nixos/totoro/default.nix
+++ b/ops/nixos/totoro/default.nix
@@ -300,7 +300,7 @@ in {
               severity: page
             annotations:
               summary: "Average packet loss from {{ $labels.system }} high"
-              description: "The average packet loss from {{ $labels.system }} is {{ $value | humanize }}%, which is too high."
+              description: "The average packet loss from {{ $labels.system }} is {{ $value | humanizePercentage }}, which is too high."
           - alert: SmokepingPacketLossVeryHigh
             expr: ((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) >= 0.10
             for: 10m
@@ -308,7 +308,7 @@ in {
               severity: page
             annotations:
               summary: "Packet loss to {{ $labels.host }} from {{ $labels.system }} high"
-              description: "The packet loss from {{ $labels.system }} to {{ $labels.host }} (IP: {{ $labels.ip }}) is very high ({{ $value | humanize }}%)."
+              description: "The packet loss from {{ $labels.system }} to {{ $labels.host }} (IP: {{ $labels.ip }}) is very high ({{ $value | humanizePercentage }})."
 
           # Ping latency
           - alert: Smokeping95LatencyHigh
@@ -319,5 +319,14 @@ in {
             annotations:
               summary: "Ping latency from {{ $labels.system }} to {{ $labels.host }} high"
               description: "The 95th-percentile ping latency from {{ $labels.system }} to {{ $labels.host }} is {{ $value }}."
+
+          # Internet connectivity
+          - alert: MaldenRoadInternetConnectivityFailure
+            expr: sum(bird_bfd_session_state{state="Up"} * on(instance,name,neighbor_address,system) group_left(device) bird_bfd_session_device) by (instance,neighbor_address,device,state,system) < 1
+            labels:
+              severity: page
+            annotations:
+              summary: "Device {{ $labels.device }} on {{ $labels.system }} reports BFD down to neighbour {{ $labels.neighbor_address }}"
+              description: "Ruh roh, Raggy"
       ''
     ];