totoro: add alerts for smokeping

This commit is contained in:
Luke Granger-Brown 2021-07-13 00:55:53 +00:00
parent eea81a640e
commit 5fdf26f3e8

View file

@ -291,6 +291,34 @@ in {
annotations:
summary: "NixOS Channel {{ $labels.channel }} failing"
description: "The channel {{ $labels.channel }} is failing - see https://hydra.nixos.org/job/{{ $labels.project }}/{{ $labels.jobset }}/tested"
# Packet loss
# Pages when the packet-loss ratio, averaged per `system` label over all
# matching targets, has been at or above 1% for 10 minutes.
- alert: SmokepingAveragePacketLossHigh
  # Loss ratio = (request rate - response-duration observation rate)
  #              / request rate, each taken over a 5m window.
  # The host matcher is meant to accept dotted hostnames and IPv4 literals.
  # Its dots are escaped so they match a literal "." only; the backslash is
  # doubled because PromQL double-quoted strings treat "\" as an escape.
  expr: |-
    (
      avg(
        (
          rate(smokeping_requests_total{host=~"(([a-z0-9]+\\.)+[a-z]+|([0-9]+\\.){3}[0-9]+)"}[5m])
          - rate(smokeping_response_duration_seconds_count[5m])
        )
        / rate(smokeping_requests_total[5m])
      ) by (system)
    ) >= 0.01
  for: 10m
  labels:
    severity: page
  annotations:
    summary: "Average packet loss from {{ $labels.system }} high"
    # $value is a 0..1 ratio, so humanizePercentage renders it (0.01 -> "1%").
    # Plain humanize would apply SI prefixes and print e.g. "10m".
    description: "The average packet loss from {{ $labels.system }} is {{ $value | humanizePercentage }}, which is too high."
# Pages when the packet-loss ratio of any single matching series has been at
# or above 10% for 10 minutes. No aggregation is applied, so the series labels
# (host, system, ip, ...) remain available to the annotations.
- alert: SmokepingPacketLossVeryHigh
  # Loss ratio = (request rate - response-duration observation rate)
  #              / request rate, each taken over a 5m window.
  # The host matcher is meant to accept dotted hostnames and IPv4 literals.
  # Its dots are escaped so they match a literal "." only; the backslash is
  # doubled because PromQL double-quoted strings treat "\" as an escape.
  expr: |-
    (
      (
        rate(smokeping_requests_total{host=~"(([a-z0-9]+\\.)+[a-z]+|([0-9]+\\.){3}[0-9]+)"}[5m])
        - rate(smokeping_response_duration_seconds_count[5m])
      )
      / rate(smokeping_requests_total[5m])
    ) >= 0.10
  for: 10m
  labels:
    severity: page
  annotations:
    summary: "Packet loss to {{ $labels.host }} from {{ $labels.system }} high"
    # $value is a 0..1 ratio, so humanizePercentage renders it (0.1 -> "10%").
    # Plain humanize would apply SI prefixes and print e.g. "100m".
    description: "The packet loss from {{ $labels.system }} to {{ $labels.host }} (IP: {{ $labels.ip }}) is very high ({{ $value | humanizePercentage }})."
# Ping latency
# Pages when the 95th-percentile response duration to 1.1.1.1 or 8.8.8.8,
# computed per (host, system) from the 5m bucket rates, has exceeded 0.03
# (30 ms, given the metric is in seconds) for 15 minutes.
- alert: Smokeping95LatencyHigh
  # Dots are escaped so only the literal addresses match; the backslash is
  # doubled because PromQL double-quoted strings treat "\" as an escape.
  # No explicit ^...$ is needed: Prometheus fully anchors label regexes.
  expr: |-
    histogram_quantile(
      0.95,
      sum(
        rate(smokeping_response_duration_seconds_bucket{host=~"1\\.1\\.1\\.1|8\\.8\\.8\\.8"}[5m])
      ) by (le, host, system)
    ) > 0.03
  for: 15m
  labels:
    severity: page
  annotations:
    summary: "Ping latency from {{ $labels.system }} to {{ $labels.host }} high"
    # $value is in seconds; humanizeDuration prints it with a unit
    # (e.g. "34.57ms") instead of a bare float.
    description: "The 95th-percentile ping latency from {{ $labels.system }} to {{ $labels.host }} is {{ $value | humanizeDuration }}."
''
];