totoro: add alerts for smokeping
This commit is contained in:
parent
eea81a640e
commit
5fdf26f3e8
1 changed files with 28 additions and 0 deletions
|
@ -291,6 +291,34 @@ in {
|
|||
annotations:
|
||||
summary: "NixOS Channel {{ $labels.channel }} failing"
|
||||
description: "The channel {{ $labels.channel }} is failing - see https://hydra.nixos.org/job/{{ $labels.project }}/{{ $labels.jobset }}/tested"
|
||||
|
||||
# Packet loss
|
||||
- alert: SmokepingAveragePacketLossHigh
  # Fires when the loss ratio, averaged per `system`, stays at or above
  # 0.01 (i.e. 1%) for 10 minutes. The ratio is computed over 5m windows as
  # (requests sent - recorded response durations) / requests sent.
  # NOTE(review): the "." characters in the host regex are unescaped and so
  # match any character; presumably literal dots were intended. Confirm
  # before tightening, since that would stop matching dot-less host names.
  expr: (avg((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) by (system)) >= 0.01
  for: 10m
  labels:
    severity: page
  annotations:
    summary: "Average packet loss from {{ $labels.system }} high"
    # $value is a ratio (0.01 == 1%), so it must be rendered with
    # humanizePercentage; "humanize" followed by "%" would show 1% as "0.01%".
    description: "The average packet loss from {{ $labels.system }} is {{ $value | humanizePercentage }}, which is too high."
|
||||
- alert: SmokepingPacketLossVeryHigh
  # Fires per individual series (no aggregation) when the loss ratio stays at
  # or above 0.10 (i.e. 10%) for 10 minutes. The ratio is computed over 5m
  # windows as (requests sent - recorded response durations) / requests sent.
  # NOTE(review): the "." characters in the host regex are unescaped and so
  # match any character; presumably literal dots were intended. Confirm
  # before tightening, since that would stop matching dot-less host names.
  expr: ((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) >= 0.10
  for: 10m
  labels:
    severity: page
  annotations:
    summary: "Packet loss to {{ $labels.host }} from {{ $labels.system }} very high"
    # $value is a ratio (0.10 == 10%), so it must be rendered with
    # humanizePercentage; "humanize" followed by "%" would show 10% as "0.1%".
    description: "The packet loss from {{ $labels.system }} to {{ $labels.host }} (IP: {{ $labels.ip }}) is very high ({{ $value | humanizePercentage }})."
|
||||
|
||||
# Ping latency
|
||||
- alert: Smokeping95LatencyHigh
  # Fires when the 95th-percentile response duration, per (host, system),
  # stays above 0.03 seconds (30ms) for 15 minutes. Only the two listed
  # target IPs are considered.
  # Dots are written as [.] so they match literally without needing
  # backslash escapes; Prometheus regex matchers are already fully anchored,
  # so explicit ^ and $ are not needed.
  expr: histogram_quantile(0.95, sum(rate(smokeping_response_duration_seconds_bucket{host=~"(1[.]1[.]1[.]1|8[.]8[.]8[.]8)"}[5m])) by (le, host, system)) > 0.03
  for: 15m
  labels:
    severity: page
  annotations:
    summary: "Ping latency from {{ $labels.system }} to {{ $labels.host }} high"
    # $value is in seconds; humanizeDuration renders it with a unit (e.g. "35ms")
    # instead of a bare float.
    description: "The 95th-percentile ping latency from {{ $labels.system }} to {{ $labels.host }} is {{ $value | humanizeDuration }}."
|
||||
''
|
||||
];
|
||||
|
||||
|
|
Loading…
Reference in a new issue