totoro: tweak alertmanager setup
This commit is contained in:
parent
ed03e709c5
commit
ff0eff593d
1 changed file with 4 additions and 2 deletions
|
@@ -270,6 +270,7 @@ in {
|
|||
services.prometheus = {
|
||||
enable = true;
|
||||
stateDir = "export/monitoring/prometheus";
|
||||
webExternalUrl = "https://prometheus.int.lukegb.com";
|
||||
alertmanagers = [{
|
||||
scheme = "http";
|
||||
static_configs = [{
|
||||
|
@@ -373,13 +374,13 @@ in {
|
|||
|
||||
# Packet loss
|
||||
- alert: SmokepingAveragePacketLossHigh
|
||||
expr: (avg((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) by (system)) >= 0.01
|
||||
expr: sum(clamp((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m]) > 0.01, 1, 1)) by (system) > sum(clamp(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}, 1, 1)) by (system) * 0.4
|
||||
for: 10m
|
||||
labels:
|
||||
severity: page
|
||||
annotations:
|
||||
summary: "Average packet loss from {{ $labels.system }} high"
|
||||
description: "The average packet loss from {{ $labels.system }} is {{ $value | humanizePercentage }}%, which is too high."
|
||||
description: "Too many endpoints are failing packet loss checks from {{ $labels.system }} ({{ $value }} targets)."
|
||||
- alert: SmokepingPacketLossVeryHigh
|
||||
expr: ((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) >= 0.10
|
||||
for: 10m
|
||||
|
@@ -413,6 +414,7 @@ in {
|
|||
|
||||
alertmanager = {
|
||||
enable = true;
|
||||
webExternalUrl = "https://alertmanager.int.lukegb.com";
|
||||
configuration = {
|
||||
global = {};
|
||||
route = {
|
||||
|
|
Loading…
Reference in a new issue