totoro: move all rules into a single group

This commit is contained in:
Luke Granger-Brown 2021-04-20 14:35:23 +00:00
parent eb9b1a43c0
commit 6cba0be3b5

View file

@@ -237,8 +237,10 @@ in {
rules = [ rules = [
'' ''
groups: groups:
- name: blade-oa - name: alerting
rules: rules:
# Blade power
- alert: AveragePowerUsageTooHigh - alert: AveragePowerUsageTooHigh
expr: (sum(avg_over_time(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}[10m])) / 230) > 6.5 expr: (sum(avg_over_time(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}[10m])) / 230) > 6.5
labels: labels:
@@ -262,8 +264,8 @@ in {
annotations: annotations:
summary: "Blade: Single Blade Power Usage Out of Bounds" summary: "Blade: Single Blade Power Usage Out of Bounds"
description: "{{ $labels.system }} has power usage of {{ $value }}, which is out of expected bounds." description: "{{ $labels.system }} has power usage of {{ $value }}, which is out of expected bounds."
- name: availability
rules: # Systems
- alert: NodeExporterDown - alert: NodeExporterDown
expr: up{exporter="node", system=~"(blade-(tuvok|paris|janeway|torres)|kusakabe|marukuru|swann|totoro|clouvider-.*|etheroute-.*)"} < 1 expr: up{exporter="node", system=~"(blade-(tuvok|paris|janeway|torres)|kusakabe|marukuru|swann|totoro|clouvider-.*|etheroute-.*)"} < 1
for: 30m for: 30m
@@ -272,8 +274,8 @@ in {
annotations: annotations:
summary: "Node exporter no longer scrapable" summary: "Node exporter no longer scrapable"
description: "{{ $labels.system }} is not reachable from totoro." description: "{{ $labels.system }} is not reachable from totoro."
- name: nixos
rules: # Alert if the NixOS channels are broken
- alert: NixOSChannelBad - alert: NixOSChannelBad
expr: hydra_job_failed{} == 1 expr: hydra_job_failed{} == 1
for: 30m for: 30m