120 lines
4.2 KiB
Nix
120 lines
4.2 KiB
Nix
import ../make-test-python.nix (
|
|
{ lib, pkgs, ... }:
|
|
|
|
{
|
|
name = "prometheus-config-reload";
|
|
|
|
nodes = {
  # The single test machine: runs prometheus and scrapes itself.
  prometheus =
    { config, pkgs, ... }:
    let
      # Shorthand for this machine's evaluated prometheus options.
      promCfg = config.services.prometheus;
    in
    {
      # jq is used by the test script to extract the YAML config from the
      # prometheus status API response.
      environment.systemPackages = [ pkgs.jq ];

      networking.firewall.allowedTCPPorts = [ promCfg.port ];

      services.prometheus = {
        enable = true;
        # The test script checks that a configuration change results in a
        # reload (instead of a restart) of prometheus.
        enableReload = true;
        globalConfig.scrape_interval = "2s";
        scrapeConfigs = [
          {
            job_name = "prometheus";
            static_configs = [
              { targets = [ "prometheus:${toString promCfg.port}" ]; }
            ];
          }
        ];
      };

      # The configuration the test script switches to at runtime.
      specialisation."prometheus-config-change".configuration = {
        # yq is used by the test script to query the reloaded configuration.
        environment.systemPackages = [ pkgs.yq ];

        # This configuration just adds a new prometheus job that scrapes
        # node_exporter metrics from a host called `node`. The test script
        # only checks that the job shows up in the loaded configuration.
        services.prometheus.scrapeConfigs = [
          {
            job_name = "node";
            static_configs = [
              { targets = [ "node:${toString promCfg.exporters.node.port}" ]; }
            ];
          }
        ];
      };
    };
};
|
|
|
|
testScript = ''
  import json

  prometheus.wait_for_unit("prometheus")
  # NOTE(review): 9090 is hard-coded here while the node configuration refers to
  # config.services.prometheus.port; presumably 9090 is that option's default.
  prometheus.wait_for_open_port(9090)

  # Check if switching to a NixOS configuration that changes the prometheus
  # configuration reloads (instead of restarts) prometheus before the switch
  # finishes successfully:
  with subtest("config change reloads prometheus"):
      # We check if prometheus has finished reloading by looking for the message
      # "Completed loading of configuration file" in the journal between the start
      # and finish of switching to the new NixOS configuration.
      #
      # To mark the start we record the journal cursor before starting the switch:
      cursor = json.loads(
          prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
      )["__CURSOR"]

      # Now we switch:
      prometheus_config_change = prometheus.succeed(
          "readlink /run/current-system/specialisation/prometheus-config-change"
      ).strip()
      prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")

      # Next we retrieve all logs since the start of switching
      # (one JSON object per line):
      logs_after_starting_switching = prometheus.succeed(
          f"journalctl --after-cursor='{cursor}' -o json --output-fields=MESSAGE"
      )

      # Finally we check if the message "Completed loading of configuration file"
      # occurs before the "finished switching to system configuration" message:
      finished_switching_msg = (
          "finished switching to system configuration " + prometheus_config_change
      )
      saw_reload = False
      saw_switch_finished = False
      for log_line in logs_after_starting_switching.splitlines():
          # The output ends with a newline; never feed an empty line to the
          # JSON parser, otherwise a missing "finished switching" message
          # surfaces as a JSONDecodeError instead of the assertion below.
          if not log_line.strip():
              continue
          msg = json.loads(log_line).get("MESSAGE")
          # journald exports non-UTF-8 messages as byte arrays and oversized
          # ones as null; neither can match the messages we look for.
          if not isinstance(msg, str):
              continue
          if "Completed loading of configuration file" in msg:
              saw_reload = True
          if msg == finished_switching_msg:
              saw_switch_finished = True
              break

      assert saw_reload, "no prometheus config reload logged before the switch finished"
      assert saw_switch_finished, "switch-to-configuration did not log completion"

      # Check if the reloaded config includes the new node job:
      prometheus.succeed(
          """
            curl -sf http://127.0.0.1:9090/api/v1/status/config \
              | jq -r .data.yaml \
              | yq '.scrape_configs | any(.job_name == "node")' \
              | grep true
          """
      )
'';
|
|
}
|
|
)
|