381 lines
11 KiB
Nix
381 lines
11 KiB
Nix
# SPDX-FileCopyrightText: 2020 Luke Granger-Brown <depot@lukegb.com>
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
{ depot, lib, pkgs, rebuilder, config, ... }:
|
|
let
|
|
inherit (depot.ops) secrets;
|
|
in {
|
|
imports = [
|
|
../../../third_party/nixpkgs/nixos/modules/installer/scan/not-detected.nix
|
|
../lib/client.nix
|
|
../lib/whitby-distributed.nix
|
|
../lib/twitternuke.nix
|
|
../lib/quotes.bfob.gg.nix
|
|
];
|
|
|
|
# Kernel modules made available to the initrd.
boot.initrd.availableKernelModules = [
  "xhci_pci"
  "ahci"
  "nvme"
  "usb_storage"
  "usbhid"
  "sd_mod"
];

# mkAfter orders "kvm-intel" after any other definitions of this list.
boot.kernelModules = lib.mkAfter [
  "kvm-intel"
];

# Extra kernel command-line parameters.
boot.kernelParams = [
  "mitigations=off"
];
|
|
|
|
# Mount table: ZFS datasets from the "zboot" and "tank" pools, plus a vfat
# partition mounted at /boot.
fileSystems =
  let
    # Builds the fileSystems entry for the ZFS dataset with the given name.
    zfsDataset = dataset: {
      fsType = "zfs";
      device = dataset;
    };
  in
  {
    # Datasets on the "zboot" pool.
    "/" = zfsDataset "zboot/safe/root";
    "/nix" = zfsDataset "zboot/local/nix";

    # Datasets on the "tank" pool.
    "/home" = zfsDataset "tank/safe/home";
    "/export" = zfsDataset "tank/safe/export";
    "/srv" = zfsDataset "tank/safe/srv";
    "/srv/pancake" = zfsDataset "tank/safe/srv/pancake";

    "/persist" = zfsDataset "tank/safe/persist";
    "/store" = zfsDataset "tank/local/store";

    # The only non-ZFS mount; referenced by filesystem UUID.
    "/boot" = {
      fsType = "vfat";
      device = "/dev/disk/by-uuid/D178-4E19";
    };
  };
|
|
|
|
# Boot via systemd-boot (EFI), and let the installer modify EFI variables.
boot.loader = {
  efi.canTouchEfiVariables = true;
  systemd-boot.enable = true;
};
|
|
|
|
# Selects the PostgreSQL package; this block does not itself enable the service.
services.postgresql.package = pkgs.postgresql_13;

# Both of these are mkDefault, so other modules can override them without mkForce.
nix.maxJobs = lib.mkDefault 8;
powerManagement.cpuFreqGovernor = lib.mkDefault "performance";

# Container runtime.
virtualisation.podman.enable = true;
|
|
|
|
# Extra packages.
environment.systemPackages =
  let
    # secretsync, configured against the depot checkout in lukegb's home
    # directory. It maps the OPS_SECRETS_DEFAULT_NIX variable to
    # ops/secrets/default.nix (path presumably relative to workingDir —
    # confirm against secretsync).
    configuredSecretsync = depot.nix.pkgs.secretsync.configure {
      workingDir = "/home/lukegb/depot";
      manifestVariable = "SECRETS_MANIFEST";
      gitlabAccessToken = secrets.deployer.gitlabAccessToken;
      variablesToFile.OPS_SECRETS_DEFAULT_NIX = "ops/secrets/default.nix";
    };
  in
  [
    configuredSecretsync
  ];
|
|
|
|
# Networking!
networking = {
  hostName = "totoro"; # Define your hostname.
  domain = "int.as205479.net";
  hostId = "676c08c4";

  # DHCP is off globally and switched on only for br-ext below.
  useDHCP = false;

  bridges = {
    # br-ext bridges the physical NIC.
    br-ext.interfaces = [ "enp0s31f6" ];
    # br-int has no member interfaces.
    br-int.interfaces = [ ];
  };

  interfaces = {
    br-ext.useDHCP = true;

    br-int = {
      virtual = true;
      useDHCP = false;
      ipv4.addresses = [
        {
          address = "10.0.0.2";
          prefixLength = 24;
        }
      ];
    };
  };

  firewall = {
    allowedTCPPorts = [
      80 443 # web
      4001 # ipfs
    ];
    allowedUDPPorts = [
      4001 # ipfs
    ];
  };
};

# Tailscale address of this host, stored in the depot-specific `my.ip` option.
my.ip.tailscale = "100.122.86.11";
|
|
|
|
# Virtualisation
virtualisation.libvirtd = {
  allowedBridges = [ "virbr0" "br-ext" ];
  enable = true;
};

users.users = {
  # Extra per-user packages and libvirt access for lukegb. mkAfter orders the
  # group after any other definitions of extraGroups.
  lukegb = {
    extraGroups = lib.mkAfter [ "libvirtd" ];
    packages = [ depot.pkgs.irssi ];
  };

  # System account whose home is /srv/pancake; the "pancake" php-fpm pool
  # runs as this user.
  pancake = {
    isSystemUser = true;
    home = "/srv/pancake";
    group = "pancake";
  };

  # Adds nginx to the "acme" group.
  nginx.extraGroups = lib.mkAfter [ "acme" ];
};

# nginx is a member of the pancake group alongside the pancake user.
users.groups.pancake.members = [ "pancake" "nginx" ];
|
|
|
|
# tmpfiles.d "L" entry: create the symlink /var/lib/export -> /export.
# NOTE(review): presumably this is what makes the relative
# services.prometheus.stateDir ("export/monitoring/prometheus") end up on the
# /export dataset — confirm.
systemd.tmpfiles.rules = [ "L /var/lib/export - - - - /export" ];
|
|
|
|
# nginx front end for invoices.lukegb.com, served from /srv/pancake/public_html
# and backed by the "pancake" php-fpm pool.
services.nginx = {
  enable = true;

  virtualHosts."invoices.lukegb.com" =
    let
      # Location body shared by the PHP regex location and the @router
      # fallback: every request is rewritten to /index.php and handed to the
      # pancake php-fpm socket.
      # NOTE(review): both fastcgi_params and fastcgi.conf are included, as in
      # the original; the two files overlap heavily.
      phpFrontController = {
        extraConfig = ''
          rewrite ^(.*)$ /index.php break;
          fastcgi_split_path_info ^(.+\.php)(/.+)$;
          fastcgi_index index.php;
          fastcgi_pass unix:${config.services.phpfpm.pools.pancake.socket};
          include ${pkgs.nginx}/conf/fastcgi_params;
          include ${pkgs.nginx}/conf/fastcgi.conf;
        '';
      };
    in
    {
      root = "/srv/pancake/public_html";
      useACMEHost = "invoices.lukegb.com";
      forceSSL = true;

      # Static files first; anything missing or forbidden goes to the router.
      locations."/" = {
        tryFiles = "$uri $uri/ @router";
        index = "index.html index.php";
        extraConfig = ''
          error_page 403 = @router;
          error_page 404 = @router;
        '';
      };

      # URIs ending in ".php", or whose last path segment has no dot, go to
      # PHP. The dot before "php" is escaped so it matches a literal "."
      # rather than any character (previously e.g. "/style.xphp" matched).
      locations."~ (\\.php|\\/[^./]+)$" = phpFrontController;
      locations."@router" = phpFrontController;
    };
};
|
|
# php-fpm pool for the pancake site; runs as pancake:pancake.
services.phpfpm.pools.pancake =
  let
    # php-fpm pool directives.
    poolSettings = {
      # The pool socket is owned by the nginx user.
      "listen.owner" = config.services.nginx.user;

      # Process manager sizing.
      pm = "dynamic";
      "pm.start_servers" = 2;
      "pm.min_spare_servers" = 2;
      "pm.max_spare_servers" = 5;
      "pm.max_children" = 32;
      "pm.max_requests" = 500;

      # Send PHP errors and worker output to stderr.
      catch_workers_output = true;
      "php_admin_flag[log_errors]" = true;
      "php_admin_value[error_log]" = "stderr";
    };
  in
  {
    user = "pancake";
    group = "pancake";
    settings = poolSettings;
    # PATH inside the pool contains only the php package's bin directory.
    phpEnv.PATH = lib.makeBinPath [ pkgs.php ];
  };
|
|
# MariaDB with a "pancake" database and a "pancake" user that holds all
# privileges on that database.
services.mysql = {
  enable = true;
  package = pkgs.mariadb;

  ensureDatabases = [ "pancake" ];
  ensureUsers = [
    {
      name = "pancake";
      ensurePermissions."pancake.*" = "ALL PRIVILEGES";
    }
  ];
};
|
|
|
|
# ACME certificate for invoices.lukegb.com, validated through the Cloudflare
# DNS provider.
security.acme =
  let
    invoicesHost = "invoices.lukegb.com";
  in
  {
    acceptTerms = true;
    email = "letsencrypt@lukegb.com";

    certs.${invoicesHost} = {
      domain = invoicesHost;
      dnsProvider = "cloudflare";
      credentialsFile = secrets.cloudflareCredentials;
      # Reload nginx from the certificate's postRun hook.
      postRun = ''
        systemctl reload nginx
      '';
    };
  };
|
|
|
|
# Prometheus server for this host, together with its Alertmanager,
# Pushgateway and SNMP exporter.
services.prometheus =
  let
    promCfg = config.services.prometheus;

    # Addresses derived from the ports the NixOS modules are configured with.
    alertmanagerTarget = "localhost:${toString promCfg.alertmanager.port}";
    snmpExporterTarget = "totoro:${toString promCfg.exporters.snmp.port}";

    # Scrapes 10.100.1.200 through the SNMP exporter using the "hpe" module.
    # The relabelling moves the target address into the `target` query
    # parameter and the `instance` label, then points the actual scrape at
    # the exporter.
    bladeSnmpJob = {
      job_name = "blade-oa/snmp";
      metrics_path = "/snmp";
      params.module = [ "hpe" ];
      static_configs = [
        { targets = [ "10.100.1.200" ]; }
      ];
      relabel_configs = [
        {
          source_labels = [ "__address__" ];
          target_label = "__param_target";
        }
        {
          source_labels = [ "__param_target" ];
          target_label = "instance";
        }
        {
          target_label = "__address__";
          replacement = snmpExporterTarget;
        }
      ];
    };

    # Federates the current Hydra job series from monitoring.nixos.org.
    nixosFederationJob = {
      job_name = "nixos/prometheus";
      scheme = "https";
      metrics_path = "/prometheus/federate";
      honor_labels = true;
      params."match[]" = [
        ''hydra_job_failed{current="1"}''
        ''hydra_job_completion_time{current="1"}''
      ];
      static_configs = [
        { targets = [ "monitoring.nixos.org:443" ]; }
      ];
    };
  in
  {
    enable = true;
    stateDir = "export/monitoring/prometheus";

    globalConfig.scrape_interval = "15s";

    alertmanagers = [
      {
        scheme = "http";
        static_configs = [
          { targets = [ alertmanagerTarget ]; }
        ];
      }
    ];

    # Exporter jobs defined in the depot first, then the two jobs above.
    scrapeConfigs = (builtins.attrValues depot.ops.nixos.systemExporters) ++ [
      bladeSnmpJob
      nixosFederationJob
    ];

    pushgateway.enable = true;

    # Alerting rules, written as one YAML document.
    rules = [
      ''
        groups:
        - name: alerting
          rules:

          # Blade power
          - alert: AveragePowerUsageTooHigh
            expr: (sum(avg_over_time(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}[10m])) / 230) > 6.5
            labels:
              severity: page
            annotations:
              summary: "Blade: Power Usage Too High (rolling)"
              description: "Power usage of blade system has been too high for last 10 minutes ({{ $value }}). https://grafana.int.lukegb.com/d/g-u3XQ8Gk/blade-power"
          - alert: PowerUsageTooHigh
            expr: (sum(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}) / 230) > 6.5
            for: 10m
            labels:
              severity: page
            annotations:
              summary: "Blade: Power Usage Too High"
              description: "Power usage of blade system has been too high for last 10 minutes ({{ $value }}). https://grafana.int.lukegb.com/d/g-u3XQ8Gk/blade-power"
          - alert: BladePowerUsageOutOfBounds
            expr: node_hwmon_power_average_watt{system=~"blade-.*"} > on () group_left() (1.5 * quantile(0.5, node_hwmon_power_average_watt{system=~"blade-.*"}))
            for: 60m
            labels:
              severity: page
            annotations:
              summary: "Blade: Single Blade Power Usage Out of Bounds"
              description: "{{ $labels.system }} has power usage of {{ $value }}, which is out of expected bounds."

          # Systems
          - alert: NodeExporterDown
            expr: up{exporter="node", system=~"(blade-(tuvok|paris|janeway|torres)|kusakabe|marukuru|swann|totoro|clouvider-.*|etheroute-.*)"} < 1
            for: 30m
            labels:
              severity: page
            annotations:
              summary: "Node exporter no longer scrapable"
              description: "{{ $labels.system }} is not reachable from totoro."

          # Alert if the NixOS channels are broken
          - alert: NixOSChannelBad
            expr: hydra_job_failed{} == 1
            for: 30m
            labels:
              severity: email
            annotations:
              summary: "NixOS Channel {{ $labels.channel }} failing"
              description: "The channel {{ $labels.channel }} is failing - see https://hydra.nixos.org/job/{{ $labels.project }}/{{ $labels.jobset }}/tested"
      ''
    ];

    # Every alert goes to a single receiver, which has both a webhook to
    # localhost:9997 and a Pushover configuration.
    alertmanager = {
      enable = true;
      configuration = {
        global = { };
        route.receiver = "default-receiver";
        receivers = [
          {
            name = "default-receiver";
            webhook_configs = [
              { url = "http://localhost:9997"; }
            ];
            pushover_configs = [
              {
                user_key = secrets.pushover.userKey;
                token = secrets.pushover.tokens.alertmanager;
              }
            ];
          }
        ];
      };
    };

    exporters.snmp = {
      enable = true;
      configurationPath = depot.nix.pkgs.prometheus-snmp-config;
    };
  };
|
|
# Grafana on port 3000, all interfaces, with users identified from Pomerium
# claim headers.
# NOTE(review): auth.proxy trusts the X-Pomerium-Claim-* request headers and
# addr is 0.0.0.0 — confirm that only the authenticating proxy can reach this
# port.
services.grafana = {
  enable = true;
  addr = "0.0.0.0";
  port = 3000;
  domain = "grafana.int.lukegb.com";
  rootUrl = "https://grafana.int.lukegb.com/";

  # extraOptions takes flat keys of the form SECTION_KEY, so the nested
  # { section = { key = value; }; } description below is flattened.
  extraOptions =
    let
      # Upper-cases a name and maps "." and "-" to "_",
      # e.g. "auth.proxy" -> "AUTH_PROXY".
      envToken = raw: lib.toUpper (builtins.replaceStrings [ "." "-" ] [ "_" "_" ] raw);

      # One section -> list of { name = "SECTION_KEY"; value = ...; } pairs.
      sectionToPairs = section: entries:
        lib.mapAttrsToList
          (key: value: lib.nameValuePair "${envToken section}_${envToken key}" value)
          entries;

      # All sections -> a single flat attribute set.
      flattenSections = sections:
        builtins.listToAttrs (builtins.concatLists (lib.mapAttrsToList sectionToPairs sections));
    in
    flattenSections {
      "auth.proxy" = {
        enabled = "true";
        auto_sign_up = "true";
        header_name = "X-Pomerium-Claim-Email";
        header_property = "email";
        headers = "username:X-Pomerium-Claim-User";
      };
      security.cookie_secure = "true";
    };
};
|
|
# Appended (mkAfter) to Grafana's preStart script: rebuild the plugins
# directory from scratch on every start, as symlinks to the packaged plugins.
systemd.services.grafana.preStart =
  let
    pluginDir = "${config.services.grafana.dataDir}/plugins";

    grafanaPlugins = depot.pkgs.grafana-plugins;
    wantedPlugins = [
      grafanaPlugins.grafana-piechart-panel
      grafanaPlugins.grafana-clock-panel
      grafanaPlugins.grafana-worldmap-panel
      grafanaPlugins.grafana-polystat-panel
    ];

    # Shell line that links one plugin package into place under its pname.
    linkPlugin = plugin: ''
      ln -sf ${plugin} ${pluginDir}/${plugin.pname}
    '';

    linkCommands = lib.concatStringsSep "\n" (map linkPlugin wantedPlugins);
  in
  lib.mkAfter ''
    rm -rf ${pluginDir}
    mkdir ${pluginDir}
    ${linkCommands}
  '';
|
|
|
|
# IPFS daemon with its data under /store/ipfs and the experimental filestore
# switched on.
services.ipfs = {
  enable = true;
  dataDir = "/store/ipfs";
  extraConfig.Experimental.FilestoreEnabled = true;
};
|
|
|
|
# alertmanager-discord, listening on 127.0.0.1:9997 — the address the
# Alertmanager default receiver's webhook_configs points at. It is given the
# Discord webhook URL through the DISCORD_WEBHOOK environment variable.
systemd.services.alertmanager-discord = {
  enable = true;

  # `enable = true` only keeps the unit from being masked; it does not start
  # it. Without a wantedBy the service is never pulled in at boot, so
  # Alertmanager's webhook to localhost:9997 would have nothing listening.
  wantedBy = [ "multi-user.target" ];

  serviceConfig = {
    ExecStart = "${depot.pkgs.alertmanager-discord}/bin/alertmanager-discord -listen.address 127.0.0.1:9997";

    # NOTE(review): pkgs.writeText puts the webhook URL into the Nix store,
    # which is world-readable. Kept as in the original; consider a secret
    # file outside the store.
    EnvironmentFile = pkgs.writeText "discord-secret" ''
      DISCORD_WEBHOOK=${secrets.monitoring.alertmanager.discord.api_url}
    '';

    # Sandboxing: transient user plus restricted views of the host.
    DynamicUser = true;
    MountAPIVFS = true;
    PrivateTmp = true;
    PrivateUsers = true;
    ProtectControlGroups = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
  };
};
|
|
|
|
# NixOS release this host's stateful defaults are pinned to.
# NOTE(review): check the NixOS manual's guidance on stateVersion before changing this.
system.stateVersion = "20.03";
|
|
}
|