# depot/ops/nixos/totoro/default.nix
# (repository listing: 605 lines, 19 KiB, Nix)

# SPDX-FileCopyrightText: 2020 Luke Granger-Brown <depot@lukegb.com>
#
# SPDX-License-Identifier: Apache-2.0
{ depot, lib, pkgs, config, ... }:
let
inherit (depot.ops) secrets;
in {
imports = [
../../../third_party/nixpkgs/nixos/modules/installer/scan/not-detected.nix
../lib/client.nix
../lib/whitby-distributed.nix
../lib/nixbuild-distributed.nix
../lib/twitternuke.nix
../lib/quotes.bfob.gg.nix
../lib/baserow.nix
../lib/deluge.nix
../lib/plex.nix
../lib/tumblrandom.nix
../lib/freeswitch.nix
./home-assistant.nix
./authentik.nix
./adsb.nix
];
# Kernel module and kernel command-line settings for this host.
boot = {
  # Modules that may be loaded from the initrd.
  initrd.availableKernelModules = [
    "xhci_pci"
    "ahci"
    "nvme"
    "usb_storage"
    "usbhid"
    "sd_mod"
  ];
  # mkAfter: ordered after module lists contributed elsewhere.
  kernelModules = lib.mkAfter [ "kvm-intel" ];
  kernelParams = [ "mitigations=off" ];
};
# Mount table: ZFS datasets from the "zboot" and "tank" pools plus a vfat /boot.
fileSystems = let
  # Build a ZFS mount entry for `dataset` inside `pool`.
  zfsOn = pool: dataset: {
    fsType = "zfs";
    device = "${pool}/${dataset}";
  };
  zboot = zfsOn "zboot";
  tank = zfsOn "tank";
in {
  "/" = zboot "safe/root";
  "/nix" = zboot "local/nix";

  "/home" = tank "safe/home";
  "/export" = tank "safe/export";
  "/srv" = tank "safe/srv";
  "/srv/pancake" = tank "safe/srv/pancake";
  "/persist" = tank "safe/persist";
  "/persist/var/lib/containers" = tank "safe/persist/containers";
  "/store" = tank "local/store";
  "/store/run/containers" = tank "local/store/containers";

  "/boot" = {
    fsType = "vfat";
    device = "/dev/disk/by-uuid/D178-4E19";
  };
};
# Use the systemd-boot EFI boot loader.
boot.loader = {
  systemd-boot.enable = true;
  efi.canTouchEfiVariables = true;
};
# PostgreSQL package pin and memory tuning; the service is not enabled in this
# block (presumably one of the imported modules does that — confirm).
services.postgresql = {
  package = pkgs.postgresql_13;
  settings = {
    shared_buffers = "16GB";
    work_mem = "1GB";
    maintenance_work_mem = "1GB";
  };
};
# Both values are mkDefault, so other modules can override them.
nix.settings.max-jobs = lib.mkDefault 8;
powerManagement.cpuFreqGovernor = lib.mkDefault "performance";
# Podman with container storage backed by the ZFS driver.
virtualisation.podman.enable = true;
virtualisation.containers.storage.settings.storage = {
  driver = "zfs";
  # Both roots live on the dedicated container mounts declared in fileSystems.
  graphroot = "/persist/var/lib/containers/storage";
  runroot = "/store/run/containers/storage";
};
# Put the zfs tools on the podman unit's PATH (appended after other entries).
systemd.services.podman.path = lib.mkAfter [ pkgs.zfs ];
services.openssh.settings.X11Forwarding = true;
# Extra packages.
environment.systemPackages = let
  # secretsync configured against the depot checkout in lukegb's home directory;
  # the listed variable is written out to the given file path.
  depotSecretSync = depot.nix.pkgs.secretsync.configure {
    workingDir = "/home/lukegb/depot";
    gitlabAccessToken = secrets.deployer.gitlabAccessToken;
    manifestVariable = "SECRETS_MANIFEST";
    variablesToFile."OPS_SECRETS_DEFAULT_NIX" = "ops/secrets/default.nix";
  };
in [
  depotSecretSync
];
# Networking!
networking = {
  hostName = "totoro"; # Define your hostname.
  domain = "int.as205479.net";
  hostId = "676c08c4";
  useNetworkd = true;

  # br-ext bridges the physical NIC; br-int has no member interfaces.
  bridges = {
    br-ext.interfaces = [ "enp0s31f6" ];
    br-int.interfaces = [ ];
  };
  # macvlan on top of br-ext carrying the plex-totoro addresses.
  macvlans.mv-plex.interface = "br-ext";

  interfaces = {
    br-ext = {
      ipv4.addresses = [
        { address = "192.168.1.40"; prefixLength = 24; }
      ];
      ipv6.addresses = [
        { address = "2a09:a443::1000"; prefixLength = 128; }
      ];
    };
    br-int.ipv4.addresses = [
      { address = "10.0.0.2"; prefixLength = 24; }
    ];
    mv-plex = {
      ipv4.addresses = [
        # plex-totoro
        { address = "92.118.30.20"; prefixLength = 32; }
      ];
      ipv6.addresses = [
        # plex-totoro
        { address = "2a09:a443::1:1000"; prefixLength = 128; }
      ];
    };
    lo.ipv4.addresses = [
      { address = "92.118.30.19"; prefixLength = 32; }
    ];
  };

  firewall = {
    allowedTCPPorts = [
      80 443 # web
      4001 # ipfs
      139 445 # SMB
      5357 # samba-wsdd
    ];
    allowedUDPPorts = [
      4001 # ipfs
      137 138 # SMB
      3702 # samba-wsdd
    ];
    checkReversePath = false; # breaks Lifx
    extraCommands = ''
      # Allow all inbound UDP from localnet for Lifx purposes...
      iptables -A nixos-fw -p udp --src 192.168.1.0/24 --dst 192.168.1.40 -j nixos-fw-accept
    '';
  };
};
# networkd overrides for the two bridges declared under `networking`.
systemd.network.networks = {
  # br-int is marked as not required for the host to count as online.
  "40-br-int".linkConfig.RequiredForOnline = "no";
  "40-br-ext".gateway = [ "192.168.1.1" ];
};
# Tailscale addresses of this host (tailscale6 is read by the openvscode-server config).
my.ip = {
  tailscale = "100.122.86.11";
  tailscale6 = "fd7a:115c:a1e0:ab12:4843:cd96:627a:560b";
};
# Virtualisation
virtualisation.libvirtd = {
  enable = true;
  allowedBridges = [
    "virbr0"
    "br-ext"
  ];
};
security.polkit.enable = true;

# Accounts: extra groups for lukegb and nginx, plus the "pancake" system user
# whose home is /srv/pancake.
users = {
  users = {
    lukegb = {
      packages = with depot.pkgs; [ irssi ];
      extraGroups = lib.mkAfter [ "libvirtd" "acme" "podman" ];
    };
    pancake = {
      isSystemUser = true;
      group = "pancake";
      home = "/srv/pancake";
    };
    nginx.extraGroups = lib.mkAfter [ "acme" ];
  };
  # nginx is a member of the pancake group alongside the pancake user.
  groups.pancake.members = [ "pancake" "nginx" ];
};

# tmpfiles "L" entry: symlink /var/lib/export -> /export.
systemd.tmpfiles.rules = [
  "L /var/lib/export - - - - /export"
];
# nginx (mainline build with the RTMP module): an RTMP relay plus two
# TLS-only virtual hosts (PHP invoices app and a Plex reverse proxy).
services.nginx = {
  enable = true;
  package = pkgs.nginxMainline;
  additionalModules = with pkgs.nginxModules; [
    rtmp
  ];
  # Appended to the generated nginx configuration. Both RTMP applications
  # ("app" and "live2") allow publish/play from anyone and push to the same
  # upstream ingest URL.
  appendConfig = ''
    rtmp {
    server {
    listen 1935;
    chunk_size 4000;
    application app {
    live on;
    record off;
    allow publish all;
    allow play all;
    push rtmp://ingest.beam.bfob.gg/beam/thecakeisalie;
    }
    application live2 {
    live on;
    record off;
    allow publish all;
    allow play all;
    push rtmp://ingest.beam.bfob.gg/beam/thecakeisalie;
    }
    }
    }
  '';
  virtualHosts = {
    "invoices.lukegb.com" = let
      # Shared location body: rewrite every request to /index.php and hand it
      # to the "pancake" php-fpm pool.
      # The fastcgi parameter files are taken from the nginx package that is
      # actually configured for this service (config.services.nginx.package)
      # rather than from pkgs.nginx, so they always match the running binary
      # and no second nginx build is pulled into the system closure.
      fastcgi = {
        extraConfig = ''
          rewrite ^(.*)$ /index.php break;
          fastcgi_split_path_info ^(.+\.php)(/.+)$;
          fastcgi_index index.php;
          fastcgi_pass unix:${config.services.phpfpm.pools.pancake.socket};
          include ${config.services.nginx.package}/conf/fastcgi_params;
          include ${config.services.nginx.package}/conf/fastcgi.conf;
        '';
      };
    in {
      root = "/srv/pancake/public_html";
      forceSSL = true;
      locations."/" = {
        # Static files first; anything else (including 403/404) goes to the router.
        tryFiles = "$uri $uri/ @router";
        index = "index.html index.php";
        extraConfig = ''
          error_page 403 = @router;
          error_page 404 = @router;
        '';
      };
      locations."~ (.php|\\/[^./]+)$" = fastcgi;
      locations."@router" = fastcgi;
    };
    "plex-totoro.lukegb.com" = {
      forceSSL = true;
      locations."/" = {
        proxyPass = "http://localhost:32400/";
        proxyWebsockets = true;
      };
    };
  };
};
# php-fpm pool serving the invoices vhost; runs as pancake:pancake and its
# socket is owned by the nginx user.
services.phpfpm.pools.pancake = let
  poolSettings = {
    "listen.owner" = config.services.nginx.user;

    # Process manager sizing.
    "pm" = "dynamic";
    "pm.max_children" = 32;
    "pm.max_requests" = 500;
    "pm.start_servers" = 2;
    "pm.min_spare_servers" = 2;
    "pm.max_spare_servers" = 5;

    # Log PHP errors and worker output to stderr.
    "php_admin_value[error_log]" = "stderr";
    "php_admin_flag[log_errors]" = true;
    "catch_workers_output" = true;
  };
in {
  user = "pancake";
  group = "pancake";
  settings = poolSettings;
  phpEnv.PATH = lib.makeBinPath [ pkgs.php ];
};
# MariaDB with a "pancake" database and a same-named user holding all
# privileges on it.
services.mysql = {
  enable = true;
  package = pkgs.mariadb;
  ensureDatabases = [ "pancake" ];
  ensureUsers = [
    {
      name = "pancake";
      ensurePermissions."pancake.*" = "ALL PRIVILEGES";
    }
  ];
};
# Prometheus on totoro: scrape jobs, alerting rules, Alertmanager, pushgateway
# and the SNMP exporter.
services.prometheus = {
  enable = true;
  # NOTE(review): presumably resolved below /var/lib, where the tmpfiles rule in
  # this file symlinks /var/lib/export to /export — confirm.
  stateDir = "export/monitoring/prometheus";
  webExternalUrl = "https://prometheus.int.lukegb.com";
  # Send alerts to the Alertmanager instance configured further down.
  alertmanagers = [{
    scheme = "http";
    static_configs = [{
      targets = ["localhost:${toString config.services.prometheus.alertmanager.port}"];
    }];
  }];
  globalConfig.scrape_interval = "15s";
  # Depot-wide exporter jobs followed by the jobs specific to this host.
  scrapeConfigs = (builtins.attrValues depot.ops.nixos.systemExporters) ++ [{
    # SNMP scrape through the local snmp exporter: the listed target is moved
    # into the `target` URL parameter and kept as the `instance` label, and the
    # scrape address is rewritten to the exporter itself.
    job_name = "blade-oa/snmp";
    metrics_path = "/snmp";
    params = {
      module = ["hpe"];
    };
    static_configs = [{
      targets = ["10.100.1.200"];
    }];
    relabel_configs = [{
      source_labels = ["__address__"];
      target_label = "__param_target";
    } {
      source_labels = ["__param_target"];
      target_label = "instance";
    } {
      target_label = "__address__";
      replacement = "totoro:${toString config.services.prometheus.exporters.snmp.port}";
    }];
  } {
    job_name = "minotar/minotarproxy";
    scheme = "https";
    static_configs = [{
      targets = ["minotarproxy.lukegb.xyz:443"];
    }];
  } {
    # Federation: pull only the current Hydra job series from monitoring.nixos.org.
    job_name = "nixos/prometheus";
    metrics_path = "/prometheus/federate";
    honor_labels = true;
    params = {
      "match[]" = [
        ''hydra_job_failed{current="1"}''
        ''hydra_job_completion_time{current="1"}''
      ];
    };
    scheme = "https";
    static_configs = [{
      targets = ["monitoring.nixos.org:443"];
    }];
  }];
  pushgateway.enable = true;
  # Alerting rules, written as a Prometheus rule-file YAML document. The nesting
  # below is significant: each alert's fields must be indented under its
  # `- alert:` entry, and `rules:` under the group.
  rules = [
    ''
      groups:
      - name: alerting
        rules:
        # Blade power
        - alert: AveragePowerUsageTooHigh
          expr: (sum(avg_over_time(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}[10m])) / 230) > 6.5
          labels:
            severity: page
          annotations:
            summary: "Blade: Power Usage Too High (rolling)"
            description: "Power usage of blade system has been too high for last 10 minutes ({{ $value }}). https://grafana.int.lukegb.com/d/g-u3XQ8Gk/blade-power"
        - alert: PowerUsageTooHigh
          expr: (sum(cpqRackPowerSupplyCurPwrOutput{job="blade-oa/snmp"}) / 230) > 6.5
          for: 10m
          labels:
            severity: page
          annotations:
            summary: "Blade: Power Usage Too High"
            description: "Power usage of blade system has been too high for last 10 minutes ({{ $value }}). https://grafana.int.lukegb.com/d/g-u3XQ8Gk/blade-power"
        - alert: BladePowerUsageOutOfBounds
          expr: node_hwmon_power_average_watt{system=~"blade-.*"} > on () group_left() (1.5 * quantile(0.5, node_hwmon_power_average_watt{system=~"blade-.*"}))
          for: 60m
          labels:
            severity: page
          annotations:
            summary: "Blade: Single Blade Power Usage Out of Bounds"
            description: "{{ $labels.system }} has power usage of {{ $value }}, which is out of expected bounds."
        # Systems
        - alert: NodeExporterDown
          expr: up{exporter="node", system=~"(blade-(tuvok|paris|janeway|torres)|kusakabe|swann|totoro|clouvider-.*|etheroute-.*|bvm-.*)"} < 1
          for: 30m
          labels:
            severity: page
          annotations:
            summary: "Node exporter no longer scrapable"
            description: "{{ $labels.system }} is not reachable from totoro."
        # Alert if the NixOS channels are broken
        - alert: NixOSChannelBad
          expr: hydra_job_failed{} == 1
          for: 30m
          labels:
            severity: email
          annotations:
            summary: "NixOS Channel {{ $labels.channel }} failing"
            description: "The channel {{ $labels.channel }} is failing - see https://hydra.nixos.org/job/{{ $labels.project }}/{{ $labels.jobset }}/tested"
        # Packet loss
        - alert: SmokepingAveragePacketLossHigh
          expr: sum(clamp((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m]) > 0.01, 1, 1)) by (system) > sum(clamp(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}, 1, 1)) by (system) * 0.4
          for: 10m
          labels:
            severity: page
          annotations:
            summary: "Average packet loss from {{ $labels.system }} high"
            description: "Too many endpoints are failing packet loss checks from {{ $labels.system }} ({{ $value }} targets)."
        - alert: SmokepingPacketLossVeryHigh
          expr: ((rate(smokeping_requests_total{host=~"(([a-z0-9]+.)+[a-z]+|([0-9]+.){3}[0-9]+)"}[5m]) - rate(smokeping_response_duration_seconds_count[5m])) / rate(smokeping_requests_total[5m])) >= 0.10
          for: 10m
          labels:
            severity: page
          annotations:
            summary: "Packet loss to {{ $labels.host }} from {{ $labels.system }} high"
            # humanizePercentage already appends "%", so no literal "%" follows it.
            description: "The packet loss from {{ $labels.system }} to {{ $labels.host }} (IP: {{ $labels.ip }}) is very high ({{ $value | humanizePercentage }})."
        # Ping latency
        - alert: Smokeping95LatencyHigh
          expr: histogram_quantile(0.95, sum(rate(smokeping_response_duration_seconds_bucket{host=~"^(1.1.1.1|8.8.8.8)$"}[5m])) by (le, host, system)) > 0.1
          for: 15m
          labels:
            severity: page
          annotations:
            summary: "Ping latency from {{ $labels.system }} to {{ $labels.host }} high"
            description: "The 95th-percentile ping latency from {{ $labels.system }} to {{ $labels.host }} is {{ $value }}."
        # Internet connectivity
        - alert: MaldenRoadInternetConnectivityFailure
          expr: sum(bird_bfd_session_state{state="Up"} * on(instance,name,neighbor_address,system) group_left(device) bird_bfd_session_device) by (instance,neighbor_address,device,state,system) < 1
          for: 15m
          labels:
            severity: page
          annotations:
            summary: "Device {{ $labels.device }} on {{ $labels.system }} reports BFD down to neighbour {{ $labels.neighbor_address }}"
            description: "Ruh roh, Raggy"
    ''
  ];
  # Alertmanager: a single receiver that fans out to a local webhook on port
  # 9997 (the address the alertmanager-discord unit in this file listens on)
  # and to Pushover.
  alertmanager = {
    enable = true;
    webExternalUrl = "https://alertmanager.int.lukegb.com";
    configuration = {
      global = {};
      route = {
        receiver = "default-receiver";
      };
      receivers = [{
        name = "default-receiver";
        webhook_configs = [{
          url = "http://localhost:9997";
        }];
        pushover_configs = [{
          user_key = secrets.pushover.userKey;
          token = secrets.pushover.tokens.alertmanager;
        }];
      }];
    };
  };
  # SNMP exporter used by the "blade-oa/snmp" scrape job above.
  exporters.snmp = {
    enable = true;
    configurationPath = depot.nix.pkgs.prometheus-snmp-config;
  };
};
# Grafana, listening on all addresses on port 3000, with sign-in delegated to
# proxy-supplied headers (X-Pomerium-Claim-*).
services.grafana = {
  enable = true;
  settings = {
    server = {
      domain = "grafana.int.lukegb.com";
      root_url = "https://grafana.int.lukegb.com/";
      http_addr = "0.0.0.0";
      http_port = 3000;
    };
    # Identity comes from the email header; the username is mapped from a
    # second header. Unknown users are signed up automatically.
    "auth.proxy" = {
      enabled = "true";
      auto_sign_up = "true";
      header_name = "X-Pomerium-Claim-Email";
      header_property = "email";
      headers = "username:X-Pomerium-Claim-User";
    };
    security.cookie_secure = true;
  };
};
# Rebuild Grafana's plugin directory on every start: wipe it, recreate it, and
# symlink each depot-provided plugin into it under its pname.
systemd.services.grafana.preStart = let
  pluginDir = "${config.services.grafana.dataDir}/plugins";
  pluginSet = depot.pkgs.grafana-plugins;
  wantedPlugins = map (name: pluginSet.${name}) [
    "grafana-piechart-panel"
    "grafana-clock-panel"
    "grafana-worldmap-panel"
    "grafana-polystat-panel"
  ];
  # One `ln` command (newline-terminated) per plugin.
  linkCommand = plugin: ''
    ln -sf ${plugin} ${pluginDir}/${plugin.pname}
  '';
  linkCommands = lib.concatStringsSep "\n" (map linkCommand wantedPlugins);
in lib.mkAfter ''
  rm -rf ${pluginDir}
  mkdir ${pluginDir}
  ${linkCommands}
'';
# Kubo (IPFS) with its data under /store/ipfs and the experimental filestore enabled.
services.kubo = {
  enable = true;
  dataDir = "/store/ipfs";
  settings.Experimental.FilestoreEnabled = true;
};
# Webhook receiver on 127.0.0.1:9997 that forwards Alertmanager notifications
# to Discord.
systemd.services.alertmanager-discord = let
  listenAddress = "127.0.0.1:9997";
  # NOTE(review): pkgs.writeText places this file (including the webhook URL)
  # in the Nix store — confirm that exposure is acceptable.
  webhookEnvFile = pkgs.writeText "discord-secret" ''
    DISCORD_WEBHOOK=${secrets.monitoring.alertmanager.discord.api_url}
  '';
in {
  enable = true;
  wantedBy = [ "multi-user.target" ];
  serviceConfig = {
    ExecStart = "${depot.pkgs.alertmanager-discord}/bin/alertmanager-discord -listen.address ${listenAddress}";
    EnvironmentFile = webhookEnvFile;

    # Sandboxing.
    DynamicUser = true;
    MountAPIVFS = true;
    PrivateTmp = true;
    PrivateUsers = true;
    ProtectControlGroups = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
  };
};
# Daily oneshot that runs the depot's Raritan certificate renewal script
# (lego.sh) for kvm.lukegb.xyz against the device at 192.168.1.50.
systemd.services.sslrenew-raritan = {
  enable = true;
  # Ordering after network-online.target has no effect unless the target is
  # also pulled into the transaction, so request it with `wants` as well.
  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    ExecStart = "${depot.ops.raritan.ssl-renew}/lego.sh";
    # Non-secret parameters handed to the script through its environment.
    EnvironmentFile = pkgs.writeText "sslrenew-secret" ''
      CERTIFICATE_DOMAIN=kvm.lukegb.xyz
      CERTIFICATE_ROLE=google-cloudflare
      RARITAN_IP=192.168.1.50
    '';
    # Dynamic user with a fixed name; state is kept in a private
    # /var/lib/sslrenew-raritan, which is also the working directory.
    DynamicUser = true;
    User = "sslrenew-raritan";
    StateDirectory = "sslrenew-raritan";
    StateDirectoryMode = "0700";
    WorkingDirectory = "/var/lib/sslrenew-raritan";
  };
};
# Timer that triggers the service above once a day.
systemd.timers.sslrenew-raritan = {
  enable = true;
  wantedBy = [ "timers.target" ];
  timerConfig = {
    OnCalendar = "daily";
  };
};
# Long-running streetworks service; it is started with a postcode and Pushover
# credentials on its command line.
systemd.services.streetworks = {
  enable = true;
  # Ordering after network-online.target has no effect unless the target is
  # also pulled into the transaction, so request it with `wants` as well.
  wants = [ "network-online.target" ];
  after = [ "network-online.target" ];
  wantedBy = [ "multi-user.target" ];
  serviceConfig = {
    ExecStart = "${depot.go.streetworks}/bin/streetworks -postcode='NW5 4HS' -pushover_token='${secrets.pushover.tokens.depot}' -pushover_user='${secrets.pushover.userKey}'";
    # Sandboxing.
    DynamicUser = true;
    MountAPIVFS = true;
    PrivateTmp = true;
    PrivateUsers = true;
    ProtectControlGroups = true;
    ProtectKernelModules = true;
    ProtectKernelTunables = true;
  };
};
# Registers port 2112 as an additional exporter port named "trains".
my.prometheus.additionalExporterPorts.trains = 2112;
# WS-Discovery announcer for the Samba server, bound to the external bridge.
services.samba-wsdd = {
  enable = true;
  interface = "br-ext";
  hostname = "TOTORO";
  workgroup = "WORKGROUP";
};
# Samba file server restricted to SMB 3.1.1, with a read-only guest share and
# per-user home shares.
services.samba = {
  enable = true;
  enableNmbd = false; # Eh, SMB1.0
  extraConfig = ''
    server min protocol = SMB3_11
    client min protocol = SMB3_11
    restrict anonymous = 1
  '';
  shares = {
    # NOTE(review): no `path` is set for this share in this file — confirm it
    # is supplied elsewhere, otherwise there is no directory to export.
    content = {
      comment = "Content";
      browseable = "yes";
      "read only" = "yes";
      "guest ok" = "yes";
    };
    homes = {
      comment = "Home Directories";
      browseable = "no";
      "read only" = "no";
      "create mask" = "0755";
      "directory mask" = "0755";
      "valid users" = "%S";
    };
  };
};
# NFS: export /export read-write to the 192.168.1.0/24 network.
services.nfs.server = {
  enable = true;
  exports = ''
    /export 192.168.1.0/24(rw,fsid=0,no_subtree_check,sync)
  '';
};
# One certificate per nginx virtual host, each covering exactly its own hostname.
my.vault.acmeCertificates = let
  certFor = host: {
    hostnames = [ host ];
    nginxVirtualHosts = [ host ];
  };
in {
  "plex-totoro.lukegb.com" = certFor "plex-totoro.lukegb.com";
  "invoices.lukegb.com" = certFor "invoices.lukegb.com";
};
# Register binfmt emulation for aarch64-linux.
boot.binfmt.emulatedSystems = [
  "aarch64-linux"
];
# openvscode-server running as lukegb, bound to this host's Tailscale IPv6
# address on port 3002, with the connection token disabled.
services.openvscode-server = {
  enable = true;
  user = "lukegb";
  host = config.my.ip.tailscale6;
  port = 3002;
  withoutConnectionToken = true;
};
system.stateVersion = "22.11";
}