depot/nixos/modules/virtualisation/xen-dom0.nix

876 lines
30 KiB
Nix
Raw Normal View History

# Xen Project Hypervisor (Dom0) support.
{
config,
lib,
pkgs,
...
}:
let
inherit (builtins) readFile;
inherit (lib.modules) mkRemovedOptionModule mkRenamedOptionModule mkIf;
inherit (lib.options)
mkOption
mkEnableOption
literalExpression
mkPackageOption
;
inherit (lib.types)
listOf
str
ints
lines
enum
path
submodule
addCheck
float
bool
int
nullOr
;
inherit (lib.lists) optional optionals;
inherit (lib.strings) hasSuffix optionalString;
inherit (lib.meta) getExe;
inherit (lib.attrsets) optionalAttrs;
inherit (lib.trivial) boolToString;
inherit (lib.teams.xen) members;
# Shorthand for this module's own option values.
cfg = config.virtualisation.xen;
# Shell application wrapping ./xen-boot-builder.sh. It is run from
# `boot.loader.systemd-boot.extraInstallCommands` later in this file, with the
# configured verbosity as its only argument.
xenBootBuilder =
  let
    # Tools that are always placed on the script's PATH.
    baseInputs = [
      pkgs.binutils
      pkgs.coreutils
      pkgs.findutils
      pkgs.gawk
      pkgs.gnugrep
      pkgs.gnused
      pkgs.jq
    ];
    # Extra tools that are only added at the "info" verbosity level.
    infoInputs = optionals (cfg.efi.bootBuilderVerbosity == "info") [
      pkgs.bat
      pkgs.diffutils
    ];
  in
  pkgs.writeShellApplication {
    name = "xenBootBuilder";
    runtimeInputs = baseInputs ++ infoInputs;
    # Exposes the configured EFI system partition mount point to the script.
    runtimeEnv.efiMountPoint = config.boot.loader.efi.efiSysMountPoint;
    # SC2016 is excluded on purpose: the regexes in the script's sed commands must not be expanded.
    excludeShellChecks = [ "SC2016" ];
    text = readFile ./xen-boot-builder.sh;
  };
in
{
# Compatibility shims for options that were removed or renamed.
imports =
  let
    # Prefixes an option path with this module's namespace.
    xenOption = subPath: [ "virtualisation" "xen" ] ++ subPath;
    bridgeRemovalNotice = "The Xen Network Bridge options are currently unavailable. Please set up your own bridge manually.";
    # All removed `bridge.*` options share the same notice.
    removedBridgeOption = leaf: mkRemovedOptionModule (xenOption [ "bridge" leaf ]) bridgeRemovalNotice;
    renamedXenOption = old: new: mkRenamedOptionModule (xenOption old) (xenOption new);
  in
  map removedBridgeOption [
    "name"
    "address"
    "prefixLength"
    "forwardDns"
  ]
  ++ [
    (renamedXenOption [ "qemu-package" ] [ "qemu" "package" ])
    (renamedXenOption [ "package-qemu" ] [ "qemu" "package" ])
    (renamedXenOption [ "stored" ] [ "store" "path" ])
  ];
## Interface ##
options.virtualisation.xen = {
enable = mkEnableOption "the Xen Project Hypervisor, a virtualisation technology defined as a *type-1 hypervisor*, which allows multiple virtual machines, known as *domains*, to run concurrently on the physical machine. NixOS runs as the privileged *Domain 0*. This option requires a reboot into a Xen kernel to take effect";
debug = mkEnableOption "Xen debug features for Domain 0. This option enables some hidden debugging tests and features, and should not be used in production";
trace = mkOption {
  type = bool;
  # Tracing follows `debug` unless it is set explicitly.
  default = cfg.debug;
  # The rendered documentation must show the real default expression, not a constant `false`.
  defaultText = literalExpression "config.virtualisation.xen.debug";
  example = true;
  description = "Whether to enable Xen debug tracing and logging for Domain 0.";
};
package = mkPackageOption pkgs "Xen Hypervisor" { default = [ "xen" ]; };
qemu = {
  # QEMU build used by this module; defaults to `pkgs.qemu_xen`.
  package = mkPackageOption pkgs "QEMU (with Xen Hypervisor support)" { default = [ "qemu_xen" ]; };
  # PID file location; the xen-qemu-dom0-disk-backend unit in this file reads this value.
  pidFile = mkOption {
    description = "Path to the QEMU PID file.";
    type = path;
    default = "/run/xen/qemu-dom0.pid";
    example = "/var/run/xen/qemu-dom0.pid";
  };
};
# Hypervisor command line. The implementation section of this module appends
# entries derived from `trace` and `dom0Resources` to this list.
bootParams = mkOption {
  type = listOf str;
  default = [ ];
  # A real list, so the example has the same type as the option instead of
  # being rendered as one multi-line string.
  example = [
    "iommu=force:true,qinval:true,debug:true"
    "noreboot=true"
    "vga=ask"
  ];
  description = ''
    Xen Command Line parameters passed to Domain 0 at boot time.
    Note: these are different from `boot.kernelParams`. See
    the [Xen documentation](https://xenbits.xenproject.org/docs/unstable/misc/xen-command-line.html) for more information.
  '';
};
efi = {
  # Verbosity argument handed to the boot entry builder script.
  bootBuilderVerbosity = mkOption {
    type = enum [
      "default"
      "info"
      "debug"
      "quiet"
    ];
    default = "default";
    example = "info";
    # The blank lines and the nested bullet keep the markdown list well-formed:
    # the dependency note belongs to `info`, and the closing sentence is its own
    # paragraph rather than a continuation of the last bullet.
    description = ''
      The EFI boot entry builder script should be called with exactly one of the following arguments in order to specify its verbosity:

      - `quiet` suppresses all messages.
      - `default` adds a simple "Installing Xen Project Hypervisor boot entries...done." message to the script.
      - `info` is the same as `default`, but it also prints a diff with information on which generations were altered.
        - This option adds two extra dependencies to the script: `diffutils` and `bat`.
      - `debug` prints information messages for every single step of the script.

      This option does not alter the actual functionality of the script, just the number of messages printed when rebuilding the system.
    '';
  };
  # Location of the Xen EFI binary; exported through the bootspec extension
  # defined in the implementation section of this module.
  path = mkOption {
    type = path;
    default = "${cfg.package.boot}/${cfg.package.efi}";
    defaultText = literalExpression "\${config.virtualisation.xen.package.boot}/\${config.virtualisation.xen.package.efi}";
    example = literalExpression "\${config.virtualisation.xen.package}/boot/efi/efi/nixos/xen-\${config.virtualisation.xen.package.version}.efi";
    description = ''
      Path to xen.efi. `pkgs.xen` is patched to install the xen.efi file
      on `$boot/boot/xen.efi`, but an unpatched Xen build may install it
      somewhere else, such as `$out/boot/efi/efi/nixos/xen.efi`. Unless
      you're building your own Xen derivation, you should leave this
      option as the default value.
    '';
  };
};
# Resource limits for Domain 0. The implementation section turns these into
# `dom0_max_vcpus=` and `dom0_mem=` hypervisor parameters; a value of 0 means
# no parameter is emitted.
dom0Resources = {
  maxVCPUs = mkOption {
    type = ints.unsigned;
    default = 0;
    example = 4;
    description = ''
      Amount of virtual CPU cores allocated to Domain 0 on boot.
      If set to 0, all cores are assigned to Domain 0, and
      unprivileged domains will compete with Domain 0 for CPU time.
    '';
  };
  memory = mkOption {
    type = ints.unsigned;
    default = 0;
    example = 512;
    description = ''
      Amount of memory (in MiB) allocated to Domain 0 on boot.
      If set to 0, all memory is assigned to Domain 0, and
      unprivileged domains will compete with Domain 0 for free RAM.
    '';
  };
  maxMemory = mkOption {
    type = ints.unsigned;
    # Defaults to `memory`, in which case no `,max:` suffix is generated.
    default = cfg.dom0Resources.memory;
    defaultText = literalExpression "config.virtualisation.xen.dom0Resources.memory";
    example = 1024;
    # The description names the option by its real path (`dom0Resources.memory`).
    description = ''
      Maximum amount of memory (in MiB) that Domain 0 can
      dynamically allocate to itself. Does nothing if set
      to the same amount as `virtualisation.xen.dom0Resources.memory`,
      or if that option is set to 0.
    '';
  };
};
domains = {
  # Free-form lines appended to /etc/default/xendomains after the packaged
  # defaults have been sourced.
  extraConfig = mkOption {
    description = ''
      Options defined here will override the defaults for xendomains.
      The default options can be seen in the file included from
      /etc/default/xendomains.
    '';
    type = lines;
    default = "";
    example = ''
      XENDOMAINS_SAVE=/persist/xen/save
      XENDOMAINS_RESTORE=false
      XENDOMAINS_CREATE_USLEEP=10000000
    '';
  };
};
store = {
# Executable used as the Xen Store Daemon.
path = mkOption {
  description = ''
    Path to the Xen Store Daemon. This option is useful to
    switch between the legacy C-based Xen Store Daemon, and
    the newer OCaml-based Xen Store Daemon, `oxenstored`.
  '';
  type = path;
  default = "${cfg.package}/bin/oxenstored";
  defaultText = literalExpression "\${config.virtualisation.xen.package}/bin/oxenstored";
  example = literalExpression "\${config.virtualisation.xen.package}/bin/xenstored";
};
# Read-only helper: "ocaml" when the daemon path ends in "oxenstored", "c" otherwise.
type = mkOption {
  description = "Helper internal option that determines the type of the Xen Store Daemon based on cfg.store.path.";
  internal = true;
  readOnly = true;
  type = enum [
    "c"
    "ocaml"
  ];
  default = if hasSuffix "oxenstored" cfg.store.path then "ocaml" else "c";
};
settings = mkOption {
description = ''
  The OCaml-based Xen Store Daemon configuration. This
  option does nothing with the C-based `xenstored`.
'';
default = { };
# Same example value as before, written with nested attribute sets.
example = {
  enableMerge = false;
  quota = {
    maxWatchEvents = 2048;
    enable = true;
  };
  conflict = {
    maxHistorySeconds = 0.12;
    burstLimit = 15.0;
  };
  xenstored.log = {
    file = "/dev/null";
    level = "info";
  };
};
type = submodule {
options = {
# Written to oxenstored.conf as `pid-file`.
pidFile = mkOption {
  description = "Path to the Xen Store Daemon PID file.";
  type = path;
  default = "/run/xen/xenstored.pid";
  example = "/var/run/xen/xenstored.pid";
};
# Hidden debugging switch; it follows `virtualisation.xen.debug` by default.
testEAGAIN = mkOption {
  description = "Randomly fail a transaction with EAGAIN. This option is used for debugging purposes only.";
  visible = false;
  type = bool;
  default = cfg.debug;
  defaultText = literalExpression "config.virtualisation.xen.debug";
  example = true;
};
# Written to oxenstored.conf as `merge-activate`.
enableMerge = mkOption {
  description = "Whether to enable transaction merge support.";
  type = bool;
  default = true;
  example = false;
};
conflict =
  let
    # Both numeric conflict limits are floats that must not be negative.
    # Declaring the type once gives `burstLimit` and `maxHistorySeconds` the
    # same name, description and description class; previously the second
    # copy lacked `name` and `descriptionClass`.
    nonnegativeFloat = addCheck (
      float
      // {
        name = "nonnegativeFloat";
        description = "nonnegative floating point number, meaning >=0";
        descriptionClass = "nonRestrictiveClause";
      }
    ) (n: n >= 0);
  in
  {
    burstLimit = mkOption {
      type = nonnegativeFloat;
      default = 5.0;
      example = 15.0;
      description = ''
        Limits applied to domains whose writes cause other domains' transaction
        commits to fail. Must include decimal point.
        The burst limit is the number of conflicts a domain can cause to
        fail in a short period; this value is used for both the initial and
        the maximum value of each domain's conflict-credit, which falls by
        one point for each conflict caused, and when it reaches zero the
        domain's requests are ignored.
      '';
    };
    maxHistorySeconds = mkOption {
      type = nonnegativeFloat;
      default = 5.0e-2;
      example = 1.0;
      description = ''
        Limits applied to domains whose writes cause other domains' transaction
        commits to fail. Must include decimal point.
        The conflict-credit is replenished over time:
        one point is issued after each conflict.maxHistorySeconds, so this
        is the minimum pause-time during which a domain will be ignored.
      '';
    };
    rateLimitIsAggregate = mkOption {
      type = bool;
      default = true;
      example = false;
      description = ''
        If the conflict.rateLimitIsAggregate option is `true`, then after each
        tick one point of conflict-credit is given to just one domain: the
        one at the front of the queue. If `false`, then after each tick each
        domain gets a point of conflict-credit.
        In environments where it is known that every transaction will
        involve a set of nodes that is writable by at most one other domain,
        then it is safe to set this aggregate limit flag to `false` for better
        performance. (This can be determined by considering the layout of
        the xenstore tree and permissions, together with the content of the
        transactions that require protection.)
        A transaction which involves a set of nodes which can be modified by
        multiple other domains can suffer conflicts caused by any of those
        domains, so the flag must be set to `true`.
      '';
    };
  };
perms = {
  # Written to oxenstored.conf as `perms-activate`.
  enable = mkOption {
    description = "Whether to enable the node permission system.";
    type = bool;
    default = true;
    example = false;
  };
  # Written to oxenstored.conf as `perms-watch-activate`.
  enableWatch = mkOption {
    description = ''
      Whether to enable the watch permission system.
      When this is set to `true`, unprivileged guests can only get watch events
      for xenstore entries that they would've been able to read.
      When this is set to `false`, unprivileged guests may get watch events
      for xenstore entries that they cannot read. The watch event contains
      only the entry name, not the value.
      This restores behaviour prior to [XSA-115](https://xenbits.xenproject.org/xsa/advisory-115.html).
    '';
    type = bool;
    default = true;
    example = false;
  };
};
quota =
  let
    # Every numeric quota is a strictly positive integer; only the default,
    # the example and the description differ between them.
    mkLimit =
      default: example: description:
      mkOption {
        type = ints.positive;
        inherit default example description;
      };
  in
  {
    enable = mkOption {
      description = "Whether to enable the quota system.";
      type = bool;
      default = true;
      example = false;
    };
    maxEntity = mkLimit 1000 1024 "Entity limit for transactions.";
    maxSize = mkLimit 2048 4096 "Size limit for transactions.";
    maxWatch = mkLimit 100 256 "Maximum number of watches by the Xenstore Watchdog.";
    transaction = mkLimit 10 50 "Maximum number of transactions.";
    maxRequests = mkLimit 1024 1024 "Maximum number of requests per transaction.";
    maxPath = mkLimit 1024 1024 "Path limit for the quota system.";
    maxOutstanding = mkLimit 1024 1024 "Maximum outstanding requests, i.e. in-flight requests / domain.";
    maxWatchEvents = mkLimit 1024 2048 "Maximum number of outstanding watch events per watch.";
  };
# Written to oxenstored.conf as `persistent`.
# NOTE(review): the original wording was "filed base backend"; it is read here
# as a typo for "file-based backend" — confirm against the oxenstored sources.
persistent = mkOption {
  type = bool;
  default = false;
  example = true;
  description = "Whether to activate the file-based backend.";
};
xenstored =
  let
    # Declares an option that is kept out of the rendered documentation.
    hidden =
      type: default: example: description:
      mkOption {
        visible = false;
        inherit
          type
          default
          example
          description
          ;
      };
  in
  {
    log = {
      file = mkOption {
        description = "Path to the Xen Store log file.";
        type = path;
        default = "/var/log/xen/xenstored.log";
        example = "/dev/null";
      };
      # Defaults to "debug" when tracing is enabled, and to null otherwise.
      level = mkOption {
        description = "Logging level for the Xen Store.";
        type = nullOr (enum [
          "debug"
          "info"
          "warn"
          "error"
        ]);
        default = if cfg.trace then "debug" else null;
        defaultText = literalExpression "if (config.virtualisation.xen.trace == true) then \"debug\" else null";
        example = "error";
      };
      # None of the hidden options below are documented upstream.
      # The nb* options appear to alter the log rotation behaviour, and
      # the specialOps option appears to affect the Xenbus logging logic.
      nbFiles = hidden int 10 16 "Set `xenstored-log-nb-files`.";
    };
    accessLog = {
      file = mkOption {
        description = "Path to the Xen Store access log file.";
        type = path;
        default = "/var/log/xen/xenstored-access.log";
        example = "/var/log/security/xenstored-access.log";
      };
      nbLines = hidden int 13215 16384 "Set `access-log-nb-lines`.";
      # NOTE(review): the `acesss` spelling matches the key written to
      # oxenstored.conf by this module and presumably mirrors the key name
      # oxenstored itself parses — confirm before "correcting" it.
      nbChars = hidden int 180 256 "Set `acesss-log-nb-chars`.";
      specialOps = hidden bool false true "Set `access-log-special-ops`.";
    };
    xenfs = {
      kva = mkOption {
        description = ''
          Path to the Xen Store Daemon KVA location inside the XenFS pseudo-filesystem.
          While it is possible to alter this value, some drivers may be hardcoded to follow the default paths.
        '';
        visible = false;
        type = path;
        default = "/proc/xen/xsd_kva";
        example = cfg.store.settings.xenstored.xenfs.kva;
      };
      port = mkOption {
        description = ''
          Path to the Xen Store Daemon userspace port inside the XenFS pseudo-filesystem.
          While it is possible to alter this value, some drivers may be hardcoded to follow the default paths.
        '';
        visible = false;
        type = path;
        default = "/proc/xen/xsd_port";
        example = cfg.store.settings.xenstored.xenfs.port;
      };
    };
  };
# Written to oxenstored.conf as `ring-scan-interval`. Zero is rejected by the
# type check; negative values are accepted and, per the description, disable
# the scan.
ringScanInterval = mkOption {
  default = 20;
  example = 30;
  type = addCheck (
    int
    // {
      name = "nonzeroInt";
      description = "nonzero signed integer, meaning !=0";
      descriptionClass = "nonRestrictiveClause";
    }
  ) (n: n != 0);
  description = ''
    Periodic scanning for all the rings as a safety net for lazy clients.
    Define the interval in seconds; set to a negative integer to disable.
  '';
};
};
};
};
};
};
## Implementation ##
config = mkIf cfg.enable {
# Sanity checks evaluated whenever Xen is enabled.
assertions =
  let
    inherit (pkgs.stdenv) hostPlatform;
    inherit (cfg.dom0Resources) memory maxMemory;
    inherit (cfg.store.settings) quota;
    usesSystemdBoot = config.boot.loader.systemd-boot.enable;
    # Lanzaboote is an out-of-tree module, so the option may not exist at all.
    usesLanzaboote = (config.boot ? lanzaboote) && config.boot.lanzaboote.enable;
  in
  [
    {
      assertion = hostPlatform.isx86_64;
      message = "Xen is currently not supported on ${hostPlatform.system}.";
    }
    {
      assertion = usesSystemdBoot || usesLanzaboote;
      message = "Xen only supports booting on systemd-boot or Lanzaboote.";
    }
    {
      assertion = config.boot.initrd.systemd.enable;
      message = "Xen does not support the legacy script-based Stage 1 initrd.";
    }
    {
      assertion = maxMemory >= memory;
      message = ''
        You have allocated more memory to dom0 than virtualisation.xen.dom0Resources.maxMemory
        allows for. Please increase the maximum memory limit, or decrease the default memory allocation.
      '';
    }
    {
      # Debugging without tracing is rejected.
      assertion = !cfg.debug || cfg.trace;
      message = "Xen's debugging features are enabled, but logging is disabled. This is most likely not what you want.";
    }
    {
      assertion = quota.maxWatchEvents >= quota.maxOutstanding;
      message = ''
        Upstream Xen recommends that maxWatchEvents be equal to or greater than maxOutstanding,
        in order to mitigate denial of service attacks from malicious frontends.
      '';
    }
  ];
# Hypervisor parameters derived from the other options of this module.
virtualisation.xen.bootParams =
  let
    inherit (cfg.dom0Resources) memory maxMemory maxVCPUs;
    # Verbose hypervisor and guest logging when tracing is on.
    traceParams = optionals cfg.trace [
      "loglvl=all"
      "guest_loglvl=all"
    ];
    # A `,max:` suffix is only added when the ceiling differs from the base allocation.
    memoryCeiling = optionalString (memory != maxMemory) ",max:${toString maxMemory}M";
    # A value of 0 means "no limit", so no parameter is emitted in that case.
    memoryParam = optional (memory != 0) "dom0_mem=${toString memory}M${memoryCeiling}";
    vcpuParam = optional (maxVCPUs != 0) "dom0_max_vcpus=${toString maxVCPUs}";
  in
  traceParams ++ memoryParam ++ vcpuParam;
boot = {
  # Kernel modules requested for Domain 0. The list mixes several naming
  # schemes for what look like the same drivers. "xen-acpi-processor" used to
  # be listed twice; the duplicate entry has been dropped.
  kernelModules = [
    "xen-evtchn"
    "xen-gntdev"
    "xen-gntalloc"
    "xen-blkback"
    "xen-netback"
    "xen-pciback"
    "evtchn"
    "gntdev"
    "netbk"
    "blkbk"
    "xen-scsibk"
    "usbbk"
    "pciback"
    "xen-acpi-processor"
    "blktap2"
    "tun"
    "netxen_nic"
    "xen_wdt"
    "xen-privcmd"
    "xen-scsiback"
    "xenfs"
  ];
  # The xenfs module is needed to mount /proc/xen.
  initrd.kernelModules = [ "xenfs" ];
  # Increase the number of loopback devices from the default (8),
  # which is way too small because every VM virtual disk requires a
  # loopback device.
  extraModprobeConfig = ''
    options loop max_loop=64
  '';
  # Xen Bootspec extension. This extension allows NixOS bootloaders to
  # fetch the `xen.efi` path and access the `cfg.bootParams` option.
  bootspec.extensions = {
    "org.xenproject.bootspec.v1" = {
      xen = cfg.efi.path;
      xenParams = cfg.bootParams;
    };
  };
  # See the `xenBootBuilder` script in the main `let...in` statement of this file.
  # The configured verbosity is passed as the script's only argument.
  loader.systemd-boot.extraInstallCommands = ''
    ${getExe xenBootBuilder} ${cfg.efi.bootBuilderVerbosity}
  '';
};
# Domain 0 needs a pvops-enabled kernel.
# Every NixOS kernel ships with this enabled by default, so this only acts as a sanity check.
system.requiredKernelConfig =
  let
    inherit (config.lib.kernelConfig) isYes isModule;
  in
  map isYes [
    "XEN"
    "X86_IO_APIC"
    "ACPI"
    "XEN_DOM0"
    "PCI_XEN"
    "XEN_DEV_EVTCHN"
    "XENFS"
    "XEN_COMPAT_XENFS"
    "XEN_SYS_HYPERVISOR"
    "XEN_GNTDEV"
    "XEN_BACKEND"
  ]
  # The backend drivers are checked with `isModule` rather than `isYes`.
  ++ map isModule [
    "XEN_NETDEV_BACKEND"
    "XEN_BLKDEV_BACKEND"
    "XEN_PCIDEV_BACKEND"
  ]
  ++ map isYes [
    "XEN_BALLOON"
    "XEN_SCRUB_PAGES"
  ];
environment = {
  systemPackages = [
    cfg.package
    cfg.qemu.package
  ];
  etc =
    # Set up Xen Domain 0 configuration files.
    {
      "xen/xl.conf".source = "${cfg.package}/etc/xen/xl.conf"; # TODO: Add options to configure xl.conf declaratively. It's worth considering making a new "xl value" type, as it could be reused to produce xl.cfg (domain definition) files.
      "xen/scripts-xen" = {
        source = "${cfg.package}/etc/xen/scripts/*";
        target = "xen/scripts";
      };
      # Sources the packaged defaults first, then overrides the daemon paths.
      "default/xencommons".text = ''
        source ${cfg.package}/etc/default/xencommons
        XENSTORED="${cfg.store.path}"
        QEMU_XEN="${cfg.qemu.package}/${cfg.qemu.package.qemu-system-i386}"
        ${optionalString cfg.trace ''
          XENSTORED_TRACE=yes
          XENCONSOLED_TRACE=all
        ''}
      '';
      "default/xendomains".text = ''
        source ${cfg.package}/etc/default/xendomains
        ${cfg.domains.extraConfig}
      '';
    }
    # The OCaml-based Xen Store Daemon requires /etc/xen/oxenstored.conf to start.
    #
    # Every boolean is rendered with `boolToString`. `toString` turns `true`
    # into "1" and `false` into the empty string, which produced lines such as
    # `perms-activate = ` whenever a feature was switched off.
    #
    # NOTE(review): the `acesss-log-nb-chars` spelling is kept on purpose; it
    # presumably matches the key name oxenstored itself parses — confirm before
    # changing it.
    // optionalAttrs (cfg.store.type == "ocaml") {
      "xen/oxenstored.conf".text = ''
        pid-file = ${cfg.store.settings.pidFile}
        test-eagain = ${boolToString cfg.store.settings.testEAGAIN}
        merge-activate = ${boolToString cfg.store.settings.enableMerge}
        conflict-burst-limit = ${toString cfg.store.settings.conflict.burstLimit}
        conflict-max-history-seconds = ${toString cfg.store.settings.conflict.maxHistorySeconds}
        conflict-rate-limit-is-aggregate = ${boolToString cfg.store.settings.conflict.rateLimitIsAggregate}
        perms-activate = ${boolToString cfg.store.settings.perms.enable}
        perms-watch-activate = ${boolToString cfg.store.settings.perms.enableWatch}
        quota-activate = ${boolToString cfg.store.settings.quota.enable}
        quota-maxentity = ${toString cfg.store.settings.quota.maxEntity}
        quota-maxsize = ${toString cfg.store.settings.quota.maxSize}
        quota-maxwatch = ${toString cfg.store.settings.quota.maxWatch}
        quota-transaction = ${toString cfg.store.settings.quota.transaction}
        quota-maxrequests = ${toString cfg.store.settings.quota.maxRequests}
        quota-path-max = ${toString cfg.store.settings.quota.maxPath}
        quota-maxoutstanding = ${toString cfg.store.settings.quota.maxOutstanding}
        quota-maxwatchevents = ${toString cfg.store.settings.quota.maxWatchEvents}
        persistent = ${boolToString cfg.store.settings.persistent}
        xenstored-log-file = ${cfg.store.settings.xenstored.log.file}
        xenstored-log-level = ${if cfg.store.settings.xenstored.log.level == null then "null" else cfg.store.settings.xenstored.log.level}
        xenstored-log-nb-files = ${toString cfg.store.settings.xenstored.log.nbFiles}
        access-log-file = ${cfg.store.settings.xenstored.accessLog.file}
        access-log-nb-lines = ${toString cfg.store.settings.xenstored.accessLog.nbLines}
        acesss-log-nb-chars = ${toString cfg.store.settings.xenstored.accessLog.nbChars}
        access-log-special-ops = ${boolToString cfg.store.settings.xenstored.accessLog.specialOps}
        ring-scan-interval = ${toString cfg.store.settings.ringScanInterval}
        xenstored-kva = ${cfg.store.settings.xenstored.xenfs.kva}
        xenstored-port = ${cfg.store.settings.xenstored.xenfs.port}
      '';
    };
};
# The Xen package ships its own udev rules.
services.udev.packages = [ cfg.package ];
systemd =
  let
    # Target that every Xen unit enabled below is attached to.
    multiUser = [ "multi-user.target" ];
    qemuBinary = "${cfg.qemu.package}/${cfg.qemu.package.qemu-system-i386}";
  in
  {
    # The Xen package ships its own systemd units.
    packages = [ cfg.package ];
    mounts = [
      {
        description = "Mount /proc/xen files";
        type = "xenfs";
        what = "xenfs";
        where = "/proc/xen";
        unitConfig = {
          ConditionPathExists = "/proc/xen";
          RefuseManualStop = "true";
        };
      }
    ];
    services = {
      # Installed by the `xen` package, but not meant to be used in dom0.
      xendriverdomain.enable = false;
      xenstored = {
        wantedBy = multiUser;
        preStart = ''
          export XENSTORED_ROOTDIR="/var/lib/xenstored"
          rm -f "$XENSTORED_ROOTDIR"/tdb* &>/dev/null
          mkdir -p /var/{run,log,lib}/xen
        '';
      };
      xen-init-dom0 = {
        wantedBy = multiUser;
        restartIfChanged = false;
      };
      xen-qemu-dom0-disk-backend = {
        wantedBy = multiUser;
        serviceConfig = {
          PIDFile = cfg.qemu.pidFile;
          ExecStart = ''
            ${qemuBinary} \
            -xen-domid 0 -xen-attach -name dom0 -nographic -M xenpv \
            -daemonize -monitor /dev/null -serial /dev/null -parallel \
            /dev/null -nodefaults -no-user-config -pidfile \
            ${cfg.qemu.pidFile}
          '';
        };
      };
      xenconsoled = {
        wantedBy = multiUser;
      };
      xen-watchdog = {
        wantedBy = multiUser;
        serviceConfig = {
          Restart = "on-failure";
          RestartSec = "1";
        };
      };
      xendomains = {
        wantedBy = multiUser;
        restartIfChanged = false;
        path = [
          cfg.package
          cfg.qemu.package
        ];
        preStart = "mkdir -p /var/lock/subsys -m 755";
      };
    };
  };
};
# The module is maintained by the members of `lib.teams.xen`.
meta = {
  maintainers = members;
};
}