319 lines
9.4 KiB
Nix
319 lines
9.4 KiB
Nix
|
{
|
||
|
config,
|
||
|
lib,
|
||
|
pkgs,
|
||
|
...
|
||
|
}:
|
||
|
let
|
||
|
# Helpers from `lib` that are used unqualified in the option declarations.
inherit (lib) types literalExpression;

# Shorthand for this module's own option values.
cfg = config.services.ollama;

# The configured package, overridden with the selected acceleration backend.
ollamaPackage = cfg.package.override { acceleration = cfg.acceleration; };

# True only when both a user and a group are configured (neither is null).
staticUser = !(cfg.user == null || cfg.group == null);
|
||
|
in
|
||
|
{
|
||
|
# Options that used to exist under `services.ollama`; setting any of them now
# fails evaluation with the accompanying migration hint.
imports =
  let
    # Builds a removed-option module for `services.ollama.<name>`.
    removedOllamaOption =
      name: hint:
      lib.mkRemovedOptionModule [
        "services"
        "ollama"
        name
      ] hint;
  in
  [
    (removedOllamaOption "listenAddress" "Use `services.ollama.host` and `services.ollama.port` instead.")
    (removedOllamaOption "sandbox" "Set `services.ollama.user` and `services.ollama.group` instead.")
    (removedOllamaOption "writablePaths" "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`.")
  ];
|
||
|
|
||
|
options = {
|
||
|
services.ollama = {
|
||
|
# Master switch: the whole `config` section of this module is gated on it.
enable = lib.mkEnableOption "ollama server for local large language models";

# Package providing the `ollama` executable; defaults to `pkgs.ollama`.
package = lib.mkPackageOption pkgs "ollama" { };
|
||
|
|
||
|
# Optional static account for the service; `null` (the default) means no
# account is declared by this module.
user = lib.mkOption {
  description = ''
    User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
    when set to `null`.

    The user will automatically be created, if this option is set to a non-null value.
  '';
  type = types.nullOr types.str;
  default = null;
  example = "ollama";
};
|
||
|
# Optional static group for the service; follows `services.ollama.user`
# unless set explicitly.
group = lib.mkOption {
  description = ''
    Group under which to run ollama. Only used when `services.ollama.user` is set.

    The group will automatically be created, if this option is set to a non-null value.
  '';
  type = types.nullOr types.str;
  default = cfg.user;
  # The default depends on another option, so the manual shows the expression.
  defaultText = literalExpression "config.services.ollama.user";
  example = "ollama";
};
|
||
|
|
||
|
# Used as the service's HOME and working directory, and as the home of the
# static user when one is configured.
home = lib.mkOption {
  description = ''
    The home directory that the ollama service is started in.
  '';
  type = types.str;
  default = "/var/lib/ollama";
  example = "/home/foo";
};
|
||
|
# Model storage directory; exported to the service as OLLAMA_MODELS.
models = lib.mkOption {
  type = types.str;
  default = "${cfg.home}/models";
  # The default is derived from another option, so render it in the manual as
  # a Nix expression. A plain string here would be shown as an escaped string
  # literal ("\${...}") rather than as an interpolation, and would be
  # inconsistent with how `group` documents its derived default.
  defaultText = literalExpression "\"\${config.services.ollama.home}/models\"";
  example = "/path/to/ollama/models";
  description = ''
    The directory that the ollama service will read models from and download new models to.
  '';
};
|
||
|
|
||
|
# Address half of the OLLAMA_HOST value passed to the service.
host = lib.mkOption {
  description = ''
    The host address which the ollama server HTTP interface listens to.
  '';
  type = types.str;
  default = "127.0.0.1";
  example = "[::]";
};
|
||
|
# Port half of the OLLAMA_HOST value; also the port opened by `openFirewall`.
port = lib.mkOption {
  description = ''
    Which port the ollama server listens to.
  '';
  type = types.port;
  default = 11434;
  example = 11111;
};
|
||
|
|
||
|
# Acceleration backend handed to the package via `override`.
# `null` defers the choice to the package, `false` forces CPU-only.
acceleration = lib.mkOption {
  type = types.nullOr (
    types.enum [
      false
      "rocm"
      "cuda"
    ]
  );
  default = null;
  example = "rocm";
  # Sub-items are indented so they render as nested lists under the value they
  # describe; indentation inside an indented string is preserved relative to
  # the least-indented line.
  description = ''
    What interface to use for hardware acceleration.

    - `null`: default behavior
      - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
      - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
      - otherwise defaults to `false`
    - `false`: disable GPU, only use CPU
    - `"rocm"`: supported by most modern AMD GPUs
      - may require overriding gpu type with `services.ollama.rocmOverrideGfx`
        if rocm doesn't detect your AMD gpu
    - `"cuda"`: supported by most modern NVIDIA GPUs
  '';
};
|
||
|
# When non-null, exported to the service as HSA_OVERRIDE_GFX_VERSION.
rocmOverrideGfx = lib.mkOption {
  description = ''
    Override what rocm will detect your gpu model as.
    For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

    This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
    https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
    ) for details.
  '';
  type = with types; nullOr str;
  default = null;
  example = "10.3.0";
};
|
||
|
|
||
|
# Extra environment for the systemd unit. The module-managed variables
# (HOME, OLLAMA_MODELS, OLLAMA_HOST) are merged on top of these.
environmentVariables = lib.mkOption {
  description = ''
    Set arbitrary environment variables for the ollama service.

    Be aware that these are only seen by the ollama server (systemd service),
    not normal invocations like `ollama run`.
    Since `ollama run` is mostly a shell around the ollama server, this is usually sufficient.
  '';
  type = with types; attrsOf str;
  default = { };
  example = {
    OLLAMA_LLM_LIBRARY = "cpu";
    HIP_VISIBLE_DEVICES = "0,1";
  };
};
|
||
|
# Model names to pull after startup; a non-empty list enables the
# `ollama-model-loader` unit.
loadModels = lib.mkOption {
  description = ''
    Download these models using `ollama pull` as soon as `ollama.service` has started.

    This creates a systemd unit `ollama-model-loader.service`.

    Search for models of your choice from: https://ollama.com/library
  '';
  type = with types; listOf str;
  default = [ ];
};
|
||
|
# Opt-in firewall rule for the server's TCP port.
openFirewall = lib.mkOption {
  description = ''
    Whether to open the firewall for ollama.

    This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
  '';
  type = types.bool;
  default = false;
};
|
||
|
};
|
||
|
};
|
||
|
|
||
|
config = lib.mkIf cfg.enable {
|
||
|
# Declare the account and group only when both are configured; otherwise
# nothing is added to `users`.
users = lib.mkIf staticUser {
  groups.${cfg.group} = { };
  users.${cfg.user} = {
    isSystemUser = true;
    home = cfg.home;
    inherit (cfg) group;
  };
};
|
||
|
|
||
|
# The ollama server unit.
systemd.services.ollama =
  let
    # Variables derived from dedicated module options. They are merged on top
    # of `cfg.environmentVariables`, so they win for same-named keys.
    managedEnvironment = {
      HOME = cfg.home;
      OLLAMA_MODELS = cfg.models;
      OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
    };

    # Only exported when an override is configured.
    gfxOverride = lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
      HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
    };

    # Static identity, present only when both user and group are configured.
    # Its keys do not overlap with the settings below.
    staticIdentity = lib.optionalAttrs staticUser {
      User = cfg.user;
      Group = cfg.group;
    };
  in
  {
    description = "Server for local large language models";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    environment = cfg.environmentVariables // managedEnvironment // gfxOverride;

    serviceConfig = staticIdentity // {
      Type = "exec";
      DynamicUser = true;
      ExecStart = "${lib.getExe ollamaPackage} serve";
      WorkingDirectory = cfg.home;
      StateDirectory = [ "ollama" ];
      ReadWritePaths = [
        cfg.home
        cfg.models
      ];

      # Sandboxing and hardening.
      CapabilityBoundingSet = [ "" ];
      DeviceAllow = [
        # CUDA
        # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
        "char-nvidiactl"
        "char-nvidia-caps"
        "char-nvidia-frontend"
        "char-nvidia-uvm"
        # ROCm
        "char-drm"
        "char-kfd"
      ];
      DevicePolicy = "closed";
      LockPersonality = true;
      MemoryDenyWriteExecute = true;
      NoNewPrivileges = true;
      PrivateDevices = false; # hides acceleration devices
      PrivateTmp = true;
      PrivateUsers = true;
      ProcSubset = "all"; # /proc/meminfo
      ProtectClock = true;
      ProtectControlGroups = true;
      # NOTE(review): `home` may be set to a path under /home (the option's
      # example does this); confirm that still works with ProtectHome enabled.
      ProtectHome = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectProc = "invisible";
      ProtectSystem = "strict";
      RemoveIPC = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service @resources"
        "~@privileged"
      ];
      UMask = "0077";
    };
  };
|
||
|
|
||
|
# Companion unit that pulls every model in `cfg.loadModels`.
# It is only defined when that list is non-empty.
systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
  description = "Download ollama models in the background";

  # Ordered after, and bound to, the server unit.
  wantedBy = [
    "multi-user.target"
    "ollama.service"
  ];
  after = [ "ollama.service" ];
  bindsTo = [ "ollama.service" ];

  # Reuse the server unit's environment (includes OLLAMA_HOST).
  inherit (config.systemd.services.ollama) environment;

  serviceConfig = {
    Type = "exec";
    DynamicUser = true;
    Restart = "on-failure";
    # bounded exponential backoff
    RestartSec = "1s";
    RestartMaxDelaySec = "2h";
    RestartSteps = "10";
  };

  # Start one `ollama pull` per model in the background, then wait for each
  # job and count failures; exit non-zero if any pull failed so that the
  # restart policy above applies.
  script = ''
    total=${toString (builtins.length cfg.loadModels)}
    failed=0

    for model in ${lib.escapeShellArgs cfg.loadModels}; do
      '${lib.getExe ollamaPackage}' pull "$model" &
    done

    for job in $(jobs -p); do
      set +e
      wait $job
      exit_code=$?
      set -e

      if [ $exit_code != 0 ]; then
        failed=$((failed + 1))
      fi
    done

    if [ $failed != 0 ]; then
      echo "error: $failed out of $total attempted model downloads failed" >&2
      exit 1
    fi
  '';
};
|
||
|
|
||
|
# Open the server port only when explicitly requested.
networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];

# Put the (acceleration-overridden) package on the system PATH.
environment.systemPackages = [ ollamaPackage ];
|
||
|
};
|
||
|
|
||
|
# Maintainers of this module.
meta.maintainers = [
  lib.maintainers.abysssol
  lib.maintainers.onny
];
|
||
|
}
|