{ config, lib, pkgs, ... }:
let
  inherit (lib) literalExpression types;

  cfg = config.services.ollama;
  ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };

  # run as a statically provisioned user/group instead of systemd's DynamicUser
  staticUser = cfg.user != null && cfg.group != null;
in
{
  imports = [
    (lib.mkRemovedOptionModule [ "services" "ollama" "listenAddress" ]
      "Use `services.ollama.host` and `services.ollama.port` instead."
    )
    (lib.mkRemovedOptionModule [ "services" "ollama" "sandbox" ]
      "Set `services.ollama.user` and `services.ollama.group` instead."
    )
    (lib.mkRemovedOptionModule [ "services" "ollama" "writablePaths" ]
      "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."
    )
  ];

  options = {
    services.ollama = {
      enable = lib.mkEnableOption "ollama server for local large language models";
      package = lib.mkPackageOption pkgs "ollama" { };

      user = lib.mkOption {
        type = with types; nullOr str;
        default = null;
        example = "ollama";
        description = ''
          User account under which to run ollama. Defaults to
          [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
          when set to `null`.

          The user is created automatically if this option is set to a non-null value.
        '';
      };
      group = lib.mkOption {
        type = with types; nullOr str;
        default = cfg.user;
        defaultText = literalExpression "config.services.ollama.user";
        example = "ollama";
        description = ''
          Group under which to run ollama. Only used when `services.ollama.user` is set.

          The group is created automatically if this option is set to a non-null value.
        '';
      };

      home = lib.mkOption {
        type = types.str;
        default = "/var/lib/ollama";
        example = "/home/foo";
        description = ''
          The home directory that the ollama service is started in.
        '';
      };
      models = lib.mkOption {
        type = types.str;
        default = "${cfg.home}/models";
        defaultText = "\${config.services.ollama.home}/models";
        example = "/path/to/ollama/models";
        description = ''
          The directory that the ollama service reads models from and downloads new models to.
        '';
      };

      host = lib.mkOption {
        type = types.str;
        default = "127.0.0.1";
        example = "[::]";
        description = ''
          The host address on which the ollama server's HTTP interface listens.
        '';
      };
      port = lib.mkOption {
        type = types.port;
        default = 11434;
        example = 11111;
        description = ''
          The port on which the ollama server listens.
        '';
      };

      acceleration = lib.mkOption {
        type = types.nullOr (
          types.enum [
            false
            "rocm"
            "cuda"
          ]
        );
        default = null;
        example = "rocm";
        description = ''
          What interface to use for hardware acceleration.

          - `null`: default behavior
            - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
            - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
            - otherwise defaults to `false`
          - `false`: disable GPU, only use CPU
          - `"rocm"`: supported by most modern AMD GPUs
            - may require overriding the GPU type with `services.ollama.rocmOverrideGfx`
              if rocm doesn't detect your AMD GPU
          - `"cuda"`: supported by most modern NVIDIA GPUs
        '';
      };
      rocmOverrideGfx = lib.mkOption {
        type = types.nullOr types.str;
        default = null;
        example = "10.3.0";
        description = ''
          Override what rocm detects your GPU model as.
          For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

          This sets the value of `HSA_OVERRIDE_GFX_VERSION`.
          See [ollama's docs](https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon)
          for details.
        '';
      };
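      # A hypothetical sketch combining the two ROCm-related options above, using
      # this module's own example values (consult ollama's gpu docs for the gfx
      # version that matches your card, not a recommendation for any specific GPU):
      #
      #   services.ollama = {
      #     enable = true;
      #     acceleration = "rocm";
      #     rocmOverrideGfx = "10.3.0";
      #   };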
      environmentVariables = lib.mkOption {
        type = types.attrsOf types.str;
        default = { };
        example = {
          OLLAMA_LLM_LIBRARY = "cpu";
          HIP_VISIBLE_DEVICES = "0,1";
        };
        description = ''
          Set arbitrary environment variables for the ollama service.

          Be aware that these are only seen by the ollama server (systemd service),
          not by normal invocations like `ollama run`. Since `ollama run` is mostly
          a thin wrapper around the ollama server, this is usually sufficient.
        '';
      };
      loadModels = lib.mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = ''
          Download these models using `ollama pull` as soon as `ollama.service` has started.

          This creates a systemd unit `ollama-model-loader.service`.

          Search for models of your choice at https://ollama.com/library.
        '';
      };
      openFirewall = lib.mkOption {
        type = types.bool;
        default = false;
        description = ''
          Whether to open the firewall for ollama.
          This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
        '';
      };
    };
  };

  config = lib.mkIf cfg.enable {
    users = lib.mkIf staticUser {
      users.${cfg.user} = {
        inherit (cfg) home;
        isSystemUser = true;
        group = cfg.group;
      };
      groups.${cfg.group} = { };
    };

    systemd.services.ollama = {
      description = "Server for local large language models";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];

      environment =
        cfg.environmentVariables
        // {
          # `//` is right-biased, so these always win over `environmentVariables`
          HOME = cfg.home;
          OLLAMA_MODELS = cfg.models;
          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
        }
        // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
          HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
        };

      serviceConfig =
        lib.optionalAttrs staticUser {
          User = cfg.user;
          Group = cfg.group;
        }
        // {
          Type = "exec";
          DynamicUser = true;
          ExecStart = "${lib.getExe ollamaPackage} serve";
          WorkingDirectory = cfg.home;
          StateDirectory = [ "ollama" ];
          ReadWritePaths = [
            cfg.home
            cfg.models
          ];

          CapabilityBoundingSet = [ "" ];
          DeviceAllow = [
            # CUDA
            # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
            "char-nvidiactl"
            "char-nvidia-caps"
            "char-nvidia-frontend"
            "char-nvidia-uvm"
            # ROCm
            "char-drm"
            "char-kfd"
          ];
          DevicePolicy = "closed";
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          NoNewPrivileges = true;
          PrivateDevices = false; # hides acceleration devices
          PrivateTmp = true;
          PrivateUsers = true;
          ProcSubset = "all"; # /proc/meminfo
          ProtectClock = true;
          ProtectControlGroups = true;
          ProtectHome = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          RemoveIPC = true;
          RestrictNamespaces = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictAddressFamilies = [
            "AF_INET"
            "AF_INET6"
            "AF_UNIX"
          ];
          SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
          SystemCallArchitectures = "native";
          SystemCallFilter = [
            "@system-service @resources"
            "~@privileged"
          ];
          UMask = "0077";
        };
    };

    systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
      description = "Download ollama models in the background";
      wantedBy = [
        "multi-user.target"
        "ollama.service"
      ];
      after = [ "ollama.service" ];
      bindsTo = [ "ollama.service" ];
      environment = config.systemd.services.ollama.environment;
      serviceConfig = {
        Type = "exec";
        DynamicUser = true;
        Restart = "on-failure";
        # bounded exponential backoff
        RestartSec = "1s";
        RestartMaxDelaySec = "2h";
        RestartSteps = "10";
      };
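      # Pull all requested models in parallel; each job's exit status is collected
      # individually so one failed download doesn't abort the rest, and the unit
      # only fails (triggering the restart backoff above) after every model has
      # been attempted.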
      script = ''
        total=${toString (builtins.length cfg.loadModels)}
        failed=0

        for model in ${lib.escapeShellArgs cfg.loadModels}; do
          '${lib.getExe ollamaPackage}' pull "$model" &
        done

        for job in $(jobs -p); do
          set +e
          wait $job
          exit_code=$?
          set -e

          if [ $exit_code != 0 ]; then
            failed=$((failed + 1))
          fi
        done

        if [ $failed != 0 ]; then
          echo "error: $failed out of $total attempted model downloads failed" >&2
          exit 1
        fi
      '';
    };

    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };

    environment.systemPackages = [ ollamaPackage ];
  };

  meta.maintainers = with lib.maintainers; [
    abysssol
    onny
  ];
}