112 lines
2.9 KiB
Nix
112 lines
2.9 KiB
Nix
|
{ config, lib, pkgs, utils, ... }:
|
||
|
|
||
|
let
|
||
|
# Shorthand for the values set under `services.llama-cpp` in the system configuration.
cfg = config.services.llama-cpp;
|
||
|
in {
|
||
|
|
||
|
# Option declarations for the LLaMA C++ server module.
# All options live under `services.llama-cpp`.
options.services.llama-cpp =
  let
    inherit (lib)
      mkEnableOption
      mkOption
      mkPackageOption
      types
      ;
  in
  {
    # Master switch for the service.
    enable = mkEnableOption "LLaMA C++ server";

    # Package that provides the server binary; defaults to `pkgs.llama-cpp`.
    package = mkPackageOption pkgs "llama-cpp" { };

    # Model file handed to the server via `-m`. No default: must be set by the user.
    model = mkOption {
      type = types.path;
      description = "Model path.";
      example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
    };

    # Additional command-line arguments appended after the built-in ones.
    extraFlags = mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = "Extra flags passed to llama-cpp-server.";
      example = [
        "-c"
        "4096"
        "-ngl"
        "32"
        "--numa"
      ];
    };

    # Address passed to the server via `--host`.
    host = mkOption {
      type = types.str;
      default = "127.0.0.1";
      description = "IP address the LLaMA C++ server listens on.";
      example = "0.0.0.0";
    };

    # TCP port passed to the server via `--port`; also used for the firewall rule.
    port = mkOption {
      type = types.port;
      default = 8080;
      description = "Listen port for LLaMA C++ server.";
    };

    # When true, `port` is added to the firewall's allowed TCP ports.
    openFirewall = mkOption {
      type = types.bool;
      default = false;
      description = "Open ports in the firewall for LLaMA C++ server.";
    };
  };
|
||
|
|
||
|
# Configuration contributed by this module; applied only when
# `services.llama-cpp.enable` is true.
config = lib.mkIf cfg.enable {

  # systemd unit that runs the server binary from `cfg.package`.
  systemd.services.llama-cpp = {
    description = "LLaMA C++ server";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "idle";
      KillSignal = "SIGINT";

      # Every argument is passed through utils.escapeSystemdExecArgs so that
      # whitespace, quotes, `%` and `$` inside the host, model path or extra
      # flags reach the server verbatim instead of being split or expanded by
      # systemd. Previously only `extraFlags` was escaped.
      ExecStart =
        let
          serverArgs = [
            "--log-disable"
            "--host"
            cfg.host
            "--port"
            (builtins.toString cfg.port)
            "-m"
            # Interpolated on purpose: turns any value accepted by `types.path`
            # into a plain string exactly as the unescaped command line did.
            "${cfg.model}"
          ] ++ cfg.extraFlags;
        in
        "${cfg.package}/bin/llama-cpp-server ${utils.escapeSystemdExecArgs serverArgs}";

      # Restart only after a failure, waiting 300 seconds between attempts.
      Restart = "on-failure";
      RestartSec = 300;

      # for GPU acceleration
      PrivateDevices = false;

      # hardening
      DynamicUser = true;
      CapabilityBoundingSet = "";
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      NoNewPrivileges = true;
      PrivateMounts = true;
      PrivateTmp = true;
      PrivateUsers = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectSystem = "strict";
      MemoryDenyWriteExecute = true;
      LockPersonality = true;
      RemoveIPC = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
        "~@resources"
      ];
      SystemCallErrorNumber = "EPERM";
      ProtectProc = "invisible";
      ProtectHostname = true;
      ProcSubset = "pid";
    };
  };

  # Allow inbound TCP traffic on the server port only when explicitly requested.
  networking.firewall = lib.mkIf cfg.openFirewall {
    allowedTCPPorts = [ cfg.port ];
  };

};
|
||
|
|
||
|
# Maintainers of this module, selected from `lib.maintainers`.
meta.maintainers = with lib.maintainers; [ newam ];
|
||
|
}
|