228 lines
6.4 KiB
Nix
228 lines
6.4 KiB
Nix
|
# Dependencies and build options of the mistral-rs package expression.
{
  lib,
  rustPlatform,
  fetchFromGitHub,

  # nativeBuildInputs
  pkg-config,
  python3,

  # buildInputs
  oniguruma,
  openssl,
  mkl,
  stdenv,
  darwin,

  # env
  fetchurl,

  # install check
  versionCheckHook,

  # passthru
  testers,
  mistral-rs,
  nix-update-script,

  # CUDA
  cudaPackages,
  # CUDA compute capability to build for (dotted string such as "6.1").
  # When null, the first entry of `cudaPackages.cudaFlags.cudaCapabilities`
  # that is at least the minimum supported capability is used.
  cudaCapability ? null,

  config,
  # one of `[ null false "cuda" "mkl" "metal" ]`
  # null picks a default: CUDA when `config.cudaSupport` is set, Metal on
  # aarch64-darwin; mkl is only enabled when requested explicitly.
  acceleration ? null,
}:
|
||
|
|
||
|
let
|
||
|
# Validates the `acceleration` argument.
# Evaluates to `true` for a supported value. For anything else, evaluation
# aborts with a message that names the offending value and lists the accepted
# ones, rather than the bare "assertion failed" that `assert false` produces.
accelIsValid = lib.assertOneOf "acceleration" acceleration [
  null
  false
  "cuda"
  "mkl"
  "metal"
];
|
||
|
|
||
|
# CUDA is enabled when it is requested explicitly, or when `acceleration` is
# left unset (null) and the `config.cudaSupport` flag is on.
cudaSupport =
  assert accelIsValid;
  if acceleration == null then config.cudaSupport else acceleration == "cuda";
|
||
|
|
||
|
# Lowest compute capability the package can be built for.
minRequiredCudaCapability = "6.1"; # build fails with 6.0
inherit (cudaPackages.cudaFlags) cudaCapabilities;
# Compute capability (dotted string) to build for.
# An explicit `cudaCapability` argument wins. Otherwise the first configured
# capability that satisfies the minimum is used; if none does, a warning is
# emitted and the minimum itself is used.
cudaCapabilityString =
  if cudaCapability != null then
    cudaCapability
  else
    let
      isSupported = cap: lib.versionAtLeast cap minRequiredCudaCapability;
      # Only evaluated (and therefore only warns) when no capability matches.
      fallback = lib.warn "mistral-rs doesn't support ${lib.concatStringsSep " " cudaCapabilities}" minRequiredCudaCapability;
    in
    lib.findFirst isSupported fallback cudaCapabilities;
# Integer form of the capability: the string is passed through
# `cudaFlags.dropDot` and parsed with `lib.toInt`.
cudaCapability' = lib.toInt (cudaPackages.cudaFlags.dropDot cudaCapabilityString);
|
||
|
|
||
|
# MKL is strictly opt-in: it is never selected when `acceleration` is unset.
mklSupport =
  assert accelIsValid;
  acceleration == "mkl";
|
||
|
|
||
|
# Metal is enabled when it is requested explicitly, or by default on
# aarch64-darwin when `acceleration` is left unset (null).
metalSupport =
  assert accelIsValid;
  let
    isAarch64Darwin = stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64;
  in
  if acceleration == null then isAarch64Darwin else acceleration == "metal";
|
||
|
|
||
|
# Apple SDK frameworks added to `buildInputs` on Darwin; the Metal frameworks
# are only included when Metal acceleration is enabled.
darwinBuildInputs =
  let
    frameworks = darwin.apple_sdk.frameworks;
  in
  [
    frameworks.Accelerate
    frameworks.CoreVideo
    frameworks.CoreGraphics
  ]
  ++ lib.optionals metalSupport [
    frameworks.MetalKit
    frameworks.MetalPerformanceShaders
  ];
|
||
|
in
|
||
|
|
||
|
rustPlatform.buildRustPackage rec {
|
||
|
pname = "mistral-rs";
version = "0.3.2";

# Upstream sources, fetched from the git tag that matches `version`.
src = fetchFromGitHub {
  owner = "EricLBuehler";
  repo = "mistral.rs";
  rev = "refs/tags/v${version}";
  hash = "sha256-aflzpJZ48AFBqNTssZl2KxkspQb662nGkEU6COIluxk=";
};
|
||
|
|
||
|
# Dependencies are resolved from the lock file stored next to this expression.
cargoLock.lockFile = ./Cargo.lock;
# Fixed-output hashes for the crates listed here, keyed by `<name>-<version>`.
# NOTE(review): presumably these are the git-sourced entries of Cargo.lock — confirm when updating.
cargoLock.outputHashes = {
  "bindgen_cuda-0.1.6" = "sha256-OWGcQxT+x5HyIFskNVWpPr6Qfkh6Mv/g4PVSm5oA27g=";
  "candle-core-0.7.2" = "sha256-OovBzD1gEYToa3HT8oQtbY6sDy0heRwAH2cK7gz5Jm0=";
};
|
||
|
|
||
|
# Build-time tools; the CUDA compiler is only added for CUDA builds.
nativeBuildInputs = [
  pkg-config
  python3
]
++ lib.optional cudaSupport cudaPackages.cuda_nvcc;
|
||
|
|
||
|
# Libraries linked into the build: the common set, plus whatever the selected
# acceleration backend and the host platform require.
buildInputs =
  let
    cudaLibraries = [
      cudaPackages.cuda_cudart
      cudaPackages.cuda_nvrtc
      cudaPackages.libcublas
      cudaPackages.libcurand
    ];
  in
  [
    oniguruma
    openssl
  ]
  ++ lib.optionals cudaSupport cudaLibraries
  ++ lib.optional mklSupport mkl
  ++ lib.optionals stdenv.hostPlatform.isDarwin darwinBuildInputs;
|
||
|
|
||
|
# Cargo flags: default features are turned off and exactly the feature of the
# selected acceleration backend (if any) is switched back on.
cargoBuildFlags =
  let
    # `[ "--features=<name>" ]` when `enabled`, otherwise the empty list.
    featureFlag = enabled: name: lib.optional enabled "--features=${name}";
  in
  [
    # This disables the plotly crate which fails to build because of the kaleido feature requiring
    # network access at build-time.
    # See https://github.com/NixOS/nixpkgs/pull/323788#issuecomment-2206085825
    "--no-default-features"
  ]
  ++ featureFlag cudaSupport "cuda"
  ++ featureFlag mklSupport "mkl"
  ++ featureFlag (stdenv.hostPlatform.isDarwin && metalSupport) "metal";
|
||
|
|
||
|
# Environment variables exported to the build.
env =
  let
    # When updating:
    # - Look for the version of `utoipa-swagger-ui` at:
    #   https://github.com/EricLBuehler/mistral.rs/blob/v<MISTRAL-RS-VERSION>/mistralrs-server/Cargo.toml
    # - Look at the corresponding version of `swagger-ui` at:
    #   https://github.com/juhaku/utoipa/blob/utoipa-swagger-ui-<UTOIPA-SWAGGER-UI-VERSION>/utoipa-swagger-ui/build.rs#L21-L22
    swaggerUiVersion = "5.17.12";

    # Pre-fetched swagger-ui archive, handed to the build as a `file://` URL.
    swaggerUi = fetchurl {
      url = "https://github.com/swagger-api/swagger-ui/archive/refs/tags/v${swaggerUiVersion}.zip";
      hash = "sha256-HK4z/JI+1yq8BTBJveYXv9bpN/sXru7bn/8g5mf2B/I=";
    };

    # Variables that are only set for CUDA builds.
    cudaEnv = {
      CUDA_COMPUTE_CAP = cudaCapability';

      # Apparently, cudart is enough: No need to provide the entire cudaPackages.cudatoolkit derivation.
      CUDA_TOOLKIT_ROOT_DIR = lib.getDev cudaPackages.cuda_cudart;
    };
  in
  {
    SWAGGER_UI_DOWNLOAD_URL = "file://${swaggerUi}";

    RUSTONIG_SYSTEM_LIBONIG = true;
  }
  // lib.optionalAttrs cudaSupport cudaEnv;
|
||
|
|
||
|
# Include paths for the CUDA runtime and CCCL headers; empty for non-CUDA builds.
NVCC_PREPEND_FLAGS = lib.optionals cudaSupport (
  map (pkg: "-I${lib.getDev pkg}/include") [
    cudaPackages.cuda_cudart
    cudaPackages.cuda_cccl
  ]
);
|
||
|
|
||
|
# During the check phase swagger-ui is copied into the target directory once
# more. If the archive unpacked by the build phase is still there, this fails
# with a `PermissionDenied` error, so the build directory is removed first.
preCheck = "rm -rf target/${stdenv.hostPlatform.config}/release/build/\n";
|
||
|
|
||
|
# Skip the tests that try to access the internet.
checkFlags = map (testName: "--skip=${testName}") [
  "gguf::gguf_tokenizer::tests::test_decode_gpt2"
  "gguf::gguf_tokenizer::tests::test_decode_llama"
  "gguf::gguf_tokenizer::tests::test_encode_decode_gpt2"
  "gguf::gguf_tokenizer::tests::test_encode_decode_llama"
  "gguf::gguf_tokenizer::tests::test_encode_gpt2"
  "gguf::gguf_tokenizer::tests::test_encode_llama"
  "sampler::tests::test_argmax"
  "sampler::tests::test_gumbel_speculative"
  "util::tests::test_parse_image_url"
];
|
||
|
|
||
|
# Install check: run the installed server binary with `--version` through
# `versionCheckHook`.
doInstallCheck = true;
nativeInstallCheckInputs = [ versionCheckHook ];
versionCheckProgram = "${placeholder "out"}/bin/mistralrs-server";
versionCheckProgramArg = [ "--version" ];
|
||
|
|
||
|
passthru = {
  tests = {
    version = testers.testVersion { package = mistral-rs; };

    # Accelerated variants, each restricted to the platform it targets (an
    # empty set elsewhere).
    # `stdenv.hostPlatform` is an attribute set, so the platform has to be
    # compared through its `system` string: comparing the set itself with a
    # string is always false, which would silently disable these tests.

    # The mkl variant is currently flagged as broken in `meta`. The test is
    # skipped for as long as that is the case and is enabled automatically
    # once mkl support is fixed.
    withMkl =
      let
        mklVariant = mistral-rs.override { acceleration = "mkl"; };
      in
      lib.optionalAttrs (
        stdenv.hostPlatform.system == "x86_64-linux" && !(mklVariant.meta.broken or false)
      ) mklVariant;
    withCuda = lib.optionalAttrs stdenv.hostPlatform.isLinux (
      mistral-rs.override { acceleration = "cuda"; }
    );
    withMetal = lib.optionalAttrs (stdenv.hostPlatform.system == "aarch64-darwin") (
      mistral-rs.override { acceleration = "metal"; }
    );
  };
  updateScript = nix-update-script { };
};
|
||
|
|
||
|
meta = {
  description = "Blazingly fast LLM inference";
  homepage = "https://github.com/EricLBuehler/mistral.rs";
  changelog = "https://github.com/EricLBuehler/mistral.rs/releases/tag/v${version}";
  license = lib.licenses.mit;
  maintainers = [ lib.maintainers.GaetanLepage ];
  mainProgram = "mistralrs-server";
  # Supported platforms depend on the selected acceleration backend; without
  # one, every unix platform is allowed.
  platforms =
    if cudaSupport then
      lib.platforms.linux
    else if metalSupport then
      [ "aarch64-darwin" ]
    else if mklSupport then
      [ "x86_64-linux" ]
    else
      lib.platforms.unix;
  # The mkl variant is marked as broken.
  broken = mklSupport;
};
|
||
|
}
|