# vllm: high-throughput LLM inference/serving engine.
# Requires a GPU backend: the package is marked broken unless cudaSupport or
# rocmSupport is enabled (see meta.broken below).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  # NOTE(review): accepted for interface compatibility but not consumed below —
  # ROCm targets come from rocmPackages.clr.gpuTargets instead.
  gpuTargets ? [ ],
}:

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    # Literal repo name rather than `repo = pname`: the fetch target must not
    # silently follow a future rename of the attribute/pname.
    repo = "vllm";
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise it tries to enumerate host supported ROCM gfx archs, and that is
  # not possible due to sandboxing.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );

  # xformers 0.0.23.post1 github release specifies its version as 0.0.24
  #
  # cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies,
  # we already have it set up like that in nixpkgs. Version upgrade is due to
  # upstream shenanigans
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # hipcc --version works badly on NixOS due to unresolved paths.
  postPatch =
    # Use --replace-fail (not the deprecated --replace): if a pinned pattern
    # disappears on a version bump, the build aborts instead of silently
    # keeping the upstream pin.
    ''
      substituteInPlace requirements.txt \
        --replace-fail "xformers == 0.0.23.post1" "xformers == 0.0.24"
      substituteInPlace requirements.txt \
        --replace-fail "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
      substituteInPlace requirements-build.txt \
        --replace-fail "torch==2.1.2" "torch == 2.2.1"
      substituteInPlace pyproject.toml \
        --replace-fail "torch == 2.1.2" "torch == 2.2.1"
      substituteInPlace requirements.txt \
        --replace-fail "torch == 2.1.2" "torch == 2.2.1"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace-fail "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';

  # Point the build at the sandboxed CUDA/ROCm toolchains; upstream setup.py
  # locates the compilers via these environment variables.
  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl.dev # <thrust/*>
        libcusparse.dev # cusparse.h
        libcublas.dev # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    # pynvml/cupy are CUDA-only runtime dependencies.
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    # A GPU backend is mandatory: CPU-only builds of vllm 0.3.x do not work.
    broken = !cudaSupport && !rocmSupport;
  };
}