depot/third_party/nixpkgs/pkgs/development/python-modules/vllm/default.nix
Default email fa5436e0a7 Project import generated by Copybara.
GitOrigin-RevId: e8057b67ebf307f01bdcc8fba94d94f75039d1f6
2024-06-05 17:53:02 +02:00

157 lines
3.7 KiB
Nix

{
lib,
buildPythonPackage,
fetchFromGitHub,
which,
ninja,
packaging,
setuptools,
torch,
outlines,
wheel,
psutil,
ray,
pandas,
pyarrow,
sentencepiece,
numpy,
transformers,
xformers,
fastapi,
uvicorn,
pydantic,
aioprometheus,
pynvml,
cupy,
writeShellScript,
config,
cudaSupport ? config.cudaSupport,
cudaPackages ? { },
rocmSupport ? config.rocmSupport,
rocmPackages ? { },
gpuTargets ? [ ],
}:
buildPythonPackage rec {
pname = "vllm";
version = "0.3.3";
format = "pyproject";
src = fetchFromGitHub {
owner = "vllm-project";
repo = pname;
rev = "v${version}";
hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
};
# Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
);
# xformers 0.0.23.post1 github release specifies its version as 0.0.24
#
# cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies, we already have it set up
# like that in nixpkgs. Version upgrade is due to upstream shenanigans
# https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
#
# hipcc --version works badly on NixOS due to unresolved paths.
postPatch =
''
substituteInPlace requirements.txt \
--replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
substituteInPlace requirements.txt \
--replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
substituteInPlace requirements-build.txt \
--replace "torch==2.1.2" "torch == 2.2.1"
substituteInPlace pyproject.toml \
--replace "torch == 2.1.2" "torch == 2.2.1"
substituteInPlace requirements.txt \
--replace "torch == 2.1.2" "torch == 2.2.1"
''
+ lib.optionalString rocmSupport ''
substituteInPlace setup.py \
--replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
'';
preBuild =
lib.optionalString cudaSupport ''
export CUDA_HOME=${cudaPackages.cuda_nvcc}
''
+ lib.optionalString rocmSupport ''
export ROCM_HOME=${rocmPackages.clr}
export PATH=$PATH:${rocmPackages.hipcc}
'';
nativeBuildInputs = [
ninja
packaging
setuptools
torch
wheel
which
] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];
buildInputs =
(lib.optionals cudaSupport (
with cudaPackages;
[
cuda_cudart # cuda_runtime.h, -lcudart
cuda_cccl.dev # <thrust/*>
libcusparse.dev # cusparse.h
libcublas.dev # cublas_v2.h
libcusolver # cusolverDn.h
]
))
++ (lib.optionals rocmSupport (
with rocmPackages;
[
clr
rocthrust
rocprim
hipsparse
hipblas
]
));
propagatedBuildInputs =
[
psutil
ray
pandas
pyarrow
sentencepiece
numpy
torch
transformers
outlines
xformers
fastapi
uvicorn
pydantic
aioprometheus
]
++ uvicorn.optional-dependencies.standard
++ aioprometheus.optional-dependencies.starlette
++ lib.optionals cudaSupport [
pynvml
cupy
];
pythonImportsCheck = [ "vllm" ];
meta = with lib; {
description = "A high-throughput and memory-efficient inference and serving engine for LLMs";
changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
homepage = "https://github.com/vllm-project/vllm";
license = licenses.asl20;
maintainers = with maintainers; [
happysalada
lach
];
broken = !cudaSupport && !rocmSupport;
};
}