depot/third_party/nixpkgs/pkgs/os-specific/linux/dcgm/default.nix

{ lib
, callPackage
, gcc11Stdenv
, fetchFromGitHub
, addOpenGLRunpath
, catch2
, cmake
, cudaPackages_10_2
, cudaPackages_11_8
, cudaPackages_12
, fmt_9
, git
, jsoncpp
, libevent
, plog
, python3
, symlinkJoin
, tclap_1_4
, yaml-cpp
}:
let
  # libevent without SSL support, rebuilt as a static, position-independent library.
  # The compiler flags are copied from DCGM's libevent build script.
  libevent-nossl = libevent.override { sslSupport = false; };
  libevent-nossl-static =
    let
      # The same flag string is handed to both the C and the C++ compiler.
      compilerFlags = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";
    in
    libevent-nossl.overrideAttrs (prev: {
      CFLAGS = compilerFlags;
      CXXFLAGS = compilerFlags;
      # Keep the configure flags the libevent derivation already sets and append the
      # static/PIC switches.
      configureFlags = prev.configureFlags ++ [ "--disable-shared" "--with-pic" ];
    });

  # jsoncpp with its `enableStatic` option switched on.
  jsoncpp-static = jsoncpp.override { enableStatic = true; };

  # DCGM depends on 3 different versions of CUDA at the same time.
  # The runtime closure, thankfully, is quite small because most things
  # are statically linked.
  #
  # Each entry pairs a CUDA major version string with the list of packages that
  # provide that version's headers and libraries.
  cudaPackageSetByVersion =
    let
      cuda10Toolkit = cudaPackages_10_2.cudatoolkit;
      # Entry for a CUDA release whose redist packages are available individually.
      fromRedist = version: cudaPackages: {
        inherit version;
        pkgSet = getCudaPackages cudaPackages;
      };
    in
    [
      {
        version = "10";
        # Nixpkgs cudaPackages_10 doesn't have redist packages broken out.
        pkgSet = [ cuda10Toolkit cuda10Toolkit.lib ];
      }
      (fromRedist "11" cudaPackages_11_8)
      (fromRedist "12" cudaPackages_12)
    ];

  # Select needed redist packages from cudaPackages
  # C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39
  #
  # Takes a cudaPackages-style attribute set and returns the listed attributes, in this order.
  getCudaPackages = p: map (attrName: p.${attrName}) [
    "cuda_cccl"
    "cuda_cudart"
    "cuda_nvcc"
    "cuda_nvml_dev"
    "libcublas"
    "libcufft"
    "libcurand"
  ];

  # Builds CMake code to add CUDA paths for include and lib.
  # Takes one entry of cudaPackageSetByVersion and returns two `list(APPEND ...)` lines
  # for the Cuda<version>_INCLUDE_PATHS and Cuda<version>_LIB_PATHS variables.
  mkAppendCudaPaths = { version, pkgSet }:
    let
      # The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must
      # combine everything together for headers to work.
      # It would be more convenient to use symlinkJoin on *just* the include subdirectories
      # of each package, but not all of them have an include directory and making that work
      # is more effort than it's worth for this temporary, build-time package.
      mergedTree = symlinkJoin {
        name = "cuda-combined-${version}";
        paths = pkgSet;
      };
      # The combined package above breaks the build for some reason so we just configure
      # each package's library path.
      quotedLibDir = pkg: ''"${pkg}/lib"'';
      libDirs = lib.concatStringsSep " " (map quotedLibDir pkgSet);
    in ''
      list(APPEND Cuda${version}_INCLUDE_PATHS "${mergedTree}/include")
      list(APPEND Cuda${version}_LIB_PATHS ${libDirs})
    '';

# gcc11 is required by DCGM's very particular build system
# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22
in gcc11Stdenv.mkDerivation rec {
  pname = "dcgm";
  version = "3.1.8";

  src = fetchFromGitHub {
    owner = "NVIDIA";
    repo = "DCGM";
    rev = "refs/tags/v${version}";
    hash = "sha256-OXqXkP2ZUNPzafGIgJ0MKa39xB84keVFFYl+JsHgnks=";
  };

  # Add our paths to the CUDA paths so FindCuda.cmake can find them.
  # The generated CMake snippet is passed to the builder as an environment variable and
  # prepended to cmake/FindCuda.cmake in prePatch.
  EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;
  prePatch = ''
    echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake
  '';

  hardeningDisable = [ "all" ];

  # Tools and setup hooks that run on the build platform.
  nativeBuildInputs = [
    addOpenGLRunpath
    cmake
    git
    python3
  ];

  # Headers and libraries that get compiled or linked into DCGM. These are host-platform
  # dependencies, so they are listed here rather than in nativeBuildInputs.
  buildInputs = [
    catch2
    fmt_9
    yaml-cpp

    jsoncpp-static
    jsoncpp-static.dev
    libevent-nossl-static
    libevent-nossl-static.dev
    plog.dev # header-only
    tclap_1_4 # header-only
  ];

  # libcuda.so must be found at runtime because it is supplied by the NVIDIA
  # driver. autoAddOpenGLRunpathHook breaks on the statically linked exes.
  # Only ELF files whose NEEDED entries mention libcuda.so are patched.
  postFixup = ''
    find "$out/bin" "$out/lib" -type f -executable -print0 | while IFS= read -r -d "" f; do
      if isELF "$f" && [[ $(patchelf --print-needed "$f" || true) == *libcuda.so* ]]; then
        addOpenGLRunpath "$f"
      fi
    done
  '';

  # Fail the build if any of the CUDA packages used at build time is referenced by the output.
  disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;

  meta = {
    description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs";
    homepage = "https://developer.nvidia.com/dcgm";
    license = lib.licenses.asl20;
    maintainers = lib.teams.deshaw.members;
    mainProgram = "dcgmi";
    platforms = lib.platforms.linux;
  };
}
Project import generated by Copybara. GitOrigin-RevId: 18036c0be90f4e308ae3ebcab0e14aae0336fe42 2023-08-04 22:07:22 +00:00			`{ lib`
			`, callPackage`
			`, gcc11Stdenv`
			`, fetchFromGitHub`
			`, addOpenGLRunpath`
			`, catch2`
			`, cmake`
			`, cudaPackages_10_2`
			`, cudaPackages_11_8`
			`, cudaPackages_12`
			`, fmt_9`
			`, git`
			`, jsoncpp`
			`, libevent`
			`, plog`
			`, python3`
			`, symlinkJoin`
			`, tclap_1_4`
			`, yaml-cpp`
			`}:`
			`let`
			`# Flags copied from DCGM's libevent build script`
			`libevent-nossl = libevent.override { sslSupport = false; };`
			`libevent-nossl-static = libevent-nossl.overrideAttrs (super: {`
			`CFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";`
			`CXXFLAGS = "-Wno-cast-function-type -Wno-implicit-fallthrough -fPIC";`
			`configureFlags = super.configureFlags ++ [ "--disable-shared" "--with-pic" ];`
			`});`

			`jsoncpp-static = jsoncpp.override { enableStatic = true; };`

			`# DCGM depends on 3 different versions of CUDA at the same time.`
			`# The runtime closure, thankfully, is quite small because most things`
			`# are statically linked.`
			`cudaPackageSetByVersion = [`
			`{`
			`version = "10";`
			`# Nixpkgs cudaPackages_10 doesn't have redist packages broken out.`
			`pkgSet = [`
			`cudaPackages_10_2.cudatoolkit`
			`cudaPackages_10_2.cudatoolkit.lib`
			`];`
			`}`
			`{`
			`version = "11";`
			`pkgSet = getCudaPackages cudaPackages_11_8;`
			`}`
			`{`
			`version = "12";`
			`pkgSet = getCudaPackages cudaPackages_12;`
			`}`
			`];`

			`# Select needed redist packages from cudaPackages`
			`# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/scripts/0080_cuda.sh#L24-L39`
			`getCudaPackages = p: with p; [`
			`cuda_cccl`
			`cuda_cudart`
			`cuda_nvcc`
			`cuda_nvml_dev`
			`libcublas`
			`libcufft`
			`libcurand`
			`];`

			`# Builds CMake code to add CUDA paths for include and lib.`
			`mkAppendCudaPaths = { version, pkgSet }:`
			`let`
			`# The DCGM CMake assumes that the folder containing cuda.h contains all headers, so we must`
			`# combine everything together for headers to work.`
			`# It would be more convenient to use symlinkJoin on *just* the include subdirectories`
			`# of each package, but not all of them have an include directory and making that work`
			`# is more effort than it's worth for this temporary, build-time package.`
			`combined = symlinkJoin {`
			`name = "cuda-combined-${version}";`
			`paths = pkgSet;`
			`};`
			`# The combined package above breaks the build for some reason so we just configure`
			`# each package's library path.`
			`libs = lib.concatMapStringsSep " " (x: ''"${x}/lib"'') pkgSet;`
			`in ''`
			`list(APPEND Cuda${version}_INCLUDE_PATHS "${combined}/include")`
			`list(APPEND Cuda${version}_LIB_PATHS ${libs})`
			`'';`

			`# gcc11 is required by DCGM's very particular build system`
			`# C.f. https://github.com/NVIDIA/DCGM/blob/7e1012302679e4bb7496483b32dcffb56e528c92/dcgmbuild/build.sh#L22`
			`in gcc11Stdenv.mkDerivation rec {`
			`pname = "dcgm";`
			`version = "3.1.8";`

			`src = fetchFromGitHub {`
			`owner = "NVIDIA";`
			`repo = "DCGM";`
			`rev = "refs/tags/v${version}";`
			`hash = "sha256-OXqXkP2ZUNPzafGIgJ0MKa39xB84keVFFYl+JsHgnks=";`
			`};`

			`# Add our paths to the CUDA paths so FindCuda.cmake can find them.`
			`EXTRA_CUDA_PATHS = lib.concatMapStringsSep "\n" mkAppendCudaPaths cudaPackageSetByVersion;`
			`prePatch = ''`
			`echo "$EXTRA_CUDA_PATHS"$'\n'"$(cat cmake/FindCuda.cmake)" > cmake/FindCuda.cmake`
			`'';`

			`hardeningDisable = [ "all" ];`

			`nativeBuildInputs = [`
			`addOpenGLRunpath`
			`cmake`
			`git`
			`python3`

			`jsoncpp-static`
			`jsoncpp-static.dev`
			`libevent-nossl-static`
			`libevent-nossl-static.dev`
			`plog.dev # header-only`
			`tclap_1_4 # header-only`
			`];`

			`buildInputs = [`
			`catch2`
			`fmt_9`
			`yaml-cpp`
			`];`

			`# libcuda.so must be found at runtime because it is supplied by the NVIDIA`
			`# driver. autoAddOpenGLRunpathHook breaks on the statically linked exes.`
			`postFixup = ''`
			`find "$out/bin" "$out/lib" -type f -executable -print0 \| while IFS= read -r -d "" f; do`
			`if isELF "$f" && [[ $(patchelf --print-needed "$f" \|\| true) == *libcuda.so* ]]; then`
			`addOpenGLRunpath "$f"`
			`fi`
			`done`
			`'';`

			`disallowedReferences = lib.concatMap (x: x.pkgSet) cudaPackageSetByVersion;`

			`meta = with lib; {`
			`description = "Data Center GPU Manager (DCGM) is a daemon that allows users to monitor NVIDIA data-center GPUs.";`
			`homepage = "https://developer.nvidia.com/dcgm";`
			`license = licenses.asl20;`
			`maintainers = teams.deshaw.members;`
			`mainProgram = "dcgmi";`
			`platforms = platforms.linux;`
			`};`
			`}`