{ lib , stdenv , fetchFromGitHub , fetchpatch , rocmUpdateScript , runCommand , cmake , rocm-cmake , clr , python3 , tensile , msgpack , libxml2 , gtest , gfortran , openmp , amd-blis , python3Packages , buildTensile ? true , buildTests ? false , buildBenchmarks ? false , tensileLogic ? "asm_full" , tensileCOVersion ? "default" # https://github.com/ROCm/Tensile/issues/1757 # Allows gfx101* users to use rocBLAS normally. # Turn the below two values to `true` after the fix has been cherry-picked # into a release. Just backporting that single fix is not enough because it # depends on some previous commits. , tensileSepArch ? false , tensileLazyLib ? false , tensileLibFormat ? "msgpack" # `gfx940`, `gfx941` are not present in this list because they are early # engineering samples, and all final MI300 hardware are `gfx942`: # https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 # # `gfx1012` is not present in this list because the ISA compatibility patches # would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will # always try to use `gfx1010` code objects, hence building for `gfx1012` is # useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 , gpuTargets ? [ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ] }: stdenv.mkDerivation (finalAttrs: { pname = "rocblas"; version = "6.0.2"; outputs = [ "out" ] ++ lib.optionals buildTests [ "test" ] ++ lib.optionals buildBenchmarks [ "benchmark" ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; hash = "sha256-G68d/gvBbTdNx8xR3xY+OkBm5Yxq1NFjxby9BbpOcUk="; }; nativeBuildInputs = [ cmake rocm-cmake clr ] ++ lib.optionals buildTensile [ tensile ]; buildInputs = [ python3 ] ++ lib.optionals buildTensile [ msgpack libxml2 python3Packages.msgpack python3Packages.joblib ] ++ lib.optionals buildTests [ gtest ] ++ lib.optionals (buildTests || buildBenchmarks) [ gfortran openmp amd-blis ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ python3Packages.pyyaml ]; cmakeFlags = [ (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") (lib.cmakeFeature "python" "python3") (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) # rocblas header files are not installed unless we set this (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") ] ++ lib.optionals buildTensile [ (lib.cmakeBool "BUILD_WITH_PIP" false) (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat) (lib.cmakeBool "Tensile_PRINT_DEBUG" true) ] ++ lib.optionals (buildTests || buildBenchmarks) [ (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") ]; patches = [ (fetchpatch { name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; }) ]; # Pass $NIX_BUILD_CORES to Tensile postPatch = '' substituteInPlace cmake/build-options.cmake \ --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' ''; passthru.updateScript = rocmUpdateScript { name = finalAttrs.pname; owner = finalAttrs.src.owner; repo = finalAttrs.src.repo; }; requiredSystemFeatures = [ "big-parallel" ]; meta = with lib; { description = "BLAS implementation for ROCm platform"; homepage = "https://github.com/ROCm/rocBLAS"; license = with licenses; [ mit ]; maintainers = teams.rocm.members; platforms = platforms.linux; broken = versions.minor finalAttrs.version != versions.minor stdenv.cc.version || versionAtLeast finalAttrs.version "7.0.0"; }; })