{ rocblas , lib , stdenv , fetchFromGitHub , rocmUpdateScript , cmake , rocm-cmake , clr , python3 , tensile , msgpack , libxml2 , gtest , gfortran , openmp , amd-blis , python3Packages , buildTensile ? true , buildTests ? false , buildBenchmarks ? false , tensileLogic ? "asm_full" , tensileCOVersion ? "default" , tensileSepArch ? true , tensileLazyLib ? true , tensileLibFormat ? "msgpack" , gpuTargets ? [ "all" ] }: let # NOTE: Update the default GPU targets on every update gfx80 = (rocblas.override { gpuTargets = [ "gfx803" ]; }).overrideAttrs { pname = "rocblas-tensile-gfx80"; }; gfx90 = (rocblas.override { gpuTargets = [ "gfx900" "gfx906:xnack-" "gfx908:xnack-" "gfx90a:xnack+" "gfx90a:xnack-" ]; }).overrideAttrs { pname = "rocblas-tensile-gfx90"; }; gfx94 = (rocblas.override { gpuTargets = [ "gfx940" "gfx941" "gfx942" ]; }).overrideAttrs { pname = "rocblas-tensile-gfx94"; }; gfx10 = (rocblas.override { gpuTargets = [ "gfx1010" "gfx1012" "gfx1030" ]; }).overrideAttrs { pname = "rocblas-tensile-gfx10"; }; gfx11 = (rocblas.override { gpuTargets = [ "gfx1100" "gfx1101" "gfx1102" ]; }).overrideAttrs { pname = "rocblas-tensile-gfx11"; }; # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; }; in stdenv.mkDerivation (finalAttrs: { pname = "rocblas"; version = "5.7.1"; outputs = [ "out" ] ++ lib.optionals buildTests [ "test" ] ++ lib.optionals buildBenchmarks [ "benchmark" ]; src = fetchFromGitHub { owner = "ROCm"; repo = "rocBLAS"; rev = "rocm-${finalAttrs.version}"; hash = "sha256-3wKnwvAra8u9xqlC05wUD+gSoBILTVJFU2cIV6xv3Lk="; }; nativeBuildInputs = [ cmake rocm-cmake clr ]; buildInputs = [ python3 ] ++ lib.optionals buildTensile [ msgpack libxml2 python3Packages.msgpack python3Packages.joblib ] ++ lib.optionals buildTests [ gtest ] ++ lib.optionals (buildTests || buildBenchmarks) [ gfortran openmp amd-blis ] ++ lib.optionals (buildTensile || buildTests || buildBenchmarks) [ python3Packages.pyyaml ]; cmakeFlags = [ "-DCMAKE_C_COMPILER=hipcc" "-DCMAKE_CXX_COMPILER=hipcc" "-Dpython=python3" "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}" # Manually define CMAKE_INSTALL_