# Nix source file (248 lines, 6.6 KiB)
# Arguments of the rocBLAS package function. Everything without a default is
# expected to be supplied by the caller (presumably callPackage — confirm).
{
  # Generic build plumbing
  lib,
  stdenv,
  fetchFromGitHub,
  rocmUpdateScript,
  cmake,
  rocm-cmake,
  clr,
  python3,
  python3Packages,

  # Overridden in the let-bindings below to produce the per-architecture
  # Tensile library builds (presumably this very package, passed back in).
  rocblas,

  # Only used when buildTensile is enabled
  tensile,
  msgpack,
  libxml2,

  # Only used when tests and/or benchmarks are built
  gtest,
  gfortran,
  openmp,
  amd-blis,

  # Feature switches
  buildTensile ? true,
  buildTests ? false, # adds a "test" output
  buildBenchmarks ? false, # adds a "benchmark" output

  # Forwarded verbatim to the corresponding -DTensile_* CMake options
  tensileLogic ? "asm_full",
  tensileCOVersion ? "default",
  tensileSepArch ? true,
  tensileLazyLib ? true,
  tensileLibFormat ? "msgpack",

  # Joined with ";" and passed as -DAMDGPU_TARGETS
  gpuTargets ? [ "all" ],
}:

let
  # Build rocblas for just `targets` and rename it to
  # "rocblas-tensile-<suffix>". The derivation below keys its behaviour off
  # pname, so the rename is what switches a build into "Tensile libraries
  # only" mode.
  mkTensileVariant =
    suffix: targets:
    (rocblas.override { gpuTargets = targets; }).overrideAttrs {
      pname = "rocblas-tensile-${suffix}";
    };

  # NOTE: Update the default GPU targets on every update
  gfx80 = mkTensileVariant "gfx80" [ "gfx803" ];

  gfx90 = mkTensileVariant "gfx90" [
    "gfx900"
    "gfx906:xnack-"
    "gfx908:xnack-"
    "gfx90a:xnack+"
    "gfx90a:xnack-"
  ];

  gfx94 = mkTensileVariant "gfx94" [
    "gfx940"
    "gfx941"
    "gfx942"
  ];

  gfx10 = mkTensileVariant "gfx10" [
    "gfx1010"
    "gfx1012"
    "gfx1030"
  ];

  gfx11 = mkTensileVariant "gfx11" [
    "gfx1100"
    "gfx1101"
    "gfx1102"
  ];

  # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files
  fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; };
in
# The rocBLAS derivation. The same expression serves three roles, selected by
# the final pname:
#   * "rocblas"                    – the real package; links in the prebuilt
#                                    Tensile libraries from the let-bound variants.
#   * "rocblas-tensile-fallbacks"  – keeps only TensileManifest.txt and the
#                                    *_fallback.dat files.
#   * any other pname              – keeps the Tensile library files minus the
#                                    manifest and fallback files.
stdenv.mkDerivation (
  finalAttrs:
  let
    # Render a Nix boolean as a CMake ON/OFF value.
    cmakeOnOff = enabled: if enabled then "ON" else "OFF";

    # Role predicates; they read the *final* pname so that overrideAttrs on
    # pname changes which phases snippets are emitted.
    isMainPackage = finalAttrs.pname == "rocblas";
    isFallbacksPackage = finalAttrs.pname == "rocblas-tensile-fallbacks";

    # True when either client flavour (tests or benchmarks) is requested.
    buildClients = buildTests || buildBenchmarks;
  in
  {
    pname = "rocblas";
    version = "5.7.1";

    outputs = [ "out" ] ++ lib.optional buildTests "test" ++ lib.optional buildBenchmarks "benchmark";

    src = fetchFromGitHub {
      owner = "ROCm";
      repo = "rocBLAS";
      rev = "rocm-${finalAttrs.version}";
      hash = "sha256-3wKnwvAra8u9xqlC05wUD+gSoBILTVJFU2cIV6xv3Lk=";
    };

    nativeBuildInputs = [
      cmake
      rocm-cmake
      clr
    ];

    buildInputs =
      [ python3 ]
      ++ lib.optionals buildTensile [
        msgpack
        libxml2
        python3Packages.msgpack
        python3Packages.joblib
      ]
      ++ lib.optional buildTests gtest
      ++ lib.optionals buildClients [
        gfortran
        openmp
        amd-blis
      ]
      ++ lib.optional (buildTensile || buildClients) python3Packages.pyyaml;

    # NOTE(review): the Tensile paths below hard-code /build/source, i.e. they
    # assume the source tree is unpacked there — confirm for the builders in use.
    cmakeFlags =
      [
        "-DCMAKE_C_COMPILER=hipcc"
        "-DCMAKE_CXX_COMPILER=hipcc"
        "-Dpython=python3"
        "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
        "-DBUILD_WITH_TENSILE=${cmakeOnOff buildTensile}"
        # Manually define CMAKE_INSTALL_<DIR>
        # See: https://github.com/NixOS/nixpkgs/pull/197838
        "-DCMAKE_INSTALL_BINDIR=bin"
        "-DCMAKE_INSTALL_LIBDIR=lib"
        "-DCMAKE_INSTALL_INCLUDEDIR=include"
      ]
      ++ lib.optionals buildTensile [
        "-DVIRTUALENV_HOME_DIR=/build/source/tensile"
        "-DTensile_TEST_LOCAL_PATH=/build/source/tensile"
        "-DTensile_ROOT=/build/source/tensile/${python3.sitePackages}/Tensile"
        "-DTensile_LOGIC=${tensileLogic}"
        "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}"
        "-DTensile_SEPARATE_ARCHITECTURES=${cmakeOnOff tensileSepArch}"
        "-DTensile_LAZY_LIBRARY_LOADING=${cmakeOnOff tensileLazyLib}"
        "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}"
      ]
      ++ lib.optional buildTests "-DBUILD_CLIENTS_TESTS=ON"
      ++ lib.optional buildBenchmarks "-DBUILD_CLIENTS_BENCHMARKS=ON"
      ++ lib.optional buildClients "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis";

    # Three independent snippets, concatenated in this order:
    #   1. variant builds: make library/src/CMakeLists.txt return() early;
    #   2. main build with Tensile: symlink the prebuilt library files;
    #   3. any build with Tensile: use a writable copy of Tensile in the tree.
    postPatch =
      lib.optionalString (!isMainPackage) ''
        # Return early and install tensile files manually
        substituteInPlace library/src/CMakeLists.txt \
          --replace "set_target_properties( TensileHost PROPERTIES OUTPUT_NAME" "return()''\nset_target_properties( TensileHost PROPERTIES OUTPUT_NAME"
      ''
      + lib.optionalString (buildTensile && isMainPackage) ''
        # Link the prebuilt Tensile files
        mkdir -p build/Tensile/library

        for path in ${gfx80} ${gfx90} ${gfx94} ${gfx10} ${gfx11} ${fallbacks}; do
          ln -s $path/lib/rocblas/library/* build/Tensile/library
        done

        unlink build/Tensile/library/TensileManifest.txt
      ''
      + lib.optionalString buildTensile ''
        # Tensile REALLY wants to write to the nix directory if we include it normally
        cp -a ${tensile} tensile
        chmod +w -R tensile

        # Rewrap Tensile
        substituteInPlace tensile/bin/{.t*,.T*,*} \
          --replace "${tensile}" "/build/source/tensile"

        substituteInPlace CMakeLists.txt \
          --replace "include(virtualenv)" "" \
          --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" ""
      '';

    # Role-specific installation followed by splitting client binaries into
    # the optional "test" / "benchmark" outputs. Snippet order matters: $out/bin
    # is copied into the extra outputs before it is removed from $out.
    postInstall =
      lib.optionalString isMainPackage ''
        ln -sf ${fallbacks}/lib/rocblas/library/TensileManifest.txt $out/lib/rocblas/library
      ''
      + lib.optionalString (!isMainPackage) ''
        mkdir -p $out/lib/rocblas/library
        rm -rf $out/share
      ''
      + lib.optionalString (!(isMainPackage || isFallbacksPackage)) ''
        rm Tensile/library/{TensileManifest.txt,*_fallback.dat}
        mv Tensile/library/* $out/lib/rocblas/library
      ''
      + lib.optionalString isFallbacksPackage ''
        mv Tensile/library/{TensileManifest.txt,*_fallback.dat} $out/lib/rocblas/library
      ''
      + lib.optionalString buildTests ''
        mkdir -p $test/bin
        cp -a $out/bin/* $test/bin
        rm $test/bin/*-bench || true
      ''
      + lib.optionalString buildBenchmarks ''
        mkdir -p $benchmark/bin
        cp -a $out/bin/* $benchmark/bin
        rm $benchmark/bin/*-test || true
      ''
      + lib.optionalString buildClients ''
        rm -rf $out/bin
      '';

    passthru.updateScript = rocmUpdateScript {
      name = finalAttrs.pname;
      inherit (finalAttrs.src) owner repo;
    };

    requiredSystemFeatures = [ "big-parallel" ];

    meta = {
      description = "BLAS implementation for ROCm platform";
      homepage = "https://github.com/ROCm/rocBLAS";
      license = [ lib.licenses.mit ];
      maintainers = lib.teams.rocm.members;
      platforms = lib.platforms.linux;
      # Broken when the package's minor version differs from the compiler's,
      # or once the version reaches 6.0.0.
      broken =
        lib.versions.minor finalAttrs.version != lib.versions.minor stdenv.cc.version
        || lib.versionAtLeast finalAttrs.version "6.0.0";
    };
  }
)