depot/third_party/nixpkgs/pkgs/development/libraries/rocfft/split-kernel-compilation.patch

125 lines
4.7 KiB
Diff
Raw Normal View History

diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
index 3a16304..606b711 100644
--- a/library/src/CMakeLists.txt
+++ b/library/src/CMakeLists.txt
@@ -250,12 +250,12 @@ foreach( target
endforeach()
-add_executable( rocfft_aot_helper
+add_executable( rocfft_aot_helper EXCLUDE_FROM_ALL
rocfft_aot_helper.cpp
rocfft_stub.cpp
)
-add_executable( rocfft_config_search
+add_executable( rocfft_config_search EXCLUDE_FROM_ALL
rocfft_config_search.cpp
rocfft_stub.cpp
)
@@ -279,10 +279,10 @@ endif()
target_link_libraries( rocfft PRIVATE ${ROCFFT_DEVICE_LINK_LIBS} )
-target_link_libraries( rocfft PRIVATE rocfft-device-0 )
-target_link_libraries( rocfft PRIVATE rocfft-device-1 )
-target_link_libraries( rocfft PRIVATE rocfft-device-2 )
-target_link_libraries( rocfft PRIVATE rocfft-device-3 )
+foreach( sub ${AMDGPU_TARGETS} )
+ target_link_libraries( rocfft PRIVATE -lrocfft-device-${sub} )
+endforeach()
+
foreach( target rocfft rocfft_aot_helper rocfft_config_search )
# RTC uses dladdr to find the RTC helper program
if( NOT WIN32 )
@@ -347,7 +347,7 @@ add_custom_command(
DEPENDS rocfft_aot_helper rocfft_rtc_helper
COMMENT "Compile kernels into shipped cache file"
)
-add_custom_target( rocfft_kernel_cache_target ALL
+add_custom_target( rocfft_kernel_cache_target
DEPENDS rocfft_kernel_cache.db
VERBATIM
)
@@ -392,7 +392,8 @@ else()
endif()
rocm_install(FILES ${ROCFFT_KERNEL_CACHE_PATH}
DESTINATION "${ROCFFT_KERNEL_CACHE_INSTALL_DIR}"
- COMPONENT runtime
+ COMPONENT kernel_cache
+ EXCLUDE_FROM_ALL
)
# PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt
index 9f7b85f..73a8ec9 100644
--- a/library/src/device/CMakeLists.txt
+++ b/library/src/device/CMakeLists.txt
@@ -170,11 +170,11 @@ list( SORT rocfft_device_source )
# functions callable by rocFFT and depends on amdhip64, and another
# one usable by AOT RTC that contains no device code
list( FILTER rocfft_device_source EXCLUDE REGEX function_pool.cpp )
-add_library( rocfft-function-pool OBJECT
+add_library( rocfft-function-pool OBJECT EXCLUDE_FROM_ALL
function_pool.cpp
)
target_compile_definitions( rocfft-function-pool PRIVATE FUNCTION_POOL_STANDALONE_BODY= )
-add_library( rocfft-function-pool-standalone OBJECT
+add_library( rocfft-function-pool-standalone OBJECT EXCLUDE_FROM_ALL
function_pool.cpp
)
target_compile_definitions( rocfft-function-pool-standalone PRIVATE FUNCTION_POOL_STANDALONE_BODY={} )
@@ -193,26 +193,15 @@ foreach( pool rocfft-function-pool rocfft-function-pool-standalone )
add_dependencies(${pool} gen_headers_target)
endforeach()
-list( LENGTH rocfft_device_source rocfft_device_source_len )
-math(EXPR split_len "${rocfft_device_source_len} / 4")
-math(EXPR split_idx_2 "${rocfft_device_source_len} / 4 * 2")
-math(EXPR split_idx_3 "${rocfft_device_source_len} / 4 * 3")
-
-list( SUBLIST rocfft_device_source 0 ${split_len} rocfft_device_source_0 )
-list( SUBLIST rocfft_device_source ${split_len} ${split_len} rocfft_device_source_1 )
-list( SUBLIST rocfft_device_source ${split_idx_2} ${split_len} rocfft_device_source_2 )
-list( SUBLIST rocfft_device_source ${split_idx_3} -1 rocfft_device_source_3 )
-
-foreach( sub RANGE 3 )
- set( rocfft_device_source_var rocfft_device_source_${sub} )
+foreach( sub ${AMDGPU_TARGETS} )
if(NOT SINGLELIB)
- add_library( rocfft-device-${sub}
- ${${rocfft_device_source_var}} )
+ add_library( rocfft-device-${sub} EXCLUDE_FROM_ALL
+ ${rocfft_device_source} )
else()
# Compile the device lib as a static library, which is then linked
# into librocfft.so Useful for testing purposes.
- add_library( rocfft-device-${sub} STATIC
- ${${rocfft_device_source_var}} )
+ add_library( rocfft-device-${sub} STATIC EXCLUDE_FROM_ALL
+ ${rocfft_device_source} )
# if we're building singlelib, we don't want to export any of the
# device library symbols to the main library
@@ -241,9 +230,7 @@ foreach( sub RANGE 3 )
# Set AMD GPU architecture options
# Enable compilation of desired architectures
- foreach( target ${AMDGPU_TARGETS} )
- target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${target} )
- endforeach( )
+ target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${sub} )
target_include_directories( rocfft-device-${sub}
PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
@@ -268,9 +255,4 @@ foreach( sub RANGE 3 )
if( NOT BUILD_SHARED_LIBS )
set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" )
endif( )
-
- rocm_install_targets(
- TARGETS
- rocfft-device-${sub}
- )
endforeach()