## Build script for the astaroth_<prec> shared library (gpu_astaroth.cc, plus
## loadStore.cc when PACKED_DATA_TRANSFERS is enabled). The library is linked
## against the `astaroth_core` and `kernels` libraries, which are looked up in
## the ./submodule/build tree via the link directories listed below.

## CMake settings
# V3.9 required for first-class CUDA support
# V3.17 required for the FindCUDAToolkit package
# V3.18 required for CMAKE_CUDA_ARCHITECTURES
cmake_minimum_required(VERSION 3.19)

## Options
option(DOUBLE_PRECISION "Generates double precision code." ON)
option(USE_HIP "Use HIP as the underlying GPGPU library instead of CUDA" ON)
# NOTE: USE_OMP is currently not acted upon in this file; the OpenMP block
# further down that consumed it is commented out.
option(USE_OMP "Enables use of openmp" ON)
option(USE_DARDEL "Target machine is Dardel(PDC, Stockholm)" OFF)
option(PACKED_DATA_TRANSFERS "Enables kernel for packed data transfers" OFF)
option(SINGLEPASS_INTEGRATION "Enables single pass integration" OFF)

# The compilers must be selected before project() so that language detection
# runs against hipcc.
if (USE_HIP)
    find_program(CMAKE_HIP_COMPILER hipcc REQUIRED)
    set(CMAKE_C_COMPILER "${CMAKE_HIP_COMPILER}")
    set(CMAKE_CXX_COMPILER "${CMAKE_HIP_COMPILER}")
endif()

## Project settings
project(astaroth C CXX)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

## Project-wide compilation flags
# -Werror TODO readd
#set(COMMON_FLAGS "-DOMPI_SKIP_MPICXX -Wall -Wextra -Wdouble-promotion -Wfloat-conversion -Wshadow -Wno-unused-result") #-DOMPI_SKIP_MPICXX is to force OpenMPI to use the C interface
set(COMMON_FLAGS "-DOMPI_SKIP_MPICXX -Wall -Wno-unused-variable -Wno-unused-parameter -Wno-unused-function -Wno-#pragma-messages -Wno-extra-tokens -Wdouble-promotion -Wfloat-conversion -Wshadow -Wno-unused-result") #-DOMPI_SKIP_MPICXX is to force OpenMPI to use the C interface
string(APPEND CMAKE_C_FLAGS " ${COMMON_FLAGS}")
string(APPEND CMAKE_CXX_FLAGS " ${COMMON_FLAGS}")

# Require the requested standards instead of letting CMake silently fall back
# to an older one when the compiler does not support them.
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For IDE highlighting (clangd)

## Build type
if (NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release") # Default
endif()
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

## CUDA/HIP
if (USE_HIP)
    add_definitions(-DAC_USE_HIP=1)
    set(GPU_TARGETS "gfx908" CACHE STRING "GPU target")
    # NOTE(review): the offload architectures below are hard-coded and are not
    # derived from GPU_TARGETS - confirm whether that is intended.
    string(APPEND CMAKE_CXX_FLAGS " --offload-arch=gfx908 --offload-arch=gfx90a")

    # Workaround for finding HIP on Triton
    list(APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)

    # ROCm
    #include_directories(/opt/rocm/include)
    #link_directories(/opt/rocm/lib)

    # Roctracer
    include_directories(/opt/rocm/roctracer/include)
    link_directories(/opt/rocm/roctracer/lib)

    # Rocrand
    include_directories(
        /opt/rocm/hiprand/include
        /opt/rocm/rocrand/include
    )
    link_directories(
        /opt/rocm/hiprand/lib
        /opt/rocm/rocrand/lib
    )

    find_package(hip)
else ()
    enable_language(CUDA)

    # Commented out CUDA host compiler flags: seem to be ignored anyways
    #string (REPLACE " " "," CUDA_COMMON_FLAGS "${COMMON_FLAGS}")
    #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -forward-unknown-to-host-compiler -Wall -Wextra -Wdouble-promotion -Wfloat-conversion -Wshadow --compiler-options=${CUDA_COMMON_FLAGS}")

    # To inspect ptx, run
    # cmake .. && make -j && nvcc -DAC_DOUBLE_PRECISION=1 --generate-code=arch=compute_60,code=[compute_60,sm_60] -c ../src/core/kernels/kernels.cu -I acc-runtime/api/ -I ../include/ -I ../acc-runtime/api/ -I ../acc-runtime -ptx -Xptxas=-Werror,--warn-on-double-precision-use
    # less kernels.ptx
    # Additional compilation info: --resource-usage

    # Not supported with older CUDA
    # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror all-warnings")

    # In single-precision builds, have ptxas warn about double-precision use.
    if (NOT DOUBLE_PRECISION)
        string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=--warn-on-double-precision-use") # -Werror,
    endif()
    #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -dlcm=ca") # Cache global loads in L1
    #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xptxas -dlcm=cg") # Cache global loads in L2 (default on >= NVIDIA Fermi)

    # Set device code architecture
    if (CUDA_ARCHITECTURES)
        set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCHITECTURES}) # User-specified
    else ()
        set(CMAKE_CUDA_ARCHITECTURES 60 61 70 80) # Default
    endif()
endif()

# OpenMP support is currently disabled (see the note on the USE_OMP option).
#if (USE_OMP)
    #find_package(OpenMP)
    #if (OPENMP_FOUND)
        #set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
        #set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
        #set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
    #endif()
#endif()

include(FetchContent)

## Global flags
# Precision: AC_DOUBLE_PRECISION is always defined (1 or 0); DOUBLE_PRECISION
# is defined only for double-precision builds. PREC is the suffix of the
# library target created at the end of this file.
if (DOUBLE_PRECISION)
    add_definitions(-DAC_DOUBLE_PRECISION=1)
    add_definitions(-DDOUBLE_PRECISION)
    set(PREC "dbl")
else()
    add_definitions(-DAC_DOUBLE_PRECISION=0)
    set(PREC "sgl")
endif()

if (SINGLEPASS_INTEGRATION)
    add_definitions(-DSINGLEPASS=1)
endif()

## MPI
if (USE_DARDEL)
    # Hard-coded Cray MPICH location used when USE_DARDEL is ON.
    set(MPI_INCLUDE /opt/cray/pe/mpich/8.1.25/ofi/cray/10.0/include)
    set(MPI_LIB /opt/cray/pe/mpich/8.1.25/ofi/cray/10.0/lib)
    include_directories(${MPI_INCLUDE})
    link_directories(${MPI_LIB})
else()
    find_package(MPI REQUIRED COMPONENTS C)
    include_directories(${MPI_C_INCLUDE_DIRS})
endif()
add_definitions(-DAC_MPI_ENABLED=1)
add_definitions(-DAC_DEFAULT_CONFIG="${CMAKE_SOURCE_DIR}/config/astaroth.conf")

## Includes
include_directories(
    ./submodule/include # Library headers
    ./submodule/build
    ./submodule/build/acc-runtime/api
    ./submodule/acc-runtime/api
    ./..
    #${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} # CUDA headers
    ./submodule/build/src/core
    ./submodule/build/src/core/kernels
    ./submodule/build/src/utils
    ./submodule/src/core/kernels
    ./submodule/src/core
)

## Library search paths
link_directories(
    ./submodule/build/src/core
    ./submodule/build/src/core/kernels
    ./submodule/build/src/utils
    ./submodule/build/acc-runtime/api
)

# MODULE_PREFIX, MODULE_INFIX and MODULE_SUFFIX are not set in this file; they
# are forwarded to the sources as the MODPRE, MODIN and MODSUF macros.
add_compile_options(-DMODPRE=${MODULE_PREFIX} -DMODIN=${MODULE_INFIX} -DMODSUF=${MODULE_SUFFIX})

## Library target
if (PACKED_DATA_TRANSFERS)
    add_compile_options(-DPACKED_DATA_TRANSFERS=1)
    add_library(astaroth_${PREC} SHARED gpu_astaroth.cc loadStore.cc)
else()
    add_library(astaroth_${PREC} SHARED gpu_astaroth.cc)
endif()

# PUBLIC keeps the transitive linking behaviour of the former keyword-less
# signature while making the visibility explicit.
target_link_libraries(astaroth_${PREC} PUBLIC astaroth_core kernels)