diff --git a/CMakeLists.txt b/CMakeLists.txt
index 92e580e28..2f5df770e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,14 @@
 cmake_minimum_required(VERSION 3.20)
 
+# Work around a CUDA/glibc declaration conflict (rsqrt/rsqrtf) seen with
+# newer glibc versions while keeping standard libc feature declarations.
+if(NOT CMAKE_CUDA_FLAGS_INIT MATCHES "(^| )-U_GNU_SOURCE( |$)")
+    string(APPEND CMAKE_CUDA_FLAGS_INIT " -U_GNU_SOURCE")
+endif()
+if(NOT CMAKE_CUDA_FLAGS_INIT MATCHES "(^| )-D_DEFAULT_SOURCE( |$)")
+    string(APPEND CMAKE_CUDA_FLAGS_INIT " -D_DEFAULT_SOURCE")
+endif()
+
 project(cuda-samples LANGUAGES C CXX CUDA)
 
 find_package(CUDAToolkit REQUIRED)
@@ -13,7 +22,13 @@ set(CMAKE_CUDA_STANDARD 17)
 set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 
 set(CMAKE_CUDA_ARCHITECTURES 75 80 86 87 89 90 100 110 120)
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE")
+# Force-include the pthread clock-lock declarations that -U_GNU_SOURCE can hide.
+# The header includes <pthread.h>, so it is skipped for Windows builds, and it
+# is located via PROJECT_SOURCE_DIR so the path stays valid in a superbuild.
+if(NOT WIN32)
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -include ${PROJECT_SOURCE_DIR}/cmake/pthread_clock_compat.h")
+endif()
 if(ENABLE_CUDA_DEBUG)
     set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")  # enable cuda-gdb (may significantly affect performance on some targets)
 else()
diff --git a/README.md b/README.md
index 4686e11d5..7fbfd9e60 100644
--- a/README.md
+++ b/README.md
@@ -39,20 +39,41 @@ Ensure that CMake (version 3.20 or later) is installed. Install it using your pa
 e.g.
 ```sudo apt install cmake```
 
-Navigate to the root of the cloned repository and create a build directory:
-```
-mkdir build && cd build
-```
-Configure the project with CMake:
-```
-cmake ..
-```
-Build the samples:
-```
-make -j$(nproc)
+From the repository root, configure and build from a fresh build directory:
+```bash
+rm -rf build
+cmake -S . -B build -G Ninja \
+  -DCMAKE_C_COMPILER=/usr/bin/gcc-13 \
+  -DCMAKE_CXX_COMPILER=/usr/bin/g++-13 \
+  -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-13
+cmake --build build -j"$(nproc)"
 ```
 Run the samples from their respective directories in the build folder. You can also follow this process from and subdirectory of the samples repo, or from within any individual sample.
 
+#### Linux nvcc/glibc compatibility note
+
+On some Linux toolchain combinations, CUDA host compilation can fail with errors similar to:
+
+```text
+identifier "pthread_cond_clockwait" is undefined
+identifier "pthread_mutex_clocklock" is undefined
+identifier "pthread_rwlock_clockwrlock" is undefined
+identifier "pthread_rwlock_clockrdlock" is undefined
+```
+
+The root cause is a feature-macro interaction between nvcc, glibc headers, and libstdc++:
+
+- `-U_GNU_SOURCE` is needed in this project to avoid a separate CUDA/glibc conflict involving `rsqrt`/`rsqrtf` declarations.
+- But clearing `_GNU_SOURCE` can hide pthread clock-lock APIs that newer libstdc++ headers expect.
+
+Workaround and solution used in this repository:
+
+- Keep the `-U_GNU_SOURCE -D_DEFAULT_SOURCE` nvcc flags required for the math declaration conflict.
+- Add a compatibility header (`cmake/pthread_clock_compat.h`) that provides explicit declarations for the pthread clock-lock APIs used by libstdc++.
+- Force-include that header for CUDA compilations from CMake, and apply the same flags in custom `nvcc` commands used by fatbin/PTX generation targets.
+
+If your system uses different compiler paths, replace `/usr/bin/gcc-13` and `/usr/bin/g++-13` with compatible host compiler paths available on your machine.
+
 ### Windows
 
 Language services for CMake are available in Visual Studio 2019 version 16.5 or later, and you can directly import the CUDA samples repository from either the root level or from any
diff --git a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt
index 735cbe621..80c59febe 100644
--- a/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt
+++ b/Samples/0_Introduction/matrixMulDrv/CMakeLists.txt
@@ -56,7 +56,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_FATBIN_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
 )
diff --git a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt
index a05e5b60f..566c9774c 100644
--- a/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt
+++ b/Samples/0_Introduction/simpleDrvRuntime/CMakeLists.txt
@@ -58,7 +58,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_FATBIN_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
 )
diff --git a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt
index e9e36b02f..64992c12f 100644
--- a/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt
+++ b/Samples/0_Introduction/simpleTextureDrv/CMakeLists.txt
@@ -55,7 +55,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_FATBIN_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
 )
diff --git a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt
index a08c02c92..640703d27 100644
--- a/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt
+++ b/Samples/0_Introduction/vectorAddDrv/CMakeLists.txt
@@ -55,7 +55,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_FATBIN_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
 )
diff --git a/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt b/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt
index 8989d1c3c..fd85a0f9b 100644
--- a/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt
+++ b/Samples/0_Introduction/vectorAddMMAP/CMakeLists.txt
@@ -50,7 +50,7 @@ else()
 
     add_custom_command(
         OUTPUT ${CUDA_FATBIN_FILE}
-        COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+        COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
         DEPENDS ${CUDA_KERNEL_SOURCE}
         COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
     )
diff --git a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt
index 81b626ff9..a02f9027b 100644
--- a/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt
+++ b/Samples/2_Concepts_and_Techniques/threadMigration/CMakeLists.txt
@@ -60,7 +60,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_FATBIN_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
 )
diff --git a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt
index 9856efbef..c430ba1fa 100644
--- a/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt
+++ b/Samples/3_CUDA_Features/memMapIPCDrv/CMakeLists.txt
@@ -58,7 +58,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_PTX_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA PTX: ${CUDA_PTX_FILE}"
 )
diff --git a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt
index 15ad74f5b..60a35fbc8 100644
--- a/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt
+++ b/Samples/3_CUDA_Features/ptxjit/CMakeLists.txt
@@ -51,7 +51,7 @@ endif()
 
 add_custom_command(
     OUTPUT ${CUDA_PTX_FILE}
-    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE}
+    COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} -Wno-deprecated-gpu-targets -U_GNU_SOURCE -D_DEFAULT_SOURCE -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE}
     DEPENDS ${CUDA_KERNEL_SOURCE}
     COMMENT "Building CUDA PTX: ${CUDA_PTX_FILE}"
 )
diff --git a/cmake/pthread_clock_compat.h b/cmake/pthread_clock_compat.h
new file mode 100644
index 000000000..e06a82e98
--- /dev/null
+++ b/cmake/pthread_clock_compat.h
@@ -0,0 +1,38 @@
+/*
+ * Explicit declarations of the pthread clock-lock functions. Clearing
+ * _GNU_SOURCE (-U_GNU_SOURCE) can hide them in the glibc headers, while newer
+ * libstdc++ headers still expect them to be declared. This header is
+ * force-included into CUDA compilations by the top-level CMakeLists.txt.
+ */
+#ifndef CUDA_SAMPLES_PTHREAD_CLOCK_COMPAT_H_
+#define CUDA_SAMPLES_PTHREAD_CLOCK_COMPAT_H_
+
+#include <pthread.h>
+#include <time.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int pthread_cond_clockwait(pthread_cond_t *__restrict cond,
+                           pthread_mutex_t *__restrict mutex,
+                           clockid_t clock_id,
+                           const struct timespec *__restrict abstime);
+
+int pthread_mutex_clocklock(pthread_mutex_t *__restrict mutex,
+                            clockid_t clock_id,
+                            const struct timespec *__restrict abstime);
+
+int pthread_rwlock_clockrdlock(pthread_rwlock_t *__restrict rwlock,
+                               clockid_t clock_id,
+                               const struct timespec *__restrict abstime);
+
+int pthread_rwlock_clockwrlock(pthread_rwlock_t *__restrict rwlock,
+                               clockid_t clock_id,
+                               const struct timespec *__restrict abstime);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif