Skip to content

Conversation

@oschuett
Copy link
Member

No description provided.

@oschuett oschuett marked this pull request as ready for review July 13, 2023 16:21
@mtaillefumier
Copy link
Contributor

That is very useful to get this up and running. One minor change should be made in the CMakeList.txt file. CUDA::cusolver needs to be added to

target_link_libraries(
      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
                               CUDA::cudart CUDA::cuda_driver)

no further changes are needed so far in the cmake build system.

@mtaillefumier
Copy link
Contributor

mtaillefumier commented Jul 14, 2023

Correction a bit more work needs to be done for the cmake build system. This is a patch fixing it. CUSOLVER is off by default

From 38a55b11d735d5878ba1684b608d3272df6724a4 Mon Sep 17 00:00:00 2001
From: "Dr. Mathieu Taillefumier" <mathieu.taillefumier@free.fr>
Date: Fri, 14 Jul 2023 09:35:03 +0200
Subject: [PATCH] [cmake] include cusolver to the cmake build system

---
 CMakeLists.txt     |  5 +++++
 src/CMakeLists.txt | 11 +++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e44497cbd..017db4f23 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -106,6 +106,9 @@ option(CP2K_USE_SPLA
 option(CP2K_USE_METIS "enable metis library support" OFF)
 option(CP2K_USE_LIBXSMM "Use libxsmm for small gemms (supports x86 platforms)"
        OFF)
+option(CP2K_USE_CUSOLVER
+       "Use Nvidia gpu accelerated eigensolver. Only active when CUDA is ON"
+       OFF)
 option(CP2K_BUILD_DBCSR "Duild dbcsr at the same time than cp2k." OFF)
 option(BUILD_SHARED_LIBS "Build cp2k shared library" ON)

@@ -308,6 +311,8 @@ set(__cp2k_cmake_name "cmake_build_cpu")

 if(CP2K_USE_ACCEL MATCHES "CUDA")
   set(__cp2k_cmake_name "cmake_build_cuda")
+else()
+  set(CP2K_USE_CUSOLVER OFF)
 endif()

 if(CP2K_USE_ACCEL MATCHES "HIP")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 47c056d07..924e95038 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -956,8 +956,6 @@ list(
   fm/cp_fm_struct.F
   fm/cp_linked_list_fm.F)

-list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
-
 list(APPEND CP2K_SRCS_F hfxbase/hfx_compression_core_methods.F
      hfxbase/hfx_contract_block.F hfxbase/hfx_contraction_methods.F)

@@ -1324,6 +1322,10 @@ set(CP2K_GRID_SRCS_C
     grid/cpu/grid_cpu_task_list.c
     grid/grid_replay.c)

+if(CP2K_USE_CUSOLVER)
+  list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
+endif()
+
 set(CP2K_FPGA_SRC_C pw/fpga/fft_fpga.c pw/fpga/opencl_utils.c)
 set(CP2K_PW_SRCS_C pw/gpu/pw_gpu_internal.c)
 set(CP2K_OFFLOAD_SRCS_C offload/offload_buffer.c offload/offload_library.c)
@@ -1419,8 +1421,8 @@ if(CP2K_USE_CUDA)
     target_link_libraries(cp2k_cuda_libs INTERFACE NVHPC::MATH NVHPC::CUDA)
   else()
     target_link_libraries(
-      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
-                               CUDA::cudart CUDA::cuda_driver)
+      cp2k_cuda_libs INTERFACE CUDA::cusolver CUDA::cufft CUDA::cufftw
+                               CUDA::cublas CUDA::cudart CUDA::cuda_driver)
   endif()
 endif()

@@ -1581,6 +1583,7 @@ target_compile_definitions(
          $<$<BOOL:${CP2K_USE_LIBXSMM}>:__LIBXSMM>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},MKL>:__MKL>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},Apple>:__ACCELERATE>
+         $<$<BOOL:${CP2K_USE_CUSOLVER}>:__CUSOLVERMP>
          $<$<BOOL:${CP2K_USE_CUDA}>:__OFFLOAD_CUDA>
          $<$<COMPILE_LANGUAGE:CUDA>:__OFFLOAD_CUDA>
          $<$<BOOL:${CP2K_USE_HIP}>:__HIP_PLATFORM_AMD__
--
2.41.0

@dev-zero
Copy link
Contributor

cmake_dependent_option might be the right thing to use here, see https://cliutils.gitlab.io/modern-cmake/chapters/features/modules.html

@mtaillefumier
Copy link
Contributor

indeed cmake_dependent_option is better.

a revisited version of the patch

From d15f357e16753de703205023b9e6938ef2cc9345 Mon Sep 17 00:00:00 2001
From: "Dr. Mathieu Taillefumier" <mathieu.taillefumier@free.fr>
Date: Fri, 14 Jul 2023 09:35:03 +0200
Subject: [PATCH] [cmake] include cusolver to the cmake build system

---
 CMakeLists.txt     |  5 +++++
 src/CMakeLists.txt | 12 ++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e44497cbd..3f81c7b52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,6 +133,11 @@ cmake_dependent_option(
   "CP2K_BUILD_DBCSR"
   OFF)

+cmake_dependent_option(
+  CP2K_USE_CUSOLVER_MP
+  "Use Nvidia gpu accelerated eigensolver. Only active when CUDA is ON" OFF
+  "CP2K_USE_ACCEL MATCHES \"CUDA\"" OFF)
+
 set(CP2K_BLAS_VENDOR
     "auto"
     CACHE STRING "Blas library for computations on host")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 47c056d07..14f0b67e1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -956,8 +956,6 @@ list(
   fm/cp_fm_struct.F
   fm/cp_linked_list_fm.F)

-list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
-
 list(APPEND CP2K_SRCS_F hfxbase/hfx_compression_core_methods.F
      hfxbase/hfx_contract_block.F hfxbase/hfx_contraction_methods.F)

@@ -1324,6 +1322,10 @@ set(CP2K_GRID_SRCS_C
     grid/cpu/grid_cpu_task_list.c
     grid/grid_replay.c)

+if(CP2K_USE_CUSOLVER_MP)
+  list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
+endif()
+
 set(CP2K_FPGA_SRC_C pw/fpga/fft_fpga.c pw/fpga/opencl_utils.c)
 set(CP2K_PW_SRCS_C pw/gpu/pw_gpu_internal.c)
 set(CP2K_OFFLOAD_SRCS_C offload/offload_buffer.c offload/offload_library.c)
@@ -1419,8 +1421,9 @@ if(CP2K_USE_CUDA)
     target_link_libraries(cp2k_cuda_libs INTERFACE NVHPC::MATH NVHPC::CUDA)
   else()
     target_link_libraries(
-      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
-                               CUDA::cudart CUDA::cuda_driver)
+      cp2k_cuda_libs
+      INTERFACE $<$<BOOL:${CP2K_USE_CUSOLVER_MP}>:CUDA::cusolver> CUDA::cufft
+                CUDA::cufftw CUDA::cublas CUDA::cudart CUDA::cuda_driver)
   endif()
 endif()

@@ -1581,6 +1584,7 @@ target_compile_definitions(
          $<$<BOOL:${CP2K_USE_LIBXSMM}>:__LIBXSMM>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},MKL>:__MKL>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},Apple>:__ACCELERATE>
+         $<$<BOOL:${CP2K_USE_CUSOLVER_MP}>:__CUSOLVERMP>
          $<$<BOOL:${CP2K_USE_CUDA}>:__OFFLOAD_CUDA>
          $<$<COMPILE_LANGUAGE:CUDA>:__OFFLOAD_CUDA>
          $<$<BOOL:${CP2K_USE_HIP}>:__HIP_PLATFORM_AMD__
--
2.41.0

@oschuett
Copy link
Member Author

Thank a lot for the CMake integration! Did you have a chance to test it too?

I used version 0.4.1.0 and found that 99% of our regtests pass. Since the glue code is rather simple, I'm fairly confident that the remaining issues are with cuSOLVERMp itself.

@mtaillefumier
Copy link
Contributor

the cmake modifications are alright. Compiling cp2k with CUDA 11.6 fails however.

@mtaillefumier
Copy link
Contributor

the error is during the compilation. I suspect the version of cusolvermp we have installed locally is missing some of the latest changes. No need to worry in my view.

@mtaillefumier
Copy link
Contributor

side note : it is off by default

@oschuett
Copy link
Member Author

I suspect the version of cusolvermp we have installed locally is missing some of the latest changes.

Yes, that's very likely. The support for CUBLAS_FILL_MODE_UPPER and CUSOLVERMP_GRID_MAPPING_ROW_MAJOR was added only very recently and is AFAIK not yet part of NVHPC. Still, I think it's useful to already include it in the upcoming release to get some field testing.

@mtaillefumier
Copy link
Contributor

@oschuett : Do not worry about these compilation issues. I agree with you this should be merged and get some exposure. It will certainly help us with comparison between DLAF and cuSolver get this included.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants