fm: Add experimental support for cuSOLVERMp #2860

oschuett · 2023-07-13T16:12:53Z

No description provided.

mtaillefumier · 2023-07-14T06:48:39Z

That is very useful to get this up and running. One minor change should be made in the CMakeLists.txt file: CUDA::cusolver needs to be added to

target_link_libraries(
      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
                               CUDA::cudart CUDA::cuda_driver)

No further changes are needed so far in the CMake build system.

mtaillefumier · 2023-07-14T07:46:11Z

Correction: a bit more work needs to be done for the CMake build system. Here is a patch fixing it. CUSOLVER is off by default.

From 38a55b11d735d5878ba1684b608d3272df6724a4 Mon Sep 17 00:00:00 2001
From: "Dr. Mathieu Taillefumier" <mathieu.taillefumier@free.fr>
Date: Fri, 14 Jul 2023 09:35:03 +0200
Subject: [PATCH] [cmake] include cusolver to the cmake build system

---
 CMakeLists.txt     |  5 +++++
 src/CMakeLists.txt | 11 +++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e44497cbd..017db4f23 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -106,6 +106,9 @@ option(CP2K_USE_SPLA
 option(CP2K_USE_METIS "enable metis library support" OFF)
 option(CP2K_USE_LIBXSMM "Use libxsmm for small gemms (supports x86 platforms)"
        OFF)
+option(CP2K_USE_CUSOLVER
+       "Use Nvidia gpu accelerated eigensolver. Only active when CUDA is ON"
+       OFF)
 option(CP2K_BUILD_DBCSR "Duild dbcsr at the same time than cp2k." OFF)
 option(BUILD_SHARED_LIBS "Build cp2k shared library" ON)

@@ -308,6 +311,8 @@ set(__cp2k_cmake_name "cmake_build_cpu")

 if(CP2K_USE_ACCEL MATCHES "CUDA")
   set(__cp2k_cmake_name "cmake_build_cuda")
+else()
+  set(CP2K_USE_CUSOLVER OFF)
 endif()

 if(CP2K_USE_ACCEL MATCHES "HIP")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 47c056d07..924e95038 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -956,8 +956,6 @@ list(
   fm/cp_fm_struct.F
   fm/cp_linked_list_fm.F)

-list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
-
 list(APPEND CP2K_SRCS_F hfxbase/hfx_compression_core_methods.F
      hfxbase/hfx_contract_block.F hfxbase/hfx_contraction_methods.F)

@@ -1324,6 +1322,10 @@ set(CP2K_GRID_SRCS_C
     grid/cpu/grid_cpu_task_list.c
     grid/grid_replay.c)

+if(CP2K_USE_CUSOLVER)
+  list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
+endif()
+
 set(CP2K_FPGA_SRC_C pw/fpga/fft_fpga.c pw/fpga/opencl_utils.c)
 set(CP2K_PW_SRCS_C pw/gpu/pw_gpu_internal.c)
 set(CP2K_OFFLOAD_SRCS_C offload/offload_buffer.c offload/offload_library.c)
@@ -1419,8 +1421,8 @@ if(CP2K_USE_CUDA)
     target_link_libraries(cp2k_cuda_libs INTERFACE NVHPC::MATH NVHPC::CUDA)
   else()
     target_link_libraries(
-      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
-                               CUDA::cudart CUDA::cuda_driver)
+      cp2k_cuda_libs INTERFACE CUDA::cusolver CUDA::cufft CUDA::cufftw
+                               CUDA::cublas CUDA::cudart CUDA::cuda_driver)
   endif()
 endif()

@@ -1581,6 +1583,7 @@ target_compile_definitions(
          $<$<BOOL:${CP2K_USE_LIBXSMM}>:__LIBXSMM>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},MKL>:__MKL>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},Apple>:__ACCELERATE>
+         $<$<BOOL:${CP2K_USE_CUSOLVER}>:__CUSOLVERMP>
          $<$<BOOL:${CP2K_USE_CUDA}>:__OFFLOAD_CUDA>
          $<$<COMPILE_LANGUAGE:CUDA>:__OFFLOAD_CUDA>
          $<$<BOOL:${CP2K_USE_HIP}>:__HIP_PLATFORM_AMD__
--
2.41.0

dev-zero · 2023-07-14T08:10:33Z

cmake_dependent_option might be the right thing to use here, see https://cliutils.gitlab.io/modern-cmake/chapters/features/modules.html

mtaillefumier · 2023-07-14T09:06:09Z

Indeed, cmake_dependent_option is better.

Here is a revised version of the patch:

From d15f357e16753de703205023b9e6938ef2cc9345 Mon Sep 17 00:00:00 2001
From: "Dr. Mathieu Taillefumier" <mathieu.taillefumier@free.fr>
Date: Fri, 14 Jul 2023 09:35:03 +0200
Subject: [PATCH] [cmake] include cusolver to the cmake build system

---
 CMakeLists.txt     |  5 +++++
 src/CMakeLists.txt | 12 ++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e44497cbd..3f81c7b52 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,6 +133,11 @@ cmake_dependent_option(
   "CP2K_BUILD_DBCSR"
   OFF)

+cmake_dependent_option(
+  CP2K_USE_CUSOLVER_MP
+  "Use Nvidia gpu accelerated eigensolver. Only active when CUDA is ON" OFF
+  "CP2K_USE_ACCEL MATCHES \"CUDA\"" OFF)
+
 set(CP2K_BLAS_VENDOR
     "auto"
     CACHE STRING "Blas library for computations on host")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 47c056d07..14f0b67e1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -956,8 +956,6 @@ list(
   fm/cp_fm_struct.F
   fm/cp_linked_list_fm.F)

-list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
-
 list(APPEND CP2K_SRCS_F hfxbase/hfx_compression_core_methods.F
      hfxbase/hfx_contract_block.F hfxbase/hfx_contraction_methods.F)

@@ -1324,6 +1322,10 @@ set(CP2K_GRID_SRCS_C
     grid/cpu/grid_cpu_task_list.c
     grid/grid_replay.c)

+if(CP2K_USE_CUSOLVER_MP)
+  list(APPEND CP2K_SRCS_C fm/cp_fm_cusolver.c)
+endif()
+
 set(CP2K_FPGA_SRC_C pw/fpga/fft_fpga.c pw/fpga/opencl_utils.c)
 set(CP2K_PW_SRCS_C pw/gpu/pw_gpu_internal.c)
 set(CP2K_OFFLOAD_SRCS_C offload/offload_buffer.c offload/offload_library.c)
@@ -1419,8 +1421,9 @@ if(CP2K_USE_CUDA)
     target_link_libraries(cp2k_cuda_libs INTERFACE NVHPC::MATH NVHPC::CUDA)
   else()
     target_link_libraries(
-      cp2k_cuda_libs INTERFACE CUDA::cufft CUDA::cufftw CUDA::cublas
-                               CUDA::cudart CUDA::cuda_driver)
+      cp2k_cuda_libs
+      INTERFACE $<$<BOOL:${CP2K_USE_CUSOLVER_MP}>:CUDA::cusolver> CUDA::cufft
+                CUDA::cufftw CUDA::cublas CUDA::cudart CUDA::cuda_driver)
   endif()
 endif()

@@ -1581,6 +1584,7 @@ target_compile_definitions(
          $<$<BOOL:${CP2K_USE_LIBXSMM}>:__LIBXSMM>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},MKL>:__MKL>
          $<$<STREQUAL:${CP2K_BLAS_VENDOR},Apple>:__ACCELERATE>
+         $<$<BOOL:${CP2K_USE_CUSOLVER_MP}>:__CUSOLVERMP>
          $<$<BOOL:${CP2K_USE_CUDA}>:__OFFLOAD_CUDA>
          $<$<COMPILE_LANGUAGE:CUDA>:__OFFLOAD_CUDA>
          $<$<BOOL:${CP2K_USE_HIP}>:__HIP_PLATFORM_AMD__
--
2.41.0

oschuett · 2023-07-14T10:56:34Z

Thanks a lot for the CMake integration! Did you have a chance to test it too?

I used version 0.4.1.0 and found that 99% of our regtests pass. Since the glue code is rather simple, I'm fairly confident that the remaining issues are with cuSOLVERMp itself.

mtaillefumier · 2023-07-14T12:55:30Z

The CMake modifications are all right. Compiling cp2k with CUDA 11.6 fails, however.

mtaillefumier · 2023-07-14T13:21:29Z

The error occurs during compilation. I suspect the version of cuSOLVERMp we have installed locally is missing some of the latest changes. No need to worry, in my view.

mtaillefumier · 2023-07-14T13:21:40Z

Side note: it is off by default.

oschuett · 2023-07-14T13:50:54Z

> I suspect the version of cusolvermp we have installed locally is missing some of the latest changes.

Yes, that's very likely. The support for CUBLAS_FILL_MODE_UPPER and CUSOLVERMP_GRID_MAPPING_ROW_MAJOR was added only very recently and is AFAIK not yet part of NVHPC. Still, I think it's useful to already include it in the upcoming release to get some field testing.

mtaillefumier · 2023-07-14T13:54:34Z

@oschuett: Do not worry about these compilation issues. I agree with you that this should be merged and get some exposure. Getting this included will certainly help us compare DLAF and cuSOLVER.

oschuett force-pushed the current4 branch from dd7fe51 to 789f33d Compare July 13, 2023 16:19

oschuett marked this pull request as ready for review July 13, 2023 16:21

oschuett force-pushed the current4 branch from 789f33d to 0a3dde3 Compare July 13, 2023 19:01

oschuett force-pushed the current4 branch from 0a3dde3 to 2ca882b Compare July 14, 2023 10:59

fm: Add experimental support for cuSOLVERMp

bc1a558

oschuett force-pushed the current4 branch from 2ca882b to bc1a558 Compare July 14, 2023 11:21

oschuett merged commit 35c8065 into cp2k:master Jul 14, 2023

oschuett mentioned this pull request Jul 14, 2023

fm: Fix uninitialized info in choose_eigv_solver #2864

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

fm: Add experimental support for cuSOLVERMp #2860

fm: Add experimental support for cuSOLVERMp #2860

Uh oh!

oschuett commented Jul 13, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023 •

edited

Loading

Uh oh!

dev-zero commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

oschuett commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

oschuett commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

fm: Add experimental support for cuSOLVERMp #2860

fm: Add experimental support for cuSOLVERMp #2860

Uh oh!

Conversation

oschuett commented Jul 13, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

dev-zero commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

oschuett commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

oschuett commented Jul 14, 2023

Uh oh!

mtaillefumier commented Jul 14, 2023

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants

mtaillefumier commented Jul 14, 2023 •

edited

Loading