Makefile.include.nvhpc ompi mkl omp acc: Difference between revisions

From VASP Wiki
No edit summary
No edit summary
 
(One intermediate revision by one other user not shown)
Line 8: Line 8:
               -Davoidalloc \
               -Davoidalloc \
               -Dvasp6 \
               -Dvasp6 \
              -Duse_bse_te \
               -Dtbdyn \
               -Dtbdyn \
               -Dqd_emulate \
               -Dqd_emulate \
               -Dfock_dblbuf \
               -Dfock_dblbuf \
               -D_OPENMP \
               -D_OPENMP \
               -D_OPENACC \
               -DACC_OFFLOAD \
              -DNVCUDA \
               -DUSENCCL
               -DUSENCCL


Line 19: Line 19:


# N.B.: you might need to change the cuda-version here
# N.B.: you might need to change the cuda-version here
#      to one that comes with your NVIDIA-HPC SDK
#      to one that comes with your NVIDIA-HPC SDK, and
CC          = mpicc  -acc -gpu=cc60,cc70,cc80,cuda11.8 -mp  
#      the compute capabilities to the ones applicable
FC          = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8 -mp
#      to the hardware you are targeting
FCL        = mpif90 -acc -gpu=cc60,cc70,cc80,cuda11.8 -mp -c++libs
GPU        ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6
 
CC          = mpicc  -mp -acc $(GPU)
FC          = mpif90 -mp -acc $(GPU)
FCL        = mpif90 -mp -acc $(GPU) -c++libs


FREE        = -Mfree
FREE        = -Mfree
Line 31: Line 35:


DEBUG      = -Mfree -O0 -traceback
DEBUG      = -Mfree -O0 -traceback
OBJECTS    = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o


LLIBS      = -cudalib=cublas,cusolver,cufft,nccl -cuda
LLIBS      = -cudalib=cublas,cusolver,cufft,nccl -cuda


# Redefine the standard list of O1 and O2 objects
# Redefine the standard list of O1 and O2 objects
SOURCE_O1  := pade_fit.o minimax_dependence.o
SOURCE_O1  := pade_fit.o minimax_dependence.o wave_window.o
SOURCE_O2  := pead.o
SOURCE_O2  := pead.o


Line 83: Line 85:
# Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK
# Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK
MKLROOT    ?= /path/to/your/mkl/installation
MKLROOT    ?= /path/to/your/mkl/installation
LLIBS_MKL  = -Mmkl -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64
MKLLIBS    = -Mmkl
INCS      += -I$(MKLROOT)/include/fftw
#MKLLIBS    = -lmkl_intel_lp64 -lmkl_pgi_thread -lmkl_core -pgf90libs -mp -lpthread -lm -ldl
 
# If you want to use scaLAPACK from MKL
LLIBS_MKL  = -L$(MKLROOT)/lib -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 $(MKLLIBS)


# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI)
# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI)
# Comment out the two lines below if you want to use scaLAPACK from MKL instead
# Comment out the two lines below if you want to use scaLAPACK from MKL instead
SCALAPACK_ROOT ?= /path/to/your/scalapack/installation
#SCALAPACK_ROOT ?= /path/to/your/scalapack/installation
LLIBS_MKL  = -L$(SCALAPACK_ROOT)/lib -lscalapack -Mmkl
#LLIBS_MKL  = -L$(SCALAPACK_ROOT)/lib -lscalapack $(MKLLIBS)


LLIBS      += $(LLIBS_MKL)
LLIBS      += $(LLIBS_MKL)
INCS      += -I$(MKLROOT)/include/fftw


# Use cusolvermp (optional)
# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS      += -cudalib=cusolvermp
#LLIBS      += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
#CFLAGS_LIB += -cudalib=cusolvermp
#OBJECTS_LIB+= cal_mpi.o


# HDF5-support (optional but strongly recommended)
# HDF5-support (optional but strongly recommended, and mandatory for some features)
#CPP_OPTIONS+= -DVASP_HDF5
#CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT  ?= /path/to/your/hdf5/installation
#HDF5_ROOT  ?= /path/to/your/hdf5/installation
Line 118: Line 123:
#LIBS      += fftlib
#LIBS      += fftlib
#LLIBS      += -ldl
#LLIBS      += -ldl
# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML      = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML  =
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK >= 25.1
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -q '$(2)' && echo true || echo false)
endef
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
    CC  += -gpu=tripcount:host
    FC  += -gpu=tripcount:host
endif
</pre>
</pre>
----
----

Latest revision as of 13:16, 10 June 2025

# Default precompiler options, one group of defines per line.
# The variable stays recursively expanded, and the optional sections further
# down extend it in the same way with "CPP_OPTIONS += ...".
CPP_OPTIONS  = -DHOST=\"LinuxNV\"
CPP_OPTIONS += -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective
CPP_OPTIONS += -DscaLAPACK
CPP_OPTIONS += -DCACHE_SIZE=4000
CPP_OPTIONS += -Davoidalloc
CPP_OPTIONS += -Dvasp6
CPP_OPTIONS += -Dtbdyn
CPP_OPTIONS += -Dqd_emulate
CPP_OPTIONS += -Dfock_dblbuf
CPP_OPTIONS += -D_OPENMP
CPP_OPTIONS += -DACC_OFFLOAD
CPP_OPTIONS += -DNVCUDA
CPP_OPTIONS += -DUSENCCL

# Preprocessing command: nvfortran reads $*$(FUFFIX) with all CPP_OPTIONS
# applied and the result is redirected into $*$(SUFFIX).
# Must remain recursively expanded ("="): $* (the pattern stem) only has a
# value while a recipe runs. FUFFIX and SUFFIX are not set in this file;
# presumably the including makefile defines them -- confirm there.
CPP         = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX)  > $*$(SUFFIX)

# GPU target options shared by the three compiler commands below.
# N.B.: you might need to change the cuda-version here
#       to one that comes with your NVIDIA-HPC SDK, and
#       the compute capabilities to the ones applicable
#       to the hardware you are targeting
# "?=" only assigns when GPU is not already set, so a value supplied from
# outside this file takes precedence over this default.
GPU        ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6

# C compiler (CC) and Fortran compiler (FC) commands; both pick up $(GPU).
# The version check at the end of this file may append a further -gpu option
# to CC and FC.
# FCL is the same mpif90 command with -c++libs added (presumably the link
# command used by the main makefile -- confirm there).
CC          = mpicc  -mp -acc $(GPU)
FC          = mpif90 -mp -acc $(GPU)
FCL         = mpif90 -mp -acc $(GPU) -c++libs

# Flag stored in FREE; it is forwarded unchanged to FREE_LIB further down.
FREE        = -Mfree

# Base Fortran flags; $(VASP_TARGET_CPU) is appended to this list in the
# customization part of the file.
FFLAGS      = -Mbackslash -Mlarge_arrays

# Default optimization flag (presumably applied by the main makefile to
# objects without a special optimization list -- confirm there).
OFLAG       = -fast

# Flag set held in DEBUG: free-form source, optimization disabled (-O0),
# and -traceback.
DEBUG       = -Mfree -O0 -traceback

# Initial link libraries; the sections below extend this with "LLIBS += ...".
LLIBS       = -cudalib=cublas,cusolver,cufft,nccl -cuda

# Redefine the standard list of O1 and O2 objects
# (":=" fixes the lists immediately, at the point of assignment)
SOURCE_O1  := pade_fit.o minimax_dependence.o wave_window.o
SOURCE_O2  := pead.o

# For what used to be vasp.5.lib
# CPP_LIB, FC_LIB, CC_LIB and FREE_LIB simply forward to the corresponding
# variables defined above; because they are recursively expanded ("="), later
# changes to CPP, FC, CC or FREE are picked up as well.
CPP_LIB     = $(CPP)
FC_LIB      = $(FC)
CC_LIB      = $(CC)
# Library-specific C and Fortran flags (set independently of FFLAGS/OFLAG)
CFLAGS_LIB  = -O -w
FFLAGS_LIB  = -O1 -Mfixed
FREE_LIB    = $(FREE)

# Object list for the library part
OBJECTS_LIB = linpack_double.o

# For the parser library
# C++ compiler command, with warnings switched off via --no_warnings
CXX_PARS    = nvc++ --no_warnings

##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##
# The default (-tp host) is meant for compiling on the target machine itself;
# change this to the relevant target when cross-compiling for another
# architecture. "?=" keeps any value of VASP_TARGET_CPU that is already set.
VASP_TARGET_CPU ?= -tp host
# The chosen target option becomes part of the Fortran flags
FFLAGS     += $(VASP_TARGET_CPU)

# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically:
# take the path of the nvfortran found by "which" and keep everything in
# front of "/compilers/bin/nvfortran".
# Simply expanded (":=") so that the shell pipeline runs once while this file
# is parsed, instead of being re-run on every expansion of $(NVROOT)
# (e.g. each time $(QD), $(LLIBS) or $(INCS) is used).
NVROOT     := $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')

# If the above fails, then NVROOT needs to be set manually
# (an assignment placed here overrides the automatically detected value)
#NVHPC      ?= /opt/nvidia/hpc_sdk
#NVVERSION   = 21.11
#NVROOT      = $(NVHPC)/Linux_x86_64/$(NVVERSION)

## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
#OFLAG_IN   = -fast -Mwarperf
#SOURCE_IN  := nonlr.o

# Software emulation of quadruple precision (mandatory)
# QD defaults to the qd directory inside the NV HPC-SDK tree given by NVROOT;
# "?=" keeps any value of QD that is already set.
QD         ?= $(NVROOT)/compilers/extras/qd
# Link against the qd libraries and add the matching include directory
LLIBS      += -L$(QD)/lib -lqdmod -lqd
INCS       += -I$(QD)/include/qd

# Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK
# MKLROOT must point to your MKL installation; "?=" keeps a value that is
# already set.
MKLROOT    ?= /path/to/your/mkl/installation
# MKL link options: by default the compiler's -Mmkl switch; the commented
# line below is an alternative that lists the MKL libraries explicitly.
MKLLIBS     = -Mmkl
#MKLLIBS     = -lmkl_intel_lp64 -lmkl_pgi_thread -lmkl_core -pgf90libs -mp -lpthread -lm -ldl

# If you want to use scaLAPACK from MKL (this is the active default)
LLIBS_MKL   = -L$(MKLROOT)/lib -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 $(MKLLIBS)

# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI)
# Uncomment the two lines below to do so; leave them commented out if you
# want to use scaLAPACK from MKL instead. When active, the second line
# replaces the LLIBS_MKL definition above.
#SCALAPACK_ROOT ?= /path/to/your/scalapack/installation
#LLIBS_MKL   = -L$(SCALAPACK_ROOT)/lib -lscalapack $(MKLLIBS)

# Whichever LLIBS_MKL definition is in effect is added to the link line
LLIBS      += $(LLIBS_MKL)

# Include directory for the FFTW headers shipped with MKL
INCS       += -I$(MKLROOT)/include/fftw

# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS      += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal

# HDF5-support (optional but strongly recommended, and mandatory for some features)
#CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT  ?= /path/to/your/hdf5/installation
#LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran
#INCS       += -I$(HDF5_ROOT)/include

# For the VASP-2-Wannier90 interface (optional)
#CPP_OPTIONS    += -DVASP2WANNIER90
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS          += -L$(WANNIER90_ROOT)/lib -lwannier

# For the fftlib library (hardly any benefit for the OpenACC GPU port, especially in combination with MKL's FFTs)
#CPP_OPTIONS+= -Dsysv
#FCL        += fftlib.o
#CXX_FFTLIB  = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_USE_MKL -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(MKLROOT)/include/fftw
#LIBS       += fftlib
#LLIBS      += -ldl

# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML      = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML  =

# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK >= 25.1

# Version number reported by nvfortran: the digits and dots that follow
# "nvfortran " on the second line of "nvfortran --version". Empty when that
# line does not have the expected form.
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')

# $(call greater_or_equal,A,B) expands to "true" when version A >= version B
# and to "false" otherwise (including when A is empty).
# Both versions are sorted with "sort -V"; A >= B holds exactly when the
# smaller one (first line of the sorted output) is B. The comparison with B
# uses grep -x -F so that B has to match the whole line literally: without
# these options "25.1" would be treated as a regular expression and would
# also match inside a longer string such as "1.25.10".
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -qxF -- '$(2)' && echo true || echo false)
endef

ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
    CC  += -gpu=tripcount:host
    FC  += -gpu=tripcount:host
endif

makefile.include