Makefile.include.nvhpc omp acc: Difference between revisions
No edit summary |
No edit summary |
||
(One intermediate revision by one other user not shown) | |||
Line 8: | Line 8: | ||
-Davoidalloc \ | -Davoidalloc \ | ||
-Dvasp6 \ | -Dvasp6 \ | ||
-Dtbdyn \ | -Dtbdyn \ | ||
-Dqd_emulate \ | -Dqd_emulate \ | ||
-Dfock_dblbuf \ | -Dfock_dblbuf \ | ||
-D_OPENMP \ | -D_OPENMP \ | ||
- | -DACC_OFFLOAD \ | ||
-DUSENCCL | -DNVCUDA \ | ||
-DUSENCCL | |||
CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | ||
# N.B.: you might need to change the cuda-version here | # N.B.: you might need to change the cuda-version here | ||
# to one that comes with your NVIDIA-HPC SDK | # to one that comes with your NVIDIA-HPC SDK, and | ||
# the compute capabilities to the ones applicable | |||
FC = mpif90 -acc | # to the hardware you are targeting | ||
FCL = mpif90 -acc | GPU ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6 | ||
CC = mpicc -acc -mp $(GPU) | |||
FC = mpif90 -acc -mp $(GPU) | |||
FCL = mpif90 -acc -mp $(GPU) -c++libs | |||
FREE = -Mfree | FREE = -Mfree | ||
Line 31: | Line 36: | ||
DEBUG = -Mfree -O0 -traceback | DEBUG = -Mfree -O0 -traceback | ||
LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda | LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda | ||
# Redefine the standard list of O1 and O2 objects | # Redefine the standard list of O1 and O2 objects | ||
SOURCE_O1 := pade_fit.o minimax_dependence.o | SOURCE_O1 := pade_fit.o minimax_dependence.o wave_window.o | ||
SOURCE_O2 := pead.o | SOURCE_O2 := pead.o | ||
Line 99: | Line 102: | ||
# Use cusolvermp (optional) | # Use cusolvermp (optional) | ||
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8) | # supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8) | ||
#CPP_OPTIONS+= -DCUSOLVERMP | #CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP | ||
#LLIBS += -cudalib=cusolvermp | #LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal | ||
# HDF5-support (optional but strongly recommended) | # HDF5-support (optional but strongly recommended, and mandatory for some features) | ||
#CPP_OPTIONS+= -DVASP_HDF5 | #CPP_OPTIONS+= -DVASP_HDF5 | ||
#HDF5_ROOT ?= /path/to/your/hdf5/installation | #HDF5_ROOT ?= /path/to/your/hdf5/installation | ||
Line 123: | Line 123: | ||
#LIBS += fftlib | #LIBS += fftlib | ||
#LLIBS += -ldl | #LLIBS += -ldl | ||
# For machine learning library vaspml (experimental) | |||
#CPP_OPTIONS += -Dlibvaspml | |||
#CPP_OPTIONS += -DVASPML_USE_CBLAS | |||
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3 | |||
#CXX_ML = mpic++ -mp | |||
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra | |||
#INCLUDE_ML = | |||
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK >= 25.1 | |||
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p') | |||
define greater_or_equal | |||
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -q '$(2)' && echo true || echo false) | |||
endef | |||
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true) | |||
CC += -gpu=tripcount:host | |||
FC += -gpu=tripcount:host | |||
endif | |||
</pre> | </pre> | ||
---- | ---- |
Latest revision as of 13:15, 10 June 2025
# makefile.include for VASP: NVIDIA HPC-SDK compilers, OpenMP + OpenACC (GPU offload).
# This file only sets variables; the rules that consume them live in the
# main makefile that includes it.

# Default precompiler options
CPP_OPTIONS = -DHOST=\"LinuxNV\" \
              -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
              -DscaLAPACK \
              -DCACHE_SIZE=4000 \
              -Davoidalloc \
              -Dvasp6 \
              -Dtbdyn \
              -Dqd_emulate \
              -Dfock_dblbuf \
              -D_OPENMP \
              -DACC_OFFLOAD \
              -DNVCUDA \
              -DUSENCCL

# Preprocessor command line. $* is the stem of the file being built; FUFFIX and
# SUFFIX are not defined here and must be provided by the including makefile.
CPP         = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX)  > $*$(SUFFIX)

# N.B.: you might need to change the cuda-version here
#       to one that comes with your NVIDIA-HPC SDK, and
#       the compute capabilities to the ones applicable
#       to the hardware you are targeting
GPU        ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6

# CC and FC stay recursively expanded (=) on purpose: flags are appended to
# them with += at the bottom of this file, and $(GPU) may be overridden.
CC          = mpicc  -acc -mp $(GPU)
FC          = mpif90 -acc -mp $(GPU)
FCL         = mpif90 -acc -mp $(GPU) -c++libs

FREE        = -Mfree

FFLAGS      = -Mbackslash -Mlarge_arrays

OFLAG       = -fast

DEBUG       = -Mfree -O0 -traceback

LLIBS       = -cudalib=cublas,cusolver,cufft,nccl -cuda

# Redefine the standard list of O1 and O2 objects
SOURCE_O1  := pade_fit.o minimax_dependence.o wave_window.o
SOURCE_O2  := pead.o

# For what used to be vasp.5.lib
CPP_LIB     = $(CPP)
FC_LIB      = $(FC)
CC_LIB      = $(CC)
CFLAGS_LIB  = -O -w
FFLAGS_LIB  = -O1 -Mfixed
FREE_LIB    = $(FREE)

OBJECTS_LIB = linpack_double.o

# For the parser library
CXX_PARS    = nvc++ --no_warnings

##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##

# The default (-tp host) is right when compiling on the target machine
# itself; change this to the relevant target when cross-compiling for
# another architecture
VASP_TARGET_CPU ?= -tp host
FFLAGS     += $(VASP_TARGET_CPU)

# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically
# Simply expanded (:=) so that `which | awk` runs once at parse time instead of
# on every expansion of QD / LLIBS / INCS. $$1 reaches awk as a literal $1.
NVROOT     := $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')

# If the above fails, then NVROOT needs to be set manually
#NVHPC      ?= /opt/nvidia/hpc_sdk
#NVVERSION   = 21.11
#NVROOT      = $(NVHPC)/Linux_x86_64/$(NVVERSION)

## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
#OFLAG_IN   = -fast -Mwarperf
#SOURCE_IN  := nonlr.o

# Software emulation of quadruple precision (mandatory)
QD         ?= $(NVROOT)/compilers/extras/qd
LLIBS      += -L$(QD)/lib -lqdmod -lqd
INCS       += -I$(QD)/include/qd

# BLAS (mandatory)
BLAS        = -lblas

# LAPACK (mandatory)
LAPACK      = -llapack

# scaLAPACK (mandatory)
SCALAPACK   = -Mscalapack

LLIBS      += $(SCALAPACK) $(LAPACK) $(BLAS)

# FFTW (mandatory)
FFTW_ROOT  ?= /path/to/your/fftw/installation
LLIBS      += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
INCS       += -I$(FFTW_ROOT)/include

# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS      += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal

# HDF5-support (optional but strongly recommended, and mandatory for some features)
#CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT  ?= /path/to/your/hdf5/installation
#LLIBS      += -L$(HDF5_ROOT)/lib -lhdf5_fortran
#INCS       += -I$(HDF5_ROOT)/include

# For the VASP-2-Wannier90 interface (optional)
#CPP_OPTIONS    += -DVASP2WANNIER90
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS          += -L$(WANNIER90_ROOT)/lib -lwannier

# For the fftlib library (hardly any benefit for the OpenACC GPU port)
#CPP_OPTIONS+= -Dsysv
#FCL        += fftlib.o
#CXX_FFTLIB  = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
#LIBS       += fftlib
#LLIBS      += -ldl

# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML      = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML  =

# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK >= 25.1

# Version number taken from the second line of `nvfortran --version`
# (digits and dots only). Empty if that line does not start with "nvfortran ".
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')

# $(call greater_or_equal,A,B) expands to "true" if version A >= version B,
# and to "false" otherwise (including when A is empty).
# The two versions are version-sorted and the smaller one must equal B.
# grep -x -F makes that a whole-line, literal comparison: without it a smaller
# version that merely contains B (e.g. A=1.25.1, B=25.1) would match, and the
# dots in B would act as regex wildcards.
# No comments inside the define: they would become part of the value.
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -qxF -e '$(2)' && echo true || echo false)
endef

# FC_LIB and CC_LIB are defined recursively from FC and CC above,
# so they pick up the appended flag as well.
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
  CC += -gpu=tripcount:host
  FC += -gpu=tripcount:host
endif