Makefile.include.nvhpc ompi mkl omp acc: Difference between revisions
No edit summary |
No edit summary |
||
| (2 intermediate revisions by 2 users not shown) | |||
| Line 3: | Line 3: | ||
# Default precompiler options | # Default precompiler options | ||
CPP_OPTIONS = -DHOST=\"LinuxNV\" \ | CPP_OPTIONS = -DHOST=\"LinuxNV\" \ | ||
-DMPI -DMPI_BLOCK=8000 -Duse_collective \ | -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \ | ||
-DscaLAPACK \ | -DscaLAPACK \ | ||
-DCACHE_SIZE=4000 \ | -DCACHE_SIZE=4000 \ | ||
-Davoidalloc \ | -Davoidalloc \ | ||
-Dvasp6 \ | -Dvasp6 \ | ||
-Dtbdyn \ | -Dtbdyn \ | ||
-Dqd_emulate \ | -Dqd_emulate \ | ||
-Dfock_dblbuf \ | -Dfock_dblbuf \ | ||
-D_OPENMP \ | -D_OPENMP \ | ||
- | -DACC_OFFLOAD \ | ||
-DUSENCCL | -DNVCUDA \ | ||
-DUSENCCL | |||
CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX) | ||
# N.B.: you might need to change the cuda-version here | # N.B.: you might need to change the cuda-version here | ||
# to one that comes with your NVIDIA-HPC SDK | # to one that comes with your NVIDIA-HPC SDK, and | ||
# the compute capabilities to the ones applicable | |||
# to the hardware you are targeting | |||
GPU ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6 | |||
CC = mpicc -mp -acc $(GPU) | |||
FC = mpif90 -mp -acc $(GPU) | |||
FCL = mpif90 -mp -acc $(GPU) -c++libs | |||
FREE = -Mfree | FREE = -Mfree | ||
| Line 30: | Line 35: | ||
DEBUG = -Mfree -O0 -traceback | DEBUG = -Mfree -O0 -traceback | ||
LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda | LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda | ||
# Redefine the standard list of O1 and O2 objects | # Redefine the standard list of O1 and O2 objects | ||
SOURCE_O1 := pade_fit.o | SOURCE_O1 := pade_fit.o minimax_dependence.o wave_window.o | ||
SOURCE_O2 := pead.o | SOURCE_O2 := pead.o | ||
# For what used to be vasp.5.lib | # For what used to be vasp.5.lib | ||
CPP_LIB = $(CPP) | CPP_LIB = $(CPP) | ||
FC_LIB = | FC_LIB = $(FC) | ||
CC_LIB = | CC_LIB = $(CC) | ||
CFLAGS_LIB = -O | CFLAGS_LIB = -O -w | ||
FFLAGS_LIB = -O1 -Mfixed | FFLAGS_LIB = -O1 -Mfixed | ||
FREE_LIB = $(FREE) | FREE_LIB = $(FREE) | ||
| Line 57: | Line 60: | ||
## necessary ... | ## necessary ... | ||
## | ## | ||
# The default suits compiling on the target machine itself; change this to the | |||
# relevant target when cross-compiling for another architecture | |||
VASP_TARGET_CPU ?= -tp host | |||
FFLAGS += $(VASP_TARGET_CPU) | |||
# Specify your NV HPC-SDK installation (mandatory) | # Specify your NV HPC-SDK installation (mandatory) | ||
| Line 66: | Line 73: | ||
#NVVERSION = 21.11 | #NVVERSION = 21.11 | ||
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION) | #NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION) | ||
## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2 | |||
#OFLAG_IN = -fast -Mwarperf | |||
#SOURCE_IN := nonlr.o | |||
# Software emulation of quadruple precision (mandatory) | # Software emulation of quadruple precision (mandatory) | ||
| Line 72: | Line 83: | ||
INCS += -I$(QD)/include/qd | INCS += -I$(QD)/include/qd | ||
# Intel MKL | # Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK | ||
MKLROOT ?= /path/to/your/mkl/installation | MKLROOT ?= /path/to/your/mkl/installation | ||
MKLLIBS = -Mmkl | |||
#MKLLIBS = -lmkl_intel_lp64 -lmkl_pgi_thread -lmkl_core -pgf90libs -mp -lpthread -lm -ldl | |||
# If you want to use scaLAPACK from MKL | |||
LLIBS_MKL = -L$(MKLROOT)/lib -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 $(MKLLIBS) | |||
# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI) | |||
# Comment out the two lines below if you want to use scaLAPACK from MKL instead | |||
#SCALAPACK_ROOT ?= /path/to/your/scalapack/installation | |||
#LLIBS_MKL = -L$(SCALAPACK_ROOT)/lib -lscalapack $(MKLLIBS) | |||
LLIBS += $(LLIBS_MKL) | |||
INCS += -I$(MKLROOT)/include/fftw | INCS += -I$(MKLROOT)/include/fftw | ||
# HDF5-support (optional but strongly recommended) | # Use cusolvermp (optional) | ||
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8) | |||
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP | |||
#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal | |||
# HDF5-support (optional but strongly recommended, and mandatory for some features) | |||
#CPP_OPTIONS+= -DVASP_HDF5 | #CPP_OPTIONS+= -DVASP_HDF5 | ||
#HDF5_ROOT ?= /path/to/your/hdf5/installation | #HDF5_ROOT ?= /path/to/your/hdf5/installation | ||
| Line 88: | Line 116: | ||
#LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier | #LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier | ||
# For the fftlib library ( | # For the fftlib library (hardly any benefit for the OpenACC GPU port, especially in combination with MKL's FFTs) | ||
#CPP_OPTIONS+= -Dsysv | #CPP_OPTIONS+= -Dsysv | ||
#FCL += fftlib.o | #FCL += fftlib.o | ||
| Line 95: | Line 123: | ||
#LIBS += fftlib | #LIBS += fftlib | ||
#LLIBS += -ldl | #LLIBS += -ldl | ||
# For machine learning library vaspml (experimental) | |||
#CPP_OPTIONS += -Dlibvaspml | |||
#CPP_OPTIONS += -DVASPML_USE_CBLAS | |||
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3 | |||
#CXX_ML = mpic++ -mp | |||
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra | |||
#INCLUDE_ML = | |||
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK > 25.1 | |||
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p') | |||
define greater_or_equal | |||
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -q '$(2)' && echo true || echo false) | |||
endef | |||
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true) | |||
CC += -gpu=tripcount:host | |||
FC += -gpu=tripcount:host | |||
endif | |||
</pre> | </pre> | ||
---- | ---- | ||
[[makefile.include]] | |||
[[Category:VASP]] | [[Category:VASP]] | ||
[[Category:Installation]] | [[Category:Installation]] | ||
Latest revision as of 13:16, 10 June 2025
# Default precompiler options.
# The list is accumulated step by step, one group of defines per line.
CPP_OPTIONS  = -DHOST=\"LinuxNV\"
CPP_OPTIONS += -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective
CPP_OPTIONS += -DscaLAPACK
CPP_OPTIONS += -DCACHE_SIZE=4000
CPP_OPTIONS += -Davoidalloc
CPP_OPTIONS += -Dvasp6
CPP_OPTIONS += -Dtbdyn
CPP_OPTIONS += -Dqd_emulate
CPP_OPTIONS += -Dfock_dblbuf
CPP_OPTIONS += -D_OPENMP
CPP_OPTIONS += -DACC_OFFLOAD
CPP_OPTIONS += -DNVCUDA
CPP_OPTIONS += -DUSENCCL

# Preprocessor command: nvfortran in preprocess-only mode (-E) with all of
# the options above; its output is redirected into the stem + SUFFIX file.
# FUFFIX and SUFFIX are not defined in this file.
CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) \
      $*$(FUFFIX) > $*$(SUFFIX)
# GPU code generation settings.
# N.B.: adapt the cuda-version to one that ships with your NVIDIA-HPC SDK,
# and the compute capabilities to those of the hardware you are targeting.
GPU    ?= -gpu=cc60,cc70,cc80,cc89,cuda12.6

# Compiler commands; every one of them passes the $(GPU) setting along
CC      = mpicc -mp -acc $(GPU)
FC      = mpif90 -mp -acc $(GPU)
FCL     = mpif90 -mp -acc $(GPU) -c++libs

# Fortran flag sets
FREE    = -Mfree
FFLAGS  = -Mbackslash -Mlarge_arrays
OFLAG   = -fast
DEBUG   = -Mfree -O0 -traceback

# Libraries to link against
LLIBS   = -cudalib=cublas,cusolver,cufft,nccl -cuda
# Override the standard O1 and O2 object lists
SOURCE_O1   := pade_fit.o
SOURCE_O1   += minimax_dependence.o wave_window.o
SOURCE_O2   := pead.o

# Settings for the library that used to be vasp.5.lib;
# preprocessor and compilers are inherited from the main build
CPP_LIB      = $(CPP)
FC_LIB       = $(FC)
CC_LIB       = $(CC)
FREE_LIB     = $(FREE)
CFLAGS_LIB   = -O -w
FFLAGS_LIB   = -O1 -Mfixed
OBJECTS_LIB  = linpack_double.o

# C++ compiler command for the parser library
CXX_PARS     = nvc++ --no_warnings
##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##
# Target CPU selection. The default below is meant for compiling on the
# target machine itself; when cross-compiling for another architecture,
# change this to the relevant target.
# (?= only assigns when VASP_TARGET_CPU is not already set, e.g. from the
# environment or the make command line.)
VASP_TARGET_CPU ?= -tp host
# The chosen target option is appended to the Fortran flags
FFLAGS += $(VASP_TARGET_CPU)
# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically: take the path of the nvfortran
#    found by `which` and keep what precedes /compilers/bin/nvfortran.
#    Simply expanded (:=) so that the shell pipeline runs only once, at
#    parse time, instead of on every expansion of NVROOT via QD, LLIBS
#    and INCS.
NVROOT := $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
# If the above fails, then NVROOT needs to be set manually
#NVHPC ?= /opt/nvidia/hpc_sdk
#NVVERSION = 21.11
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
#OFLAG_IN = -fast -Mwarperf
#SOURCE_IN := nonlr.o
# Software emulation of quadruple precision (mandatory)
# QD defaults to the qd directory below NVROOT unless it is already set
QD ?= $(NVROOT)/compilers/extras/qd
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
# Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK
# (the MKLROOT default below is a placeholder: point it to your own MKL
# installation, here or via the environment / make command line)
MKLROOT ?= /path/to/your/mkl/installation
MKLLIBS = -Mmkl
# Alternative: list the MKL libraries explicitly
#MKLLIBS = -lmkl_intel_lp64 -lmkl_pgi_thread -lmkl_core -pgf90libs -mp -lpthread -lm -ldl
# If you want to use scaLAPACK from MKL (this is the active setting)
LLIBS_MKL = -L$(MKLROOT)/lib -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 $(MKLLIBS)
# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI)
# Uncomment the two lines below for that; leave them commented out
# if you want to use scaLAPACK from MKL instead
#SCALAPACK_ROOT ?= /path/to/your/scalapack/installation
#LLIBS_MKL = -L$(SCALAPACK_ROOT)/lib -lscalapack $(MKLLIBS)
# Whichever LLIBS_MKL definition is active ends up on the link line
LLIBS += $(LLIBS_MKL)
INCS += -I$(MKLROOT)/include/fftw
# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
# HDF5-support (optional but strongly recommended, and mandatory for some features)
#CPP_OPTIONS+= -DVASP_HDF5
#HDF5_ROOT ?= /path/to/your/hdf5/installation
#LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
#INCS += -I$(HDF5_ROOT)/include
# For the VASP-2-Wannier90 interface (optional)
#CPP_OPTIONS += -DVASP2WANNIER90
#WANNIER90_ROOT ?= /path/to/your/wannier90/installation
#LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier
# For the fftlib library (hardly any benefit for the OpenACC GPU port, especially in combination with MKL's FFTs)
#CPP_OPTIONS+= -Dsysv
#FCL += fftlib.o
#CXX_FFTLIB = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_USE_MKL -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(MKLROOT)/include/fftw
#LIBS += fftlib
#LLIBS += -ldl
# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML =
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK 25.1 or newer
# NVFORTRAN_VERSION: the version number found on the second line of the
# `nvfortran --version` output (empty when nvfortran cannot be run or that
# line does not start with "nvfortran <version>").
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')
# $(call greater_or_equal,A,B) expands to "true" when version A >= version B
# and to "false" otherwise (including when A is empty).
# Both versions are ordered with `sort -V`; A >= B holds exactly when the
# smaller of the two is B. The comparison uses grep -x -F, i.e. a literal
# whole-line match, so that the dots in B do not act as regex wildcards and
# B cannot match as a mere substring of A (e.g. 2.25.1 is not >= 25.1).
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -qxF '$(2)' && echo true || echo false)
endef
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
CC += -gpu=tripcount:host
FC += -gpu=tripcount:host
endif