Hi
I am using VASP 6.5.1 on my local machine, which has both a CPU and a GPU. My machine has an AMD SOC chipset with an Nvidia RTX 4500 ADA GPU.I have installed VASP with HDF5 and Wannier90 interfacing using the following Makefile.include
____________
export MKLROOT=/home/user/intel/oneapi/mkl/2025.3
# Default precompiler options
CPP_OPTIONS = -DHOST=\"LinuxNV\" \
-DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
-DscaLAPACK \
-DCACHE_SIZE=4000 \
-Davoidalloc \
-Dvasp6 \
-Dtbdyn \
-Dqd_emulate \
-Dfock_dblbuf \
-D_OPENMP \
-DACC_OFFLOAD \
-DNVCUDA \
-DUSENCCL
CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
# N.B.: you might need to change the cuda-version here
# to one that comes with your NVIDIA-HPC SDK
CC = mpicc -acc -gpu=cc89,cuda13.1 -mp
FC = mpif90 -acc -gpu=cc89,cuda13.1 -mp
FCL = mpif90 -acc -gpu=cc89,cuda13.1 -mp -c++libs
FREE = -Mfree
FFLAGS = -Mbackslash -Mlarge_arrays
OFLAG = -fast
DEBUG = -Mfree -O0 -traceback
LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda
# Redefine the standard list of O1 and O2 objects
SOURCE_O1 := pade_fit.o minimax_dependence.o wave_window.o
SOURCE_O2 := pead.o
# For what used to be vasp.5.lib
CPP_LIB = $(CPP)
FC_LIB = $(FC)
CC_LIB = $(CC)
CFLAGS_LIB = -O -w
FFLAGS_LIB = -O1 -Mfixed
FREE_LIB = $(FREE)
OBJECTS_LIB = linpack_double.o
# For the parser library
CXX_PARS = nvc++ --no_warnings
##
## Customize as of this point! Of course you may change the preceding
## part of this file as well if you like, but it should rarely be
## necessary ...
##
# When compiling on the target machine itself , change this to the
# relevant target when cross-compiling for another architecture
VASP_TARGET_CPU ?= -tp host
FFLAGS += $(VASP_TARGET_CPU)
# Specify your NV HPC-SDK installation (mandatory)
#... first try to set it automatically
NVROOT =$(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
# If the above fails, then NVROOT needs to be set manually
#NVHPC ?= /opt/nvidia/hpc_sdk
#NVVERSION = 21.11
#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
#OFLAG_IN = -fast -Mwarperf
#SOURCE_IN := nonlr.o
# Software emulation of quadruple precsion (mandatory)
QD ?= $(NVROOT)/compilers/extras/qd
LLIBS += -L$(QD)/lib -lqdmod -lqd
INCS += -I$(QD)/include/qd
# Intel MKL for FFTW, BLAS, LAPACK, and scaLAPACK
MKLROOT ?= /home/user/intel/oneapi/mkl/2025.3
#MKLLIBS = -Mmkl
MKLLIBS = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -pgf90libs -mp -lpthread -lm -ldl
# If you want to use scaLAPACK from MKL
LLIBS_MKL = -L$(MKLROOT)/lib -lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 $(MKLLIBS)
# Use a separate scaLAPACK installation (optional but recommended in combination with OpenMPI)
# Comment out the two lines below if you want to use scaLAPACK from MKL instead
#SCALAPACK_ROOT ?= /path/to/your/scalapack/installation
#LLIBS_MKL = -L$(SCALAPACK_ROOT)/lib -lscalapack $(MKLLIBS)
LLIBS += $(LLIBS_MKL)
INCS += -I$(MKLROOT)/include/fftw
# Use cusolvermp (optional)
# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
#CPP_OPTIONS+= -DCUSOLVERMP -DCUBLASMP
#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
# HDF5-support (optional but strongly recommended, and mandatory for some features)
# HDF5-support
CPP_OPTIONS += -DVASP_HDF5
HDF5_ROOT = /home/user/hdf5-nvhpc
# Point directly to the folder containing hdf5.mod
INCS += -I$(HDF5_ROOT)/include
# Point directly to the folder containing libhdf5.so
LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran -lhdf5
# For the VASP-2-Wannier90 interface (optional)
CPP_OPTIONS += -DVASP2WANNIER90
WANNIER90_ROOT = /home/user/wannier90-3.1_nvhpc
LLIBS += $(WANNIER90_ROOT)/libwannier.a
# For the fftlib library (hardly any benefit for the OpenACC GPU port, especially in combination with MKL's FFTs)
#CPP_OPTIONS+= -Dsysv
#FCL += fftlib.o
#CXX_FFTLIB = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_USE_MKL -DFFTLIB_THREADSAFE
#INCS_FFTLIB = -I./include -I$(MKLROOT)/include/fftw
#LIBS += fftlib
#LLIBS += -ldl
# For machine learning library vaspml (experimental)
#CPP_OPTIONS += -Dlibvaspml
#CPP_OPTIONS += -DVASPML_USE_CBLAS
#CPP_OPTIONS += -DVASPML_DEBUG_LEVEL=3
#CXX_ML = mpic++ -mp
#CXXFLAGS_ML = -O3 -std=c++17 -Wall -Wextra
#INCLUDE_ML =
# Add -gpu=tripcount:host to compiler commands for NV HPC-SDK > 25.1
NVFORTRAN_VERSION := $(shell nvfortran --version | sed -n '2s/^nvfortran \([0-9.]*\).*/\1/p')
define greater_or_equal
$(shell printf '%s\n%s\n' '$(1)' '$(2)' | sort -V | head -n1 | grep -q '$(2)' && echo true || echo false)
endef
ifeq ($(call greater_or_equal,$(NVFORTRAN_VERSION),25.1),true)
CC += -gpu=tripcount:host
FC += -gpu=tripcount:host
endif
_______________________
The installation was successful, with all interfaces working properly, and VASP is working properly. My concern is the performance with the GPU. I ran calculations on the GPU and CPU (with np 32) using the same flags and files to test speed. It has been observed that the GPU is working at more than 90% performance, like 95, 96, sometimes 99 and so on. But the tie taken by the calculation is the same as the CPU's. I am not able to observe any fast performance on the GPU, as well as any errors in calculation. Please guide me if there is any installation issue or what I am missing in this.

