OK, the new version is much easier:
# devel packages for libjpeg, blas, lapack, boost
which nvcc
export PATH=/usr/local/mpich-3.1.4/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/mpich-3.1.4/lib:$LD_LIBRARY_PATH
which mpicxx

# untar stable.gz
cd lib/gpu
vi Makefile.linux.double
# edits
CUDA_HOME = /usr/local/cuda
# best fitting ARCH for GTX 1080 with CUDA 8
CUDA_ARCH = -arch=sm_61
# http://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
CUDA_PRECISION = -D_DOUBLE_DOUBLE

make -f Makefile.linux.double
ls -lrt
# new files: libgpu.a and Makefile.lammps

cd ../../src/
vi MAKE/Makefile.mpi
# edits
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 -DLAMMPS_JPEG
JPG_INC = -I/usr/include
JPG_PATH = -L/usr/lib64
JPG_LIB = -ljpeg

make yes-gpu
make yes-colloid
make yes-class2
make yes-kspace
make yes-misc
make yes-molecule

which mpicxx
make mpi
mv lmp_mpi /usr/local/lammps-11Aug17/lmp_mpi-double-double-with-gpu

# repeat from scratch for SINGLE_SINGLE and SINGLE_DOUBLE
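That repeat looks roughly like this; a minimal sketch assuming the same lib/gpu workflow as above, with only CUDA_PRECISION changed in Makefile.linux.double. The single/mixed destination names are my own, mirroring the double-double name above.

# sketch: rebuild lib/gpu and src for the other two precisions
cd lib/gpu
vi Makefile.linux.double              # set CUDA_PRECISION = -D_SINGLE_SINGLE (or -D_SINGLE_DOUBLE)
make -f Makefile.linux.double clean   # or remove *.o and libgpu.a by hand
make -f Makefile.linux.double
cd ../../src/
make clean-all
make mpi
# destination names below are my own, following the double-double example
mv lmp_mpi /usr/local/lammps-11Aug17/lmp_mpi-single-single-with-gpu
# mv lmp_mpi /usr/local/lammps-11Aug17/lmp_mpi-single-double-with-gpu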
This page exists because it took me so long to get the GPU side of things compiled. I've compiled it two ways, and the installed packages differ between the two versions.
Initially the GPU binary would hang after launch in a nanosleep loop. My bad: my submit script had an incomplete PATH. We need both /cm/shared/…/bin and /cm/local/…/bin on the PATH for the GPU to work properly.
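Concretely, on this cluster those are the two CUDA bin directories that the GPU submit script further down exports:

# both are needed for the GPU binary to run (taken from the GPU submit script below)
export PATH=/cm/shared/apps/cuda50/toolkit/5.0.35/bin:$PATH
export PATH=/cm/local/apps/cuda50/libs/304.54/bin:$PATH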
Submit scripts for serial, CPU, and GPU runs follow. The environment is CentOS 6.x, OpenLava 3.x, and the 31Mar17 LAMMPS release.
# on n33, a gpu node
mkdir /tmp/lammps; cd !$
gunzip lammps-stable.tar.gz
tar xvf lammps-stable.tar
chown -R hmeij:its lammps-31Mar17
su - hmeij

[hmeij@n33 src]$ which icc
/share/apps/intel/parallel_studio_xe_2016_update3/compilers_and_libraries_2016.3.210/linux/bin/intel64/icc
[hmeij@n33 src]$ which ifort
/share/apps/intel/parallel_studio_xe_2016_update3/compilers_and_libraries_2016.3.210/linux/bin/intel64/ifort
[hmeij@n33 src]$ echo $INTEL_LICENSE_FILE
/share/apps/intel/parallel_studio_xe_2016_update3/flexlm/server.lic:/share/apps/intel/parallel_studio_xe_2016_update3/compilers_and_libraries_2016.3.210/linux/licenses:/opt/intel/licenses:/home/hmeij/intel/licenses
[hmeij@n33 src]$ echo $MKL_HOME

# not using icc/icpc/ifort but g++ only
#no export MKL_HOME=/share/apps/intel/parallel_studio_xe_2016_update3/compilers_and_libraries_2016.3.210/linux/mkl

cd /tmp/lammps/lammps-31Mar17/
cd src
make yes-colloid
make yes-class2
make yes-kspace
make yes-misc
make yes-molecule
make no-user-misc
make no-user-vtk
make no-user-omp

cd STUBS/
make
cd ..

#no cp MAKE/Makefile.serial MAKE/Makefile.serial_icc
# edit MAKE/Makefile.serial
# leave g++ (not icc/icpc); add -DLAMMPS_JPEG to LMP_INC
# JPG_INC  = -I/usr/include
# JPG_PATH = -L/usr/lib64
# JPG_LIB  = -ljpeg
# leave FFT_INC, FFT_PATH, FFT_LIB blank for KISS FFT

#no make serial_icc
make serial

size ../lmp_serial_icc
   text    data     bss      dec     hex filename
8472387  248912    2464  8723763  851d33 ../lmp_serial_icc

#no mv lmp_serial_icc /share/apps/CENTOS6/lammps/31Mar17/
mv lmp_serial /share/apps/CENTOS6/lammps/31Mar17/

# note: compile fails for mvapich2, trying with openmpi
#/cm/shared/apps/mvapich2/gcc/64/1.6/bin/mpicxx   <---------- has to be this for the wrapper
export LD_LIBRARY_PATH=/share/apps/CENTOS6/openmpi/1.8.4/lib/:$LD_LIBRARY_PATH
export PATH=/share/apps/CENTOS6/openmpi/1.8.4/bin:$PATH
which mpicxx
/share/apps/CENTOS6/openmpi/1.8.4/bin/mpicxx     <------ works, will need the logic of the mvapich2 wrapper in a new openmpi wrapper

# perform edits in the makefile as above but leave mpicxx
# leave MPI_INC, MPI_PATH, MPI_LIB as is, blank
make mpi

size ../lmp_mpi
   text    data     bss      dec     hex filename
5630638    8320   16784  5655742  564cbe ../lmp_mpi

mv lmp_mpi /share/apps/CENTOS6/lammps/31Mar17/

### CUDA, use icc/mpi env from above
export PATH=/share/apps/CENTOS6/python/2.7.9/bin:$PATH
export LD_LIBRARY_PATH=/share/apps/CENTOS6/python/2.7.9/lib:$LD_LIBRARY_PATH
which python

export PATH=/cm/shared/apps/cuda50/toolkit/5.0.35/bin:$PATH
export PATH=/cm/local/apps/cuda50/libs/304.54/bin:$PATH
export LD_LIBRARY_PATH="/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/cuda50/toolkit/5.0.35/lib:$LD_LIBRARY_PATH"
# this is for libcuda.so
export LD_LIBRARY_PATH="/cm/local/apps/cuda50/libs/304.54/lib64:/cm/local/apps/cuda50/libs/304.54/lib/:$LD_LIBRARY_PATH"

cd /tmp/lammps/lammps-31Mar17/
cd src
make yes-gpu
make yes-colloid
make yes-class2
make yes-kspace
make yes-misc
make yes-molecule
which mpicxx
which nvcc

# edit lib/gpu/Makefile.auto to set CUDA_HOME -- this had no effect
export CUDA_HOME=/cm/shared/apps/cuda50/toolkit/current
export CUDA_INSTALL_PATH=/cm/shared/apps/cuda50/toolkit/5.0.35
# had to make links
# /usr/local/cuda -> /cm/shared/apps/cuda50/toolkit/current
# /cm/shared/apps/cuda50/toolkit/current/lib64/libcuda.so ->
#   /cm/local/apps/cuda50/libs/304.54/lib64/libcuda.so

# note the -a flag is missing in the Make.list examples, how can that be?
# also, -o is ignored and it generates lmp_auto, totally weird
make clean
./Make.py -v -j 2 -p colloid class2 kspace misc molecule gpu \
  -gpu mode=double arch=35 -o gpu_double -a lib-gpu file clean mpi

size ../lmp_auto
    text    data     bss      dec     hex filename
11831705   10608  299016 12141329  b94311 ../lmp_auto

mv lmp_auto /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_double

# repeat for single and mixed modes
# nanosleep problem due to incomplete $PATH, fixed,
# need both /cm/shared... and /cm/local...
# the Malaysian approach also works, see the install file below
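The repeat for single and mixed precision would look roughly like this; a sketch assuming Make.py's -gpu mode keyword accepts single and mixed alongside double, with the lmp_gpu_single and lmp_gpu_mixed names chosen by me to mirror lmp_gpu_double above.

# sketch: same Make.py invocation with the other gpu precision modes
make clean
./Make.py -v -j 2 -p colloid class2 kspace misc molecule gpu \
  -gpu mode=single arch=35 -o gpu_single -a lib-gpu file clean mpi
mv lmp_auto /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_single

make clean
./Make.py -v -j 2 -p colloid class2 kspace misc molecule gpu \
  -gpu mode=mixed arch=35 -o gpu_mixed -a lib-gpu file clean mpi
mv lmp_auto /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_mixed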
Same environment as above.
# source, many thanks:
# http://comsics.usm.my/tlyoon/configrepo/howto/customise_centos/inst_lammps_31Mar17_gnu.txt
# lammps installation (version 31Mar2017)
# hmeij - edit the Makefiles and add -DLAMMPS_JPEG etc, fix path in *.lammps.installed
# for -I/usr/lib64/atlas, leave FFTW lines blank

cd /share/apps
wget http://www2.fizik.usm.my/configrepo/howto/mylammps/lammps-31Mar17.tar
tar -xvf lammps-31Mar17.tar
cd lammps-31Mar17
mylammpsdir=$(echo $PWD)

cd $mylammpsdir/src/MAKE
wget http://comsics.usm.my/tlyoon/configrepo/howto/customise_centos/lammps/Makefile.gnu

# atc #
cd $mylammpsdir/lib/atc
rm -f *.a *.o
wget http://comsics.usm.my/tlyoon/configrepo/howto/customise_centos/lammps/Makefile.lammps.installed.atc
rm -rf Makefile.lammps.installed
cp Makefile.lammps.installed.atc Makefile.lammps.installed
make -f Makefile.mpic++

# awpmd #
cd $mylammpsdir/lib/awpmd
rm -f *.a *.o
wget http://comsics.usm.my/tlyoon/configrepo/howto/customise_centos/lammps/Makefile.lammps.installed.awpmd
rm -rf Makefile.lammps.installed
cp Makefile.lammps.installed.awpmd Makefile.lammps.installed
make -f Makefile.mpicc

# linalg #
cd $mylammpsdir/lib/linalg
rm -f *.a *.o
make -f Makefile.gfortran

# gpu NVIDIA CUDA on n33 compute node #
cd $mylammpsdir/lib/gpu
rm -f *.a *.o
make -f Makefile.linux.double

# meam #
cd $mylammpsdir/lib/meam
rm -f *.a *.o
make -f Makefile.gfortran

# poems #
cd $mylammpsdir/lib/poems
rm -f *.a *.o
make -f Makefile.g++

# reax #
cd $mylammpsdir/lib/reax
rm -f *.a *.o
make -f Makefile.gfortran

# qmmm #
cd $mylammpsdir/lib/qmmm
rm -f *.a *.o
make -f Makefile.gfortran

# colvars #
cd $mylammpsdir/lib/colvars
rm -f *.a *.o
make -f Makefile.g++

# Now exit /lib
cd $mylammpsdir/src
make yes-all
make no-user-quip
make no-user-smd
make no-user-vtk
make no-kim
make no-voronoi
make no-kokkos
make no-mscg
make no-user-h5md

# then serial & double
make gnu
make gpu
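A quick sanity check after the build; this assumes the two make targets above follow LAMMPS's usual lmp_<machine> naming and leave lmp_gnu and lmp_gpu in src/.

cd $mylammpsdir/src
./lmp_gnu -h | head -5          # should print the LAMMPS help/version banner
./lmp_gpu -h | head -5
ldd ./lmp_gpu | grep -i cuda    # confirm the GPU binary resolves its CUDA libraries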
# serial and mpi
#!/bin/bash
# submit via 'bsub < run.gpu'
rm -f out err auout.[0-9]*
#BSUB -e err
#BSUB -o out
#BSUB -q mw128
#BSUB -J "GPU test"
#BSUB -n 4

# unique job scratch dirs
MYSANSCRATCH=/sanscratch/$LSB_JOBID
MYLOCALSCRATCH=/localscratch/$LSB_JOBID
export MYSANSCRATCH MYLOCALSCRATCH
cd $MYSANSCRATCH

# LAMMPS
export LD_LIBRARY_PATH=/share/apps/CENTOS6/openmpi/1.8.4/lib/:$LD_LIBRARY_PATH
export PATH=/share/apps/CENTOS6/openmpi/1.8.4/bin:$PATH
export PATH=/cm/shared/apps/cuda50/toolkit/5.0.35/bin:$PATH
export LD_LIBRARY_PATH="/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/cuda50/toolkit/5.0.35/lib:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/cm/local/apps/cuda50/libs/304.54/lib64:/cm/local/apps/cuda50/libs/304.54/lib/:$LD_LIBRARY_PATH"
export CUDA_HOME=/cm/shared/apps/cuda50/toolkit/current
export CUDA_INSTALL_PATH=/cm/shared/apps/cuda50/toolkit/5.0.35
export PATH=/share/apps/CENTOS6/python/2.7.9/bin:$PATH
export LD_LIBRARY_PATH=/share/apps/CENTOS6/python/2.7.9/lib:$LD_LIBRARY_PATH
ldd /share/apps/CENTOS6/lammps/31Mar17/lmp_mpi

# GPUIDX=1 use allocated GPU(s), GPUIDX=0 cpu run only (view header au.inp)
export GPUIDX=0  # <---------- flag, change lmp_serial to lmp_mpi below

# stage the data
scp -r ~/lammps/colloid-gpu/* .

# feed the binary
/share/apps/CENTOS6/lammps/31Mar17/lmp_mpi -h
/share/apps/CENTOS6/lammps/31Mar17/lmp_mpi \
  -var GPUIDX $GPUIDX -in in.colloid -l out.colloid

# save results
scp out.colloid $HOME/lammps/colloid-gpu/out.colloid.$LSB_JOBID
And …
# gpu
#!/bin/bash
# submit via 'bsub < run.gpu'
rm -f out err
#BSUB -e err
#BSUB -o out
#BSUB -q mwgpu
#BSUB -J "GPU test"
#BSUB -m n33
#BSUB -n 1
#BSUB -R "rusage[gpu=1:mem=6144],span[hosts=1]"

# unique job scratch dirs
MYSANSCRATCH=/sanscratch/$LSB_JOBID
MYLOCALSCRATCH=/localscratch/$LSB_JOBID
export MYSANSCRATCH MYLOCALSCRATCH
cd $MYSANSCRATCH

# LAMMPS
export LD_LIBRARY_PATH=/share/apps/CENTOS6/openmpi/1.8.4/lib/:$LD_LIBRARY_PATH
export PATH=/share/apps/CENTOS6/openmpi/1.8.4/bin:$PATH
export PATH=/cm/shared/apps/cuda50/toolkit/5.0.35/bin:$PATH
export PATH=/cm/local/apps/cuda50/libs/304.54/bin:$PATH
export LD_LIBRARY_PATH="/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/cuda50/toolkit/5.0.35/lib:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/cm/local/apps/cuda50/libs/304.54/lib64:/cm/local/apps/cuda50/libs/304.54/lib/:$LD_LIBRARY_PATH"
export CUDA_HOME=/cm/shared/apps/cuda50/toolkit/current
export CUDA_INSTALL_PATH=/cm/shared/apps/cuda50/toolkit/5.0.35
export PATH=/share/apps/CENTOS6/python/2.7.9/bin:$PATH
export LD_LIBRARY_PATH=/share/apps/CENTOS6/python/2.7.9/lib:$LD_LIBRARY_PATH
ldd /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_double

# GPUIDX=1 use allocated GPU(s), GPUIDX=0 cpu run only (view header au.inp)
export GPUIDX=1

# stage the data
scp -r ~/lammps/colloid-gpu/* .

# feed the wrapper
openmpi-mpirun-gpu /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu -h
openmpi-mpirun-gpu /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu \
  -suffix gpu -var GPUIDX $GPUIDX -in in.colloid -l lmp_gpu.out.colloid

openmpi-mpirun-gpu /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_double -h
openmpi-mpirun-gpu /share/apps/CENTOS6/lammps/31Mar17/lmp_gpu_double \
  -suffix gpu -var GPUIDX $GPUIDX -in in.colloid -l lmp_gpu_double.out.colloid

## save results
scp lmp_gpu.out.colloid $HOME/lammps/colloid-gpu/lmp_gpu.out.colloid.$LSB_JOBID
scp lmp_gpu_double.out.colloid $HOME/lammps/colloid-gpu/lmp_gpu_double.out.colloid.$LSB_JOBID
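A typical submit-and-check sequence, assuming the GPU script above is saved as run.gpu; bjobs and bpeek are the standard OpenLava/LSF job commands.

bsub < run.gpu        # submit to the mwgpu queue on n33
bjobs                 # watch the job go from PEND to RUN
bpeek JOBID           # peek at stdout while it runs (replace JOBID)
# results end up in ~/lammps/colloid-gpu/ suffixed with the job id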