This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
cluster:119 [2013/08/21 18:59] hmeij |
cluster:119 [2021/06/17 19:32] (current) hmeij07 |
||
---|---|---|---|
Line 1: | Line 1: | ||
\\ | \\ | ||
**[[cluster: | **[[cluster: | ||
- | |||
- | Jobs need to be submitted to the scheduler on host sharptail itself for now and will be dispatched to nodes n33-n37 in queue mwgpu. | ||
- | --- // | ||
==== Submitting GPU Jobs ==== | ==== Submitting GPU Jobs ==== | ||
+ | |||
+ | Please allow plenty of time between multiple GPU job submissions. | ||
+ | |||
+ | Jobs need to be submitted to the scheduler via cottontail to queues mwgpu, amber128, exx96. | ||
+ | |||
+ | This page is old, the gpu resource '' | ||
+ | --- // | ||
+ | |||
+ | **Articles** | ||
+ | |||
+ | * [[http:// | ||
+ | * [[http:// | ||
+ | |||
Line 44: | Line 54: | ||
</ | </ | ||
- | With '' | + | With '' |
< | < | ||
Line 58: | Line 68: | ||
3 Tesla K20m 21 C 0 % | 3 Tesla K20m 21 C 0 % | ||
==================================================== | ==================================================== | ||
+ | |||
+ | [hmeij@sharptail sharptail]$ ssh n33 gpu-free | ||
+ | 1,3,0 | ||
+ | |||
+ | |||
</ | </ | ||
Line 121: | Line 136: | ||
#BSUB -q mwgpu | #BSUB -q mwgpu | ||
#BSUB -J test | #BSUB -J test | ||
+ | |||
+ | # from greentail we need to set up the module env | ||
+ | export PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | export LD_LIBRARY_PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
## leave sufficient time between job submissions (30-60 secs) | ## leave sufficient time between job submissions (30-60 secs) | ||
Line 171: | Line 202: | ||
</ | </ | ||
+ | |||
+ | ==== gromacs.sub ==== | ||
+ | |||
+ | < | ||
+ | |||
+ | #!/bin/bash | ||
+ | |||
+ | rm -rf gromacs.out gromacs.err \#* *.log | ||
+ | |||
+ | # from greentail we need to recreate module env | ||
+ | export PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | export LD_LIBRARY_PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
+ | #BSUB -o gromacs.out | ||
+ | #BSUB -e gromacs.err | ||
+ | #BSUB -N | ||
+ | #BSUB -J 325monolayer | ||
+ | |||
+ | # read / | ||
+ | . / | ||
+ | export VMDDIR=/ | ||
+ | |||
+ | ## CPU RUN: queue mw256, n<=28, must run on one node (thread_mpi) | ||
+ | ##BSUB -q mw256 | ||
+ | ##BSUB -n 2 | ||
+ | ##BSUB -R " | ||
+ | #export PATH=/ | ||
+ | #. / | ||
+ | #mdrun -nt 2 -s 325topol.tpr -c 325monolayer.gro -e 325ener.edr -o 325traj.trr -x 325traj.xtc | ||
+ | |||
+ | ## GPU RUN: gpu (1-4), queue mwgpu, n (1-4, matches gpu count), must run on one node | ||
+ | ##BSUB -q mwgpu | ||
+ | ##BSUB -n 1 | ||
+ | ##BSUB -R " | ||
+ | ## signal GMXRC is a gpu run with: 1=thread_mpi | ||
+ | #export GMXRC=1 | ||
+ | #export PATH=/ | ||
+ | #. / | ||
+ | # | ||
+ | # | ||
+ | |||
+ | # GPU RUN: gpu (1-4), queue mwgpu, n (1-4, matches gpu count), must run on one node | ||
+ | #BSUB -q mwgpu | ||
+ | #BSUB -n 1 | ||
+ | #BSUB -R " | ||
+ | # signal GMXRC is a gpu run with: 2=mvapich2 | ||
+ | export GMXRC=2 | ||
+ | export PATH=/ | ||
+ | . / | ||
+ | lava.mvapich2.wrapper mdrun_mpi \ | ||
+ | -testverlet -s 325topol.tpr -c 325monolayer.gro -e 325ener.edr -o 325traj.trr -x 325traj.xtc | ||
+ | |||
+ | |||
+ | </ | ||
+ | |||
+ | ==== matlab.sub ==== | ||
+ | |||
+ | < | ||
+ | |||
+ | #!/bin/bash | ||
+ | |||
+ | rm -rf out err *.out | ||
+ | |||
+ | # from greentail we need to recreate module env | ||
+ | export PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | export PATH=/ | ||
+ | export LD_LIBRARY_PATH=/ | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
+ | #BSUB -o out | ||
+ | #BSUB -e err | ||
+ | #BSUB -N | ||
+ | #BSUB -J test | ||
+ | |||
+ | # GPU RUN: (1-4), queue mwgpu, n (1-4, matches gpu count), must run on one node | ||
+ | #BSUB -q mwgpu | ||
+ | #BSUB -n 1 | ||
+ | #BSUB -R " | ||
+ | # signal MATGPU is a gpu run | ||
+ | export MATGPU=1 | ||
+ | lava.mvapich2.wrapper matlab -nodisplay | ||
+ | |||
+ | |||
+ | </ | ||
==== lava.mvapich2.wrapper ==== | ==== lava.mvapich2.wrapper ==== | ||
Line 275: | Line 411: | ||
#debug# echo " | #debug# echo " | ||
- | if [ $CHARMRUN -eq 1 ]; then | + | |
+ | if [ -n " | ||
+ | # gromacs needs them from base 0, so gpu 2,3 is string 01 | ||
+ | if [ ${# | ||
+ | gmxrc_gpus=" | ||
+ | elif [ ${# | ||
+ | gmxrc_gpus=" | ||
+ | elif [ ${# | ||
+ | gmxrc_gpus=" | ||
+ | elif [ ${# | ||
+ | gmxrc_gpus=" | ||
+ | fi | ||
+ | |||
+ | if [ $GMXRC -eq 1 ]; then | ||
+ | newargs=`echo ${MYARGS} | sed " | ||
+ | echo " | ||
+ | $newargs | ||
+ | elif [ $GMXRC -eq 2 ]; then | ||
+ | newargs=`echo ${MYARGS} | sed " | ||
+ | echo " | ||
+ | ${MPIRUN} -ssh -hostfile ${MACHFILE} -np $gpunp $newargs | ||
+ | fi | ||
+ | |||
+ | elif [ -n " | ||
+ | echo " | ||
+ | ${MYARGS} | ||
+ | elif [ -n " | ||
cat ${MACHFILE}.lst | tr '\/ ' ' | cat ${MACHFILE}.lst | tr '\/ ' ' | ||
echo " | echo " | ||
Line 283: | Line 445: | ||
${MPIRUN} -ssh -hostfile ${MACHFILE} -np $gpunp ${MYARGS} | ${MPIRUN} -ssh -hostfile ${MACHFILE} -np $gpunp ${MYARGS} | ||
fi | fi | ||
+ | |||
exit $? | exit $? | ||
+ | |||
+ | |||
+ | </ | ||
+ | |||
+ | |||
+ | ===== elim code ===== | ||
+ | |||
+ | < | ||
+ | |||
+ | # | ||
+ | |||
+ | while (1) { | ||
+ | |||
+ | $gpu = 0; | ||
+ | $log = ''; | ||
+ | if (-e "/ | ||
+ | $tmp = `/ | ||
+ | @tmp = split(/ | ||
+ | foreach $i (0..$#tmp) { | ||
+ | ($a, | ||
+ | if ( $f == 0 ) { $gpu = $gpu + 1; } | ||
+ | #print "$a $f $gpu\n"; | ||
+ | $log .= " | ||
+ | } | ||
+ | } | ||
+ | # nr_of_args name1 value1 | ||
+ | $string = "1 gpu $gpu"; | ||
+ | |||
+ | $h = `hostname`; chop($h); | ||
+ | $d = `date +%m/ | ||
+ | foreach $i (' | ||
+ | if ( " | ||
+ | `echo " | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # you need the \n to flush -hmeij | ||
+ | # you also need the space before the line feed -hmeij | ||
+ | print " | ||
+ | # or use | ||
+ | # | ||
+ | |||
+ | # smaller than specified in lsf.shared | ||
+ | sleep 10; | ||
+ | |||
+ | } | ||