Both sides previous revision
Previous revision
Next revision
|
Previous revision
|
cluster:119 [2013/09/24 19:28] hmeij [lava.mvampich2.wrapper] |
cluster:119 [2021/06/17 19:32] (current) hmeij07 |
\\ | \\ |
**[[cluster:0|Back]]** | **[[cluster:0|Back]]** |
| |
Jobs need to be submitted to the scheduler on host sharptail itself for now and will be dispatched to nodes n33-n37 in queue mwgpu. | |
--- //[[hmeij@wesleyan.edu|Meij, Henk]] 2013/08/21 11:01// | |
| |
==== Submitting GPU Jobs ==== | ==== Submitting GPU Jobs ==== |
| |
| Please leave plenty of time between multiple GPU job submissions, on the order of minutes. |
| |
| Jobs need to be submitted to the scheduler via cottontail to queues mwgpu, amber128, exx96. |
| |
| This page is old; the gpu resource ''gpu4'' should now be used, and a more recent page can be found at [[cluster:173|K20 Redo Usage]]. There might still be some useful information on this page explaining gpu jobs, though. |
| --- //[[hmeij@wesleyan.edu|Henk]] 2021/06/17 15:29// |
| |
| **Articles** |
| |
| * [[http://www.pgroup.com/lit/articles/insider/v5n2a1.htm]] Tesla vs. Xeon Phi vs. Radeon: A Compiler Writer's Perspective |
| * [[http://www.pgroup.com/lit/articles/insider/v5n2a5.htm]] Calling CUDA Fortran kernels from MATLAB |
| |
| |
| |
</code> | </code> |
| |
With ''gpu-info'' we can view our running job. ''gpu-info'' and ''gpu-free'' are available [[http://ambermd.org/gpus/]] (I had to hard code my GPU string information as they came in at 02,03,82&83, you can use deviceQuery to find them). | With ''gpu-info'' we can view our running job. ''gpu-info'' and ''gpu-free'' are available <del>[[http://ambermd.org/gpus/]]</del> [[http://ambermd.org/gpus12/#Running]] (I had to hard code my GPU string information as they came in at 02,03,82&83, you can use deviceQuery to find them). |
| |
<code> | <code> |
3 Tesla K20m 21 C 0 % | 3 Tesla K20m 21 C 0 % |
==================================================== | ==================================================== |
| |
| [hmeij@sharptail sharptail]$ ssh n33 gpu-free |
| 1,3,0 |
| |
| |
| |
</code> | </code> |
#BSUB -q mwgpu | #BSUB -q mwgpu |
#BSUB -J test | #BSUB -J test |
| |
| # from greentail we need to set up the module env |
| export PATH=/home/apps/bin:/cm/local/apps/cuda50/libs/304.54/bin:\ |
| /cm/shared/apps/cuda50/sdk/5.0.35/bin/linux/release:/cm/shared/apps/lammps/cuda/2013-01-27/:\ |
| /cm/shared/apps/amber/amber12/bin:/cm/shared/apps/namd/ibverbs-smp-cuda/2013-06-02/:\ |
| /usr/lib64/qt-3.3/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/sbin:\ |
| /usr/sbin:/cm/shared/apps/cuda50/toolkit/5.0.35/bin:/cm/shared/apps/cuda50/sdk/5.0.35/bin/linux/release:\ |
| /cm/shared/apps/cuda50/libs/current/bin:/cm/shared/apps/cuda50/toolkit/5.0.35/open64/bin:\ |
| /cm/shared/apps/mvapich2/gcc/64/1.6/bin:/cm/shared/apps/mvapich2/gcc/64/1.6/sbin |
| export LD_LIBRARY_PATH=/cm/local/apps/cuda50/libs/304.54/lib64:\ |
| /cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/amber/amber12/lib:\ |
| /cm/shared/apps/amber/amber12/lib64:/cm/shared/apps/namd/ibverbs-smp-cuda/2013-06-02/:\ |
| /cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/cuda50/libs/current/lib64:\ |
| /cm/shared/apps/cuda50/toolkit/5.0.35/open64/lib:/cm/shared/apps/cuda50/toolkit/5.0.35/extras/CUPTI/lib:\ |
| /cm/shared/apps/mvapich2/gcc/64/1.6/lib |
| |
| |
## leave sufficient time between job submissions (30-60 secs) | ## leave sufficient time between job submissions (30-60 secs) |
| |
# NAMD | # NAMD |
# from greentail we need to recreate module env | |
export PATH=/home/apps/bin:/cm/local/apps/cuda50/libs/304.54/bin:\ | |
/cm/shared/apps/cuda50/sdk/5.0.35/bin/linux/release:/cm/shared/apps/lammps/cuda/2013-01-27/:\ | |
/cm/shared/apps/amber/amber12/bin:/cm/shared/apps/namd/ibverbs-smp-cuda/2013-06-02/:\ | |
/usr/lib64/qt-3.3/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/sbin:\ | |
/usr/sbin:/cm/shared/apps/cuda50/toolkit/5.0.35/bin:/cm/shared/apps/cuda50/sdk/5.0.35/bin/linux/release:\ | |
/cm/shared/apps/cuda50/libs/current/bin:/cm/shared/apps/cuda50/toolkit/5.0.35/open64/bin:\ | |
/cm/shared/apps/mvapich2/gcc/64/1.6/bin:/cm/shared/apps/mvapich2/gcc/64/1.6/sbin | |
export LD_LIBRARY_PATH=/cm/local/apps/cuda50/libs/304.54/lib64:\ | |
/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/amber/amber12/lib:\ | |
/cm/shared/apps/amber/amber12/lib64:/cm/shared/apps/namd/ibverbs-smp-cuda/2013-06-02/:\ | |
/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/cuda50/libs/current/lib64:\ | |
/cm/shared/apps/cuda50/toolkit/5.0.35/open64/lib:/cm/shared/apps/cuda50/toolkit/5.0.35/extras/CUPTI/lib:\ | |
/cm/shared/apps/mvapich2/gcc/64/1.6/lib | |
# signal that this is charmrun/namd job | # signal that this is charmrun/namd job |
export CHARMRUN=1 | export CHARMRUN=1 |
##BSUB -q mwgpu | ##BSUB -q mwgpu |
##BSUB -n 1 | ##BSUB -n 1 |
##BSUB -R "rusage[gpu=1],span[hosts=1]" | ##BSUB -R "rusage[gpu=1:mem=7000],span[hosts=1]" |
## signal GMXRC is a gpu run with: 1=thread_mpi | ## signal GMXRC is a gpu run with: 1=thread_mpi |
#export GMXRC=1 | #export GMXRC=1 |
#BSUB -q mwgpu | #BSUB -q mwgpu |
#BSUB -n 1 | #BSUB -n 1 |
#BSUB -R "rusage[gpu=1],span[hosts=1]" | #BSUB -R "rusage[gpu=1:mem=7000],span[hosts=1]" |
# signal GMXRC is a gpu run with: 2=mvapich2 | # signal GMXRC is a gpu run with: 2=mvapich2 |
export GMXRC=2 | export GMXRC=2 |
/cm/shared/apps/cuda50/libs/current/bin:/cm/shared/apps/cuda50/toolkit/5.0.35/open64/bin:\ | /cm/shared/apps/cuda50/libs/current/bin:/cm/shared/apps/cuda50/toolkit/5.0.35/open64/bin:\ |
/cm/shared/apps/mvapich2/gcc/64/1.6/bin:/cm/shared/apps/mvapich2/gcc/64/1.6/sbin | /cm/shared/apps/mvapich2/gcc/64/1.6/bin:/cm/shared/apps/mvapich2/gcc/64/1.6/sbin |
| export PATH=/share/apps/matlab/2013a/bin:$PATH |
export LD_LIBRARY_PATH=/cm/local/apps/cuda50/libs/304.54/lib64:\ | export LD_LIBRARY_PATH=/cm/local/apps/cuda50/libs/304.54/lib64:\ |
/cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/amber/amber12/lib:\ | /cm/shared/apps/cuda50/toolkit/5.0.35/lib64:/cm/shared/apps/amber/amber12/lib:\ |
#BSUB -q mwgpu | #BSUB -q mwgpu |
#BSUB -n 1 | #BSUB -n 1 |
#BSUB -R "rusage[gpu=1],span[hosts=1]" | #BSUB -R "rusage[gpu=1:mem=7000],span[hosts=1]" |
# signal MATGPU is a gpu run | # signal MATGPU is a gpu run |
export MATGPU=1 | export MATGPU=1 |
| |
exit $? | exit $? |
| |
| |
| </code> |
| |
| |
| ===== elim code ===== |
| |
| <code> |
| |
| #!/usr/bin/perl |
| |
| while (1) { |
| |
| $gpu = 0; |
| $log = ''; |
| if (-e "/usr/local/bin/gpu-info" ) { |
| $tmp = `/usr/local/bin/gpu-info | egrep "Tesla K20"`; |
| @tmp = split(/\n/,$tmp); |
| foreach $i (0..$#tmp) { |
| ($a,$b,$c,$d,$e,$f,$g) = split(/\s+/,$tmp[$i]); |
| if ( $f == 0 ) { $gpu = $gpu + 1; } |
| #print "$a $f $gpu\n"; |
| $log .= "$f,"; |
| } |
| } |
| # nr_of_args name1 value1 |
| $string = "1 gpu $gpu"; |
| |
| $h = `hostname`; chop($h); |
| $d = `date +%m/%d/%y_%H:%M:%S`; chop($d); |
| foreach $i ('n33','n34','n35','n36','n37') { |
| if ( "$h" eq "$i" ) { |
| `echo "$d,$log" >> /share/apps/logs/$h.gpu.log`; |
| } |
| } |
| |
| # you need the \n to flush -hmeij |
| # you also need the space before the line feed -hmeij |
| print "$string \n"; |
| # or use |
| #syswrite(OUT,$string,1); |
| |
| # smaller than specified in lsf.shared |
| sleep 10; |
| |
| } |
| |
| |