[[https://
| + | |||
| + | Vanilla out of the box with these changes | ||
  * MaxJobCount=120000
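
Such a change goes straight into slurm.conf on the controller. A minimal sketch, assuming the default config location (which varies per install):

<code>
# /etc/slurm/slurm.conf (fragment) -- raise the number of jobs slurmctld will track
MaxJobCount=120000

# then have the daemons re-read the file (some parameters need a slurmctld restart)
# scontrol reconfigure
</code>
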
| |100, | |100, | ||

The N=16 is 8 one-core VMs plus swallowtail itself (8 cores). So we'
</code>
After fixing that. Hmmm.

^NrJobs^N^hh:mm^
| 1,
|10,
|15,
|20,

Debug Level is 3 above. Falling back to proctrack/

^NrJobs^N^hh:mm^
| 1,
|10,
|25,
|50,
|75,
|100,
| + | |||
| + | Next I will add a proplog/ | ||
| + | / | ||
| + | |||
| + | < | ||
| + | #!/bin/bash | ||
| + | / | ||
| + | |||
| + | #SBATCH --job-name=" | ||
| + | #SBATCH --output=" | ||
| + | #SBATCH --begin=10: | ||
| + | |||
| + | # unique job scratch dir | ||
| + | export MYLOCALSCRATCH=/ | ||
| + | cd $MYLOCALSCRATCH | ||
| + | pwd | ||
| + | |||
| + | echo " | ||
| + | date >> foo | ||
| + | cat foo | ||
| + | |||
| + | / | ||
| + | </ | ||
| + | |||
| + | |||
^NrJobs^N^hh:mm^
| 1,
| 5,
|10,
|25,

| + | ==== MPI ==== | ||
| + | |||
| + | With '' | ||
| + | |||
| + | < | ||
| + | |||
| + | #!/bin/bash | ||
| + | #/ | ||
| + | |||
| + | #SBATCH --job-name=" | ||
| + | #SBATCH --ntasks=8 | ||
| + | #SBATCH --begin=now | ||
| + | |||
| + | # unique job scratch dir | ||
| + | #export MYLOCALSCRATCH=/ | ||
| + | #cd $MYLOCALSCRATCH | ||
| + | |||
| + | echo " | ||
| + | |||
| + | rm -rf err out logfile mdout restrt mdinfo | ||
| + | |||
| + | export PATH=/ | ||
| + | export LD_LIBRARY_PATH=/ | ||
| + | which mpirun | ||
| + | |||
| + | mpirun / | ||
| + | -i inp/mini.in -p 1g6r.cd.parm -c 1g6r.cd.randions.crd.1 \ | ||
| + | -ref 1g6r.cd.randions.crd.1 | ||
| + | |||
| + | #/ | ||
| + | |||
| + | </ | ||
| + | |||
When submitted, we see:

| + | < | ||
| + | |||
| + | JOBID PARTITION | ||
| + | 902246 | ||
| + | |||
| + | </ | ||
| + | |||
Dumping the environment, we observe some key parameters.

| + | < | ||
| + | |||
| + | SLURM_NODELIST=v[1-8] | ||
| + | SLURM_JOB_NAME=MPI | ||
| + | SLURMD_NODENAME=v1 | ||
| + | SLURM_NNODES=8 | ||
| + | SLURM_NTASKS=8 | ||
| + | SLURM_TASKS_PER_NODE=1(x8) | ||
| + | SLURM_NPROCS=8 | ||
| + | SLURM_CPUS_ON_NODE=1 | ||
| + | SLURM_JOB_NODELIST=v[1-8] | ||
| + | SLURM_JOB_CPUS_PER_NODE=1(x8) | ||
| + | SLURM_JOB_NUM_NODES=8 | ||
| + | |||
| + | </ | ||
| + | |||
| + | And in the slurmjob.log file | ||
| + | |||
| + | < | ||
| + | |||
| + | JobId=902245 UserId=hmeij(8216) GroupId=its(623) \ | ||
| + | Name=MPI JobState=COMPLETED Partition=test TimeLimit=UNLIMITED \ | ||
| + | StartTime=2014-08-21T15: | ||
| + | NodeList=v[1-8] NodeCnt=8 ProcCnt=8 WorkDir=/ | ||
| + | |||
| + | </ | ||
| \\ | \\ | ||
**[[cluster: