This is an old revision of the document!
[hmeij@cottontail lammps]$ bsub < blcr_wrapper
Job <681> is submitted to queue <test>.
[hmeij@cottontail lammps]$ bjobs
JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME
681 hmeij PEND test cottontail test Mar 29 13:23
[hmeij@cottontail lammps]$ bjobs
JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME
681 hmeij RUN test cottontail petaltail test Mar 29 13:23
petaltail
petaltail
petaltail
[hmeij@cottontail lammps]$ tail ~/.lsbatch/1459272204.681.out
160 4062132.4 -4439564.1 1.4618689e+08 2.8818933e+08 17552.743
170 4395340.9 -4440084.6 1.3417499e+08 2.8818925e+08 19150.394
180 4711438.8 -4440426.5 1.2277977e+08 2.8818918e+08 20665.317
190 5007151.9 -4440573.2 1.1211925e+08 2.8818913e+08 22081.756
200 5279740.3 -4440516 1.0229219e+08 2.8818909e+08 23386.523
210 5527023.5 -4440257.5 93377214 2.8818906e+08 24569.109
220 5747387.7 -4439813.3 85432510 2.8818904e+08 25621.734
230 5939773.3 -4439214.4 78496309 2.8818904e+08 26539.282
240 6103647.2 -4438507.6 72587871 2.8818905e+08 27319.145
250 6238961.8 -4437755.5 67708974 2.8818907e+08 27961.064
[hmeij@cottontail lammps]$ ll /sanscratch/checkpoints/681
total 30572
-rw------- 1 hmeij its 8704 Mar 29 13:28 1459272204.681.err
-rw------- 1 hmeij its 5686 Mar 29 13:28 1459272204.681.out
-rw-r--r-- 1 hmeij its 2652 Mar 29 13:28 au.inp
-rw-r--r-- 1 hmeij its 0 Mar 29 13:28 auout
-rw-r--r-- 1 hmeij its 38310 Mar 29 13:28 auu3
-r-------- 1 hmeij its 289714 Mar 29 13:28 chk.9127
-rw-r--r-- 1 hmeij its 21342187 Mar 29 13:28 data.Big11AuSAMInitial
-rw-r--r-- 1 hmeij its 9598629 Mar 29 13:28 henz.dump
drwx------ 3 hmeij its 46 Mar 29 13:28 ompi_global_snapshot_9134.ckpt
-rw-r--r-- 1 hmeij its 16 Mar 29 13:23 pwd.9127
[hmeij@cottontail lammps]$ ssh petaltail ps -u hmeij
PID TTY TIME CMD
5762 ? 00:00:00 sshd
5763 pts/1 00:00:00 bash
9104 ? 00:00:00 res
9110 ? 00:00:00 1459272204.681
9113 ? 00:00:00 1459272204.681.
9127 ? 00:00:00 cr_mpirun
9128 ? 00:00:00 blcr_watcher
9133 ? 00:00:00 cr_mpirun
9134 ? 00:00:00 mpirun
9135 ? 00:00:00 sleep
9136 ? 00:05:55 lmp_mpi
9137 ? 00:06:07 lmp_mpi
9138 ? 00:05:52 lmp_mpi
9139 ? 00:05:53 lmp_mpi
9347 ? 00:00:00 sleep
9369 ? 00:00:00 sshd
9370 ? 00:00:00 ps
18559 pts/2 00:00:00 bash
[hmeij@cottontail lammps]$ tail ~/.lsbatch/1459272204.681.out
190 5007151.9 -4440573.2 1.1211925e+08 2.8818913e+08 22081.756
200 5279740.3 -4440516 1.0229219e+08 2.8818909e+08 23386.523
210 5527023.5 -4440257.5 93377214 2.8818906e+08 24569.109
220 5747387.7 -4439813.3 85432510 2.8818904e+08 25621.734
230 5939773.3 -4439214.4 78496309 2.8818904e+08 26539.282
240 6103647.2 -4438507.6 72587871 2.8818905e+08 27319.145
250 6238961.8 -4437755.5 67708974 2.8818907e+08 27961.064
260 6346104.5 -4437033.7 63845731 2.881891e+08 28466.852
270 6425840.1 -4436426.7 60970646 2.8818913e+08 28840.129
280 6479251.9 -4436021.1 59044759 2.8818917e+08 29086.075
[hmeij@cottontail lammps]$ ssh petaltail kill 9133
[hmeij@cottontail lammps]$ bsub < blcr_wrapper
Job <684> is submitted to queue <test>.
[hmeij@cottontail lammps]$ rm -f ../.lsbatch/*^C
[hmeij@cottontail lammps]$ bjobs
JOBID USER STAT QUEUE FROM_HOST EXEC_HOST JOB_NAME SUBMIT_TIME
684 hmeij RUN test cottontail petaltail test Mar 29 13:48
petaltail
petaltail
petaltail
[hmeij@cottontail lammps]$ ll ../.lsbatch/
total 172
-rw------- 1 hmeij its 8589 Mar 29 13:48 1459272204.681.err
-rw------- 1 hmeij its 5686 Mar 29 13:48 1459272204.681.out
-rwx------ 1 hmeij its 4609 Mar 29 13:48 1459273700.684
-rw------- 1 hmeij its 9054 Mar 29 13:48 1459273700.684.err
-rw------- 1 hmeij its 53 Mar 29 13:48 1459273700.684.out
-rwxr--r-- 1 hmeij its 4270 Mar 29 13:48 1459273700.684.shell
lrwxrwxrwx 1 hmeij its 33 Mar 29 13:48 hostfile.681 -> /home/hmeij/.lsbatch/hostfile.684
-rw-r--r-- 1 hmeij its 40 Mar 29 13:48 hostfile.684
-rw-r--r-- 1 hmeij its 40 Mar 29 13:48 hostfile.tmp.684
[hmeij@cottontail lammps]$ less ../.lsbatch/*684.err
[hmeij@cottontail lammps]$ ssh petaltail ps -u hmeij
PID TTY TIME CMD
5762 ? 00:00:00 sshd
5763 pts/1 00:00:00 bash
9127 ? 00:00:00 cr_mpirun
9136 ? 00:00:34 lmp_mpi
9137 ? 00:00:34 lmp_mpi
9138 ? 00:00:34 lmp_mpi
9139 ? 00:00:34 lmp_mpi
9994 ? 00:00:00 res
10002 ? 00:00:00 1459273700.684
10005 ? 00:00:00 1459273700.684.
10039 ? 00:00:00 cr_restart
10051 ? 00:00:00 cr_mpirun
10052 ? 00:00:00 mpirun
10053 ? 00:00:00 blcr_watcher
10054 ? 00:00:00 sleep
10055 ? 00:00:00 sleep
10056 ? 00:00:01 cr_restart
10057 ? 00:00:01 cr_restart
10058 ? 00:00:02 cr_restart
10059 ? 00:00:02 cr_restart
10151 ? 00:00:00 sshd
10152 ? 00:00:00 ps
18559 pts/2 00:00:00 bash
[hmeij@cottontail lammps]$ tail -20 ../.lsbatch/1459272204.681.out
210 5527023.5 -4440257.5 93377214 2.8818906e+08 24569.109
220 5747387.7 -4439813.3 85432510 2.8818904e+08 25621.734
230 5939773.3 -4439214.4 78496309 2.8818904e+08 26539.282
240 6103647.2 -4438507.6 72587871 2.8818905e+08 27319.145
250 6238961.8 -4437755.5 67708974 2.8818907e+08 27961.064
260 6346104.5 -4437033.7 63845731 2.881891e+08 28466.852
270 6425840.1 -4436426.7 60970646 2.8818913e+08 28840.129
280 6479251.9 -4436021.1 59044759 2.8818917e+08 29086.075
290 6507681 -4435898.2 58019799 2.8818922e+08 29211.089
300 6512669 -4436124.7 57840251 2.8818927e+08 29222.575
310 6495904.7 -4436745.3 58445285 2.8818932e+08 29128.647
320 6459174.9 -4437776.1 59770495 2.8818937e+08 28937.93
330 6404322.4 -4439201.5 61749434 2.8818942e+08 28659.348
340 6333209 -4440973.8 64314930 2.8818947e+08 28301.927
350 6247685.4 -4443016.1 67400192 2.8818951e+08 27874.684
360 6149565.9 -4445228.2 70939709 2.8818956e+08 27386.465
370 6040609.2 -4447492.8 74869965 2.8818961e+08 26845.871
380 5922503.2 -4449683.5 79129981 2.8818965e+08 26261.166
390 5796854.1 -4451671.6 83661722 2.8818969e+08 25640.235
400 5665179.3 -4453332 88410367 2.8818972e+08 24990.519