User Tools

Site Tools


cluster:192

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision Both sides next revision
cluster:192 [2020/02/17 19:47]
hmeij07 [EXX96]
cluster:192 [2020/02/24 14:08]
hmeij07
Line 7: Line 7:
  
 Page best read bottom to top. Page best read bottom to top.
-==== WhatWeDo? ====+ 
 +==== Miscellaneous ==== 
 + 
 +<code> 
 + 
 +# propagate global passwd, shadow, group, hosts file 
 + 
 +# add to date_ctt2.sh script, get and set date 
 + 
 +NOW=`/bin/date +%m%d%H%M%Y.%S` 
 +for i in `seq 79 90`; do echo n$i; ssh n$i date $NOW; done 
 + 
 +# crontab 
 + 
 +# ionice gaussian 
 +0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1
 + 
 +# cpu temps 
 +40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1
 +  
 +# rc.local, chmod o+x /etc/rc.d/rc.local, then add 
 + 
 +# for mapd, 'All On' enable graphics rendering support 
 +#/usr/bin/nvidia-smi --gom=0 
 + 
 +# for amber16 -pm=ENABLED -c=EXCLUSIVE_PROCESS 
 +#nvidia-smi --persistence-mode=1 
 +#nvidia-smi --compute-mode=1 
 + 
 +# for mwgpu/exx96 -pm=ENABLED -c=DEFAULT 
 +nvidia-smi --persistence-mode=1 
 +nvidia-smi --compute-mode=0 
 + 
 +# turn ECC off (memory scrubbing) 
 +#/usr/bin/nvidia-smi -e 0 
 + 
 +# lm_sensor 
 +modprobe coretemp 
 +modprobe tmp401 
 +#modprobe w83627ehf 
 +  
 +reboot 
 + 
 +</code> 
 + 
 +==== Recipe ====
  
 Steps. "Ala n37" ... so the RTX nodes are similar to the K20 nodes and we can put the local software in place. See [[cluster:172|K20 Redo]] page.  First we add these packages and clean up. Steps. "Ala n37" ... so the RTX nodes are similar to the K20 nodes and we can put the local software in place. See [[cluster:172|K20 Redo]] page.  First we add these packages and clean up.
Line 13: Line 58:
 <code> <code>
  
-yum install epel-release +# hook up DVI-D cable to GPU port (offboard video) 
-yum install tcl tcl-devel dmtcp +# login as root check some things out... 
-yum install freeglut-devel libXi-devel libXmu-devel \ make mesa-libGLU-devel +free -g 
-yum install blas blas-devel lapack lapack-devel boost boost-devel +nvidia-smi 
-yum install tkinter lm_sensors lm_sensors-libs +docker images 
-yum install zlib-devel bzip2-devel bzip bzip-devel +docker ps 
-yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker +# set local time zone 
-yum install cmake cmake-devel +mv /etc/localtime /etc/localtime.backup 
-yum install libjpeg libjpeg-devel libjpeg-turbo-devel+ln -s /usr/share/zoneinfo/America/New_York /etc/localtime 
 +# change passwords for root and vendor account 
 +passwd 
 +passwd exx 
 +# set hostname 
 +hostnamectl set-hostname n79 
 +# configure private subnets and ping file server 
 +cd /etc/sysconfig/network-scripts/ 
 +vi ifcfg-enp1s0f0 
 +vi ifcfg-enp1s0f1 
 +systemctl restart network 
 +ping -c 3 192.168.102.42 
 +ping -c 3 10.10.102.42 
 +# make internet connection for yum 
 +ifdown enp1s0f0 
 +vi ifcfg-enp1s0f0 
 +systemctl restart network 
 +dig google.com 
 +yum install -y iptables-services 
 +vi /etc/sysconfig/iptables 
 +systemctl start iptables 
 +iptables -L 
 +systemctl stop firewalld 
 +systemctl disable firewalld 
 +# other configs 
 +vi /etc/selinux/config (disabled) 
 +mv /home /usr/local/ 
 +mkdir /home 
 +vi /etc/passwd (exx, dockeruser $HOME) 
 +mkdir /sanscratch /localscratch 
 +chmod ugo+rwx /sanscratch /localscratch 
 +chmod o+t /sanscratch /localscratch 
 +ln -s /home /share 
 +ssh-keygen -t rsa 
 +scp 10.10.102.253:/root/.ssh/authorized_keys /root/.ssh/ 
 +vi /etc/ssh/sshd_config (PermitRootLogin) 
 +echo "relayhost = 192.168.102.42" >> /etc/postfix/main.cf 
 +# add packages and update 
 +yum install epel-release -y 
 +yum install tcl tcl-devel dmtcp -y 
 +yum install freeglut-devel libXi-devel libXmu-devel make mesa-libGLU-devel -y 
 +yum install blas blas-devel lapack lapack-devel boost boost-devel -y 
 +yum install tkinter lm_sensors lm_sensors-libs -y 
 +yum install zlib-devel bzip2-devel bzip bzip-devel -y 
 +yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y 
 +yum install cmake cmake-devel -y 
 +yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y 
 +yum update -y
 yum clean all yum clean all
 +# remove internet, bring private back up
 +ifdown enp1s0f0
 +vi ifcfg-enp1s0f0
 +ifup enp1s0f0
 +# passwd, shadow, group, hosts, fstab
 +mkdir /homeextra1 /homeextra2 /home33 /mindstore
 +cd /etc/
 +# backup files to -orig versions
 +scp 192.168.102.89:/etc/passwd /etc/passwd (and others)
 +scp 10.10.102.89:/etc/fstab /tmp
 +vi /etc/fstab
 +mount -a; df -h
 +# pick the kernel vendor used for now
 +grep ^menuentry /etc/grub2.cfg
 +grub2-set-default 1
 +ls -d /sys/firmware/efi && echo "EFI" || echo "Legacy"
 +grub2-mkconfig -o /boot/grub2/grub.cfg
 +#grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
 +# old level 3
 +systemctl set-default multi-user.target
 +reboot
 +# switch to VGA
 +cd /usr/local/src/
 +tar zxf n37.chroot-keep.ul.tar.gz
 +cd usr/local/
 +mv amber16/  fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/ /usr/local/
 +mv cuda-9.2/ /usr/local/n37-cuda-9.2/
 +cd /usr/local/bin/
 +rsync -vac 10.10.102.89:/usr/local/bin/ /usr/local/bin/
 +# test scripts gpu-free, gpu-info, gpu-process
 +0,1,2,3
 +id,name,temp.gpu,mem.used,mem.free,util.gpu,util.mem
 +0, GeForce RTX 2080 SUPER, 25, 126 MiB, 7855 MiB, 0 %, 0 %
 +1, GeForce RTX 2080 SUPER, 24, 11 MiB, 7971 MiB, 0 %, 0 %
 +2, GeForce RTX 2080 SUPER, 23, 11 MiB, 7971 MiB, 0 %, 0 %
 +3, GeForce RTX 2080 SUPER, 23, 11 MiB, 7971 MiB, 0 %, 0 %
 +gpu_name, gpu_bus_id, pid, process_name
 +GeForce RTX 2080 SUPER, 00000000:3B:00.0, 3109, python
 +# done
  
 </code> </code>
  
-==== WhatWeGot? ====+==== What We Purchased ====
  
   * 12 nodes yielding a total of   * 12 nodes yielding a total of
Line 118: Line 249:
  
 {{:cluster:ssd_small.JPG?nolink&300|}} Yea, found 1T SSD \\ {{:cluster:ssd_small.JPG?nolink&300|}} Yea, found 1T SSD \\
-{{:cluster:hdmi_small.JPG?nolink&300|}} HDMI ports on gpu \\+{{:cluster:hdmi_small.JPG?nolink&300|}} ports on gpu \\
 {{:cluster:gpu_small.JPG?nolink&300|}} GPU detail, blower model \\ {{:cluster:gpu_small.JPG?nolink&300|}} GPU detail, blower model \\
 {{:cluster:back_small.JPG?nolink&300|}} Back, gpus stacked 2 on 2 \\ {{:cluster:back_small.JPG?nolink&300|}} Back, gpus stacked 2 on 2 \\
cluster/192.txt · Last modified: 2022/03/08 18:29 by hmeij07