User Tools

Site Tools


cluster:224

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
cluster:224 [2023/10/13 14:40]
hmeij07
cluster:224 [2024/01/12 14:36] (current)
hmeij07
Line 7: Line 7:
  
 <code> <code>
 +
 +# first step
 +yum update -y  # get to the latest
 +reboot
  
 # IP ranges # IP ranges
Line 12: Line 16:
 10.10.102.48 n38-eth1 10.10.102.48 n38-eth1
 10.11.103.48 n38-ib0 10.11.103.48 n38-ib0
 +DEVROUTE=yes # others no
 +GATEWAY=192.168.102.251 # greentail52
  
 cd /etc/sysconfig/network-scripts/ cd /etc/sysconfig/network-scripts/
 vi ifcfg-eth0 # 192.168.102.x vi ifcfg-eth0 # 192.168.102.x
 vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid
 +
 +# or via  rc.local? see n102
 +vi ifcfg-ib0
 +DEVICE=ib0
 +ONBOOT=yes
 +MTU=65520
 +CONNECTED_MODE=yes
 +BOOTPROTO=none
 +IPADDR=10.11.103.48
 +PREFIX=16
 +# check with ibstat
 + Port 1:
 + State: Active
 + Physical state: LinkUp
 +# check with ethtool ib0
 + Speed: 40000Mb/s
  
 # root: sync cottontail's master and known_hosts (tails+stores) # root: sync cottontail's master and known_hosts (tails+stores)
 ssh-keygen -t rsa ssh-keygen -t rsa
-scp 10.10.102.253:/root/.ssh/authorized_keys /root/.ssh/+scp 10.10.102.250:/root/.ssh/authorized_keys /root/.ssh/ #ctt2
 /etc/ssh/sshd_config (PermitRootLogin) /etc/ssh/sshd_config (PermitRootLogin)
 vi /etc/selinux/config # disabled, do not mistype, kernel will not boot! vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!
Line 75: Line 97:
 rpm -qa | egrep  "libibverbs|libibverbs-devel" rpm -qa | egrep  "libibverbs|libibverbs-devel"
 # no # yum groupinstall "Infiniband Support" # ib already working # no # yum groupinstall "Infiniband Support" # ib already working
-yum install libibvers-devel ibutils infiniband-diags perftest qperf +yum install libibverbs-devel ibutils infiniband-diags perftest qperf -y
  
 # amber20 cmake readline error fix needs # amber20 cmake readline error fix needs
Line 107: Line 129:
 # compute nodes old level 3 # compute nodes old level 3
 systemctl set-default multi-user.target systemctl set-default multi-user.target
 +
 +### centos7 so not an OpenHPC environment
  
 # other configs # other configs
Line 124: Line 148:
  
 ### DONE ### DONE
-undo vlan52, down iptables, reboot, test you can it via privs+undo vlan52, down iptables, reboot,  
 +test you can get to it via private networks
 ### REST AT HOME ### REST AT HOME
  
-# or via  rc.local? 
-vi ifcfg-ib0 
-DEVICE=ib0 
-ONBOOT=yes 
-MTU=65520 
-CONNECTED_MODE=yes 
-BOOTPROTO=none 
-IPADDR=10.11.103.48 
-PREFIX=16 
  
-mkdir /sanscratch /localscratch +# /etc/fstab 
-chmod ugo+rwx /sanscratch /home/localscratch +/dev/sdb /localscratch5tb        ext4    defaults        0 0 
-chmod o+t /sanscratch /home/localscratch  + 
-ln -s /localscratch /localscratch5tb+# if sdb present 
 +mkdir /sanscratch /localscratch5tb 
 +chmod ugo+rwx /sanscratch /localscratch5tb 
 +chmod o+t /sanscratch /localscratch5tb 
 +ln -s /localscratch5tb /localscratch
  
 cd /home  cd /home 
Line 148: Line 168:
 ls -l ls -l
  
-cat /sanscratch/tmp/fstab.tmp >> /etc/fstab; mkdir /astrostore; mount -a; df -h+# ADD all the NFS mounts see greentail52:/root/n45.fstab
  
 # fstab file mounts # fstab file mounts
-mkdir -p /zfshomes /home66 /home33 /mindstore /opt/ohpc/pub /opt/intel+mkdir -p /zfshomes /home66 /home33 /mindstore /astrostore
 mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l
 +mount -a # via 10.10
  
 # compute nodes /etc/chrony.conf # compute nodes /etc/chrony.conf
Line 162: Line 182:
 systemctl restart chronyd systemctl restart chronyd
 chronyc sources chronyc sources
- 
-cat /sanscratch/tmp/fstab.tmp >> /etc/fstab; mkdir /astrostore; mount -a; df -h; cd /smithlab/;  ln -s /smithlab/home/opt/rhel08 opt; ls -l 
  
  
Line 169: Line 187:
 rocommunity public rocommunity public
 dontLogTCPWrappersConnects yes dontLogTCPWrappersConnects yes
-enable, start, add to zenoss +# add to zenoss 
 +systemctl enable snmpd 
 +systemctl start snmpd 
  
  
 # compute nodes only # compute nodes only
-# scp fron n79:/usr/local/ +# scp from n79:/usr/local/ 
-amber16/  amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/+amber16/  amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/ 
 +scp -rp 10.10.102.89:/usr/local/amber16 . 
 + 
 +# copy slurm from n79 
 +scp -rp 10.10.102.89:/usr/local/slurm-22.05.2 . 
 +ln -s /usr/local/slurm-22.05.2 /usr/local/slurm 
 + 
 + 
 +# backup and update passwd, shadow, group and hosts files 
 +# scp from n79 or n45 
 + 
 +# slurm config 
 +echo SLURMD_OPTIONS="--conf-server 192.168.102.250" > /etc/sysconfig/slurmd 
 +  mkdir /var/log/slurm  
 +  chown slurm:munge /var/log/slurm  
 +  mkdir /var/spool/slurm  
 +  chown slurm:munge /var/spool/slurm  
 +# check 
 +chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge 
 +chown -R slurm:munge /var/log/slurm /var/spool/slurm 
 +systemctl enable munge 
 +systemctl start munge 
 +# test unmunge 
 +/usr/local/slurm/sbin/slurmd 
 +# check log 
 + 
 +# /etc/bashrc (login node) 
 +export PATH=/usr/local/slurm/bin:$PATH 
 +export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH 
 + 
 +# crontab 
 + 
 +# ionice gaussian 
 +0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1
 + 
 +# cpu temps 
 +40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1
 + 
 +on compute node /etc/security/limits.conf 
 +*                -       memlock         270039400 
 + 
 + 
 +/etc/rc.local 
 +#timing issue with munge 
 +#sleep 15 
 +#/usr/local/slurm/sbin/slurmd 
 +chmod +x /etc/rc.d/rc.local 
 + 
 +# important!! put private back in place 
 +systemctl disable iptables 
 +systemctl stop iptables 
 +reboot 
 + 
 +# file date_ctt2.sh 
 + 
 +# ctt /etc/pdsh 
 + 
 +# ctt:/root/scripts 
 + 
 +# ctt2:/usr/local/bin/rslurm2022.sh
  
-# Put the warewulf cluster key in authorized_keys 
 # Put eth0 fingerprints in cottontail/greentail52 known hosts # Put eth0 fingerprints in cottontail/greentail52 known hosts
-add to relevant known_hosts_servername file+ 
 +test slurm unmunge and update slurm.conf file
  
 </code> </code>
cluster/224.1697208059.txt.gz · Last modified: 2023/10/13 14:40 by hmeij07