User Tools

Site Tools


cluster:224

Warning: Undefined array key -1 in /usr/share/dokuwiki/inc/html.php on line 1458

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
cluster:224 [2023/10/13 11:26]
hmeij07
cluster:224 [2024/01/12 09:36] (current)
hmeij07
Line 7: Line 7:
  
 <code> <code>
 +
 +# first step
 +yum update -y  # get to the latest
 +reboot
  
 # IP ranges # IP ranges
Line 12: Line 16:
 10.10.102.48 n38-eth1 10.10.102.48 n38-eth1
 10.11.103.48 n38-ib0 10.11.103.48 n38-ib0
 +DEFROUTE=yes # others no
 +GATEWAY=192.168.102.251 # greentail52
  
 cd /etc/sysconfig/network-scripts/ cd /etc/sysconfig/network-scripts/
 vi ifcfg-eth0 # 192.168.102.x vi ifcfg-eth0 # 192.168.102.x
 vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid
 +
 +# or via  rc.local? see n102
 +vi ifcfg-ib0
 +DEVICE=ib0
 +ONBOOT=yes
 +MTU=65520
 +CONNECTED_MODE=yes
 +BOOTPROTO=none
 +IPADDR=10.11.103.48
 +PREFIX=16
 +# check with ibstat
 + Port 1:
 + State: Active
 + Physical state: LinkUp
 +# check with ethtool ib0
 + Speed: 40000Mb/s
  
 # root: sync cottontail's master and known_hosts (tails+stores) # root: sync cottontail's master and known_hosts (tails+stores)
 ssh-keygen -t rsa ssh-keygen -t rsa
-scp 10.10.102.253:/root/.ssh/authorized_keys /root/.ssh/+scp 10.10.102.250:/root/.ssh/authorized_keys /root/.ssh/ #ctt2
 /etc/ssh/sshd_config (PermitRootLogin) /etc/ssh/sshd_config (PermitRootLogin)
 vi /etc/selinux/config # disabled, do not mistype, kernel will not boot! vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!
Line 75: Line 97:
 rpm -qa | egrep  "libibverbs|libibverbs-devel" rpm -qa | egrep  "libibverbs|libibverbs-devel"
 # no # yum groupinstall "Infiniband Support" # ib already working # no # yum groupinstall "Infiniband Support" # ib already working
-yum install libibvers-devel ibutils infiniband-diags perftest qperf +yum install libibverbs-devel ibutils infiniband-diags perftest qperf -y
  
 # amber20 cmake readline error fix needs # amber20 cmake readline error fix needs
Line 107: Line 129:
 # compute nodes old level 3 # compute nodes old level 3
 systemctl set-default multi-user.target systemctl set-default multi-user.target
 +
 +### centos7 so not an OpenHPC environment
  
 # other configs # other configs
Line 128: Line 152:
 ### REST AT HOME ### REST AT HOME
  
-# or via  rc.local? see n102 
-vi ifcfg-ib0 
-DEVICE=ib0 
-ONBOOT=yes 
-MTU=65520 
-CONNECTED_MODE=yes 
-BOOTPROTO=none 
-IPADDR=10.11.103.48 
-PREFIX=16 
-# check with ibstat 
- Port 1: 
- State: Active 
- Physical state: LinkUp 
-# check with ethtool ib0 
- Speed: 40000Mb/s 
  
 # /etc/fstab # /etc/fstab
Line 178: Line 187:
 rocommunity public rocommunity public
 dontLogTCPWrappersConnects yes dontLogTCPWrappersConnects yes
-enable, start, add to zenoss +# add to zenoss 
 +systemctl enable snmpd 
 +systemctl start snmpd 
  
  
Line 190: Line 202:
 ln -s /usr/local/slurm-22.05.2 /usr/local/slurm ln -s /usr/local/slurm-22.05.2 /usr/local/slurm
  
-Put the warewulf cluster key in authorized_keys+ 
 +backup and update passwd, shadow, group and hosts files 
 +# scp from n79 or n45 
 + 
 +# slurm config 
 +echo SLURMD_OPTIONS="--conf-server 192.168.102.250" > /etc/sysconfig/slurmd 
 +  mkdir /var/log/slurm  
 +  chown slurm:munge /var/log/slurm  
 +  mkdir /var/spool/slurm  
 +  chown slurm:munge /var/spool/slurm  
 +# check 
 +chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge 
 +chown -R slurm:munge /var/log/slurm /var/spool/slurm 
 +systemctl enable munge 
 +systemctl start munge 
 +# test unmunge 
 +/usr/local/slurm/sbin/slurmd 
 +# check log 
 + 
 +# /etc/bashrc (login node) 
 +export PATH=/usr/local/slurm/bin:$PATH 
 +export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH 
 + 
 +# crontab 
 + 
 +# ionice gaussian 
 +0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1
 + 
 +# cpu temps 
 +40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1
 + 
 +on compute node /etc/security/limits.conf 
 +*                -       memlock         270039400 
 + 
 + 
 +/etc/rc.local 
 +#timing issue with munge 
 +#sleep 15 
 +#/usr/local/slurm/sbin/slurmd 
 +chmod +x /etc/rc.d/rc.local 
 + 
 +# important!! put private back in place 
 +systemctl disable iptables 
 +systemctl stop iptables 
 +reboot 
 + 
 +# file date_ctt2.sh 
 + 
 +# ctt /etc/pdsh 
 + 
 +# ctt:/root/scripts 
 + 
 +# ctt2:/usr/local/bin/rslurm2022.sh 
 # Put eth0 fingerprints in cottontail/greentail52 known hosts # Put eth0 fingerprints in cottontail/greentail52 known hosts
-add to relevant known_hosts_servername file+ 
 +test slurm unmunge and update slurm.conf file
  
 </code> </code>
cluster/224.1697210807.txt.gz · Last modified: 2023/10/13 11:26 by hmeij07