This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
cluster:224 [2023/10/13 15:52] hmeij07 |
cluster:224 [2024/01/12 14:36] (current) hmeij07 |
||
---|---|---|---|
Line 7: | Line 7: | ||
< | < | ||
+ | |||
+ | # first step | ||
+ | yum update -y # get to the latest | ||
+ | reboot | ||
# IP ranges | # IP ranges | ||
Line 12: | Line 16: | ||
10.10.102.48 n38-eth1 | 10.10.102.48 n38-eth1 | ||
10.11.103.48 n38-ib0 | 10.11.103.48 n38-ib0 | ||
+ | DEFROUTE=yes # others no | ||
+ | GATEWAY=192, | ||
cd / | cd / | ||
vi ifcfg-eth0 # 192.168.102.x | vi ifcfg-eth0 # 192.168.102.x | ||
vi ifcfg-eth1 # 10.10.102.x | vi ifcfg-eth1 # 10.10.102.x | ||
+ | |||
+ | # or via rc.local? see n102 | ||
+ | vi ifcfg-ib0 | ||
+ | DEVICE=ib0 | ||
+ | ONBOOT=yes | ||
+ | MTU=65520 | ||
+ | CONNECTED_MODE=yes | ||
+ | BOOTPROTO=none | ||
+ | IPADDR=10.11.103.48 | ||
+ | PREFIX=16 | ||
+ | # check with ibstat | ||
+ | Port 1: | ||
+ | State: Active | ||
+ | Physical state: LinkUp | ||
+ | # check with ethtool ib0 | ||
+ | Speed: 40000Mb/s | ||
# root: sync cottontail' | # root: sync cottontail' | ||
ssh-keygen -t rsa | ssh-keygen -t rsa | ||
- | scp 10.10.102.253:/ | + | scp 10.10.102.250:/ |
/ | / | ||
vi / | vi / | ||
Line 75: | Line 97: | ||
rpm -qa | egrep " | rpm -qa | egrep " | ||
# no # yum groupinstall " | # no # yum groupinstall " | ||
- | yum install | + | yum install |
# amber20 cmake readline error fix needs | # amber20 cmake readline error fix needs | ||
Line 107: | Line 129: | ||
# compute nodes: old runlevel 3 | # compute nodes: old runlevel 3 | ||
systemctl set-default multi-user.target | systemctl set-default multi-user.target | ||
+ | |||
+ | ### CentOS 7, so not an OpenHPC environment | ||
# other configs | # other configs | ||
Line 128: | Line 152: | ||
### REST AT HOME | ### REST AT HOME | ||
- | # or via rc.local? see n102 | ||
- | vi ifcfg-ib0 | ||
- | DEVICE=ib0 | ||
- | ONBOOT=yes | ||
- | MTU=65520 | ||
- | CONNECTED_MODE=yes | ||
- | BOOTPROTO=none | ||
- | IPADDR=10.11.103.48 | ||
- | PREFIX=16 | ||
- | # check with ibstat | ||
- | Port 1: | ||
- | State: Active | ||
- | Physical state: LinkUp | ||
- | # check with ethtool ib0 | ||
- | Speed: 40000Mb/s | ||
# /etc/fstab | # /etc/fstab | ||
Line 178: | Line 187: | ||
rocommunity public | rocommunity public | ||
dontLogTCPWrappersConnects yes | dontLogTCPWrappersConnects yes | ||
- | # enable, start, | + | # add to zenoss |
+ | systemctl enable snmpd | ||
+ | systemctl start snmpd | ||
Line 190: | Line 202: | ||
ln -s / | ln -s / | ||
- | # Put the warewulf cluster key in authorized_keys | + | |
- | cd /root/.ssh | + | # backup and update passwd, shadow, group and hosts files |
- | scp -rp 10.10.102.89:/root/.ssh/authorized_keys | + | # scp from n79 or n45 |
+ | |||
+ | # slurm config | ||
+ | echo SLURMD_OPTIONS=" | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | # check | ||
+ | chown -R munge:munge /etc/munge / | ||
+ | chown -R slurm:munge / | ||
+ | systemctl enable munge | ||
+ | systemctl start munge | ||
+ | # test unmunge | ||
+ | / | ||
+ | # check log | ||
+ | |||
+ | # /etc/bashrc (login node) | ||
+ | export PATH=/ | ||
+ | export LD_LIBRARY_PATH=/ | ||
+ | |||
+ | # crontab | ||
+ | |||
+ | # ionice gaussian | ||
+ | 0,15,30,45 * * * * / | ||
+ | |||
+ | # cpu temps | ||
+ | 40 * * * * / | ||
+ | |||
+ | on compute node / | ||
+ | * | ||
+ | |||
+ | |||
+ | /etc/rc.local | ||
+ | #timing issue with munge | ||
+ | #sleep 15 | ||
+ | #/ | ||
+ | chmod +x /etc/rc.d/rc.local | ||
+ | |||
+ | # important!! put private back in place | ||
+ | systemctl disable iptables | ||
+ | systemctl stop iptables | ||
+ | reboot | ||
+ | |||
+ | # file date_ctt2.sh | ||
+ | |||
+ | # ctt /etc/pdsh | ||
+ | |||
+ | # ctt:/root/scripts | ||
+ | |||
+ | # ctt2:/ | ||
# Put eth0 fingerprints in cottontail/ | # Put eth0 fingerprints in cottontail/ | ||
- | # add to relevant known_hosts_servername file | + | |
# test slurm unmunge and update slurm.conf file | # test slurm unmunge and update slurm.conf file | ||