Recipe for n38-n45 conversion of openlava/centos6 to slurm/centos7.
First install “server with GUI” via USB installation media. Enter BIOS (delete key). Set Date&Time and boot order (Removable, USB, Cd&DVD, Hdd). Reclaim disk space. Kdump disabled.
# first step yum update -y # get to the latest reboot # IP ranges 192.168.102.48 n38 n38-eth0 10.10.102.48 n38-eth1 10.11.103.48 n38-ib0 DEFROUTE=yes # others no GATEWAY=192.168.102.251 # greentail52 cd /etc/sysconfig/network-scripts/ vi ifcfg-eth0 # 192.168.102.x vi ifcfg-eth1 # 10.10.102.x # 'uuidgen eth1' to get uuid # or via rc.local? see n102 vi ifcfg-ib0 DEVICE=ib0 ONBOOT=yes MTU=65520 CONNECTED_MODE=yes BOOTPROTO=none IPADDR=10.11.103.48 PREFIX=16 # check with ibstat Port 1: State: Active Physical state: LinkUp # check with ethtool ib0 Speed: 40000Mb/s # root: sync cottontail's master and known_hosts (tails+stores) ssh-keygen -t rsa scp 10.10.102.250:/root/.ssh/authorized_keys /root/.ssh/ #ctt2 /etc/ssh/sshd_config (PermitRootLogin) vi /etc/selinux/config # disabled, do not mistype, kernel will not boot! # make internet connection for yum # iptables systemctl stop firewalld systemctl disable firewalld dnf install -y iptables-services # port 22 wesleyan only -A INPUT -i eth1 -m state --state NEW -m tcp -p tcp -s 129.133.0.0/17 -d 129.133.52.222 --dport 22 -j ACCEPT -A INPUT -i eth1 -m state --state NEW -m tcp -p tcp -s 129.133.128.0/18 -d 129.133.52.222 --dport 22 -j ACCEPT systemctl start iptables yum install bind-utils -y dig google.com iptables -L # check! 
# CentOS 7 yum-complete-transaction --cleanup-only yum install epel-release -y yum install netcdf netcdf-devel -y yum install yum-utils # yumdownloader -y yum install ddd grace gnuplot alpine -y # pico yum install munge munge-devel -y scp -rp cottontail2.wesleyan.edu:/etc/munge /etc/ ls -ld /etc/munge # check chown munge:munge /etc/munge/munge.key ls -l /etc/munge/munge.key # check # test unmunge both ways before starting slurmd # done via media #yum groupinstall "Server" # server for compute nodes "Server with GUI" # add packages and update yum install epel-release -y yum install flex bison -y yum install tcl tcl-devel dmtcp dmtcp-devel -y yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y yum install freeglut-devel libXi-devel libXmu-devel -y yum install blas blas-devel lapack lapack-devel boost boost-devel -y yum install lm_sensors lm_sensors-libs -y yum install zlib-devel bzip2-devel -y yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y yum install cmake -y yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y # check first systemctl status rdma # loaded/active rpm -qa | egrep "libibverbs|libibverbs-devel" # no # yum groupinstall "Infiniband Support" # ib already working yum install libibverbs-devel ibutils infiniband-diags perftest qperf -y # amber20 cmake readline error fix needs yum install ncurses-devel readline-devel -y # amber20 yum -y install tcsh make \ gcc gcc-gfortran gcc-c++ \ which flex bison patch bc \ libXt-devel libXext-devel \ perl perl-ExtUtils-MakeMaker util-linux wget \ bzip2 bzip2-devel zlib-devel tar # FINISH native vanilla installs # R version 4.1.2 (2021-11-01) -- "Bird Hippie" yum install R R-devel -y # openjdk version "1.8.0_322" rpm -qa | grep ^java # check, else yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \ java-1.8.0-openjdk-headless -y # python v 2.7.5 # fftw 3.3.5-11.el8 yum install fftw fftw-devel -y #gnu scientific libraries yum install gsl gsl-devel -y # ruby 2.5.9-109.module+el8.5.0 yum 
install ruby ruby-devel -y # obabel chem file formats yum install openbabel openbabel-devel -y # compute nodes old level 3 systemctl set-default multi-user.target ### centos7 so not an OpenHPC environment # other configs umount /home cd /; mv home /usr/local/ mkdir /home ln -s /home /share vi /etc/passwd (/usr/local/home) vi /etc/fstab (same) mount -a # postfix yum install postfix mailx echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf systemctl enable postfix systemctl restart postfix ### DONE undo vlan52, down iptables, reboot, test you can get to it via private networks ### REST AT HOME # /etc/fstab /dev/sdb /localscratch5tb ext4 defaults 0 0 # if sdb present mkdir /sanscratch /localscratch5tb chmod ugo+rwx /sanscratch /localscratch5tb chmod o+t /sanscratch /localscratch5tb ln -s /localscratch5tb /localscratch cd /home ln -s /zfshomes/apps ln -s /zfshomes/tmp ln -s /zfshomes/csmith06 ls -l # ADD all the NFS mounts see greentail52:/root/n45.fstab # fstab file mounts mkdir -p /zfshomes /home66 /home33 /mindstore /astrostore mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l mount -a # via 10.10 # compute nodes /etc/chrony.conf #pool 2.pool.ntp.org iburst Server 192.168.102.250 Server 192.168.102.251 # check systemctl restart chronyd chronyc sources # edit /etc/snmp/snmpd.conf, enable and start rocommunity public dontLogTCPWrappersConnects yes # add to zenoss systemctl enable snmpd systemctl start snmpd # compute nodes only # scp from n79:/usr/local/ # amber16/ amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/ scp -rp 10.10.102.89:/usr/local/amber16 . # copy slurm from n79 scp -rp 10.10.102.89:/usr/local/slurm-22.05.2 . 
ln -s /usr/local/slurm-22.05.2 /usr/local/slurm # backup and update passwd, shadow, group and hosts files # scp from n79 or n45 # slurm config echo SLURMD_OPTIONS="--conf-server 192.168.102.250" > /etc/sysconfig/slurmd mkdir /var/log/slurm chown slurm:munge /var/log/slurm mkdir /var/spool/slurm chown slurm:munge /var/spool/slurm # check chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge chown -R slurm:munge /var/log/slurm /var/spool/slurm systemctl enable munge systemctl start munge # test unmunge /usr/local/slurm/sbin/slurmd # check log # /etc/bashrc (login node) export PATH=/usr/local/slurm/bin:$PATH export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH # crontab # ionice gaussian 0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh > /dev/null 2>&1 # cpu temps 40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1 on compute node /etc/security/limits.conf * - memlock 270039400 /etc/rc.local #timing issue with munge #sleep 15 #/usr/local/slurm/sbin/slurmd chmod +x /etc/rc.d/rc.local # important!! put private back in place systemctl disable iptables systemctl stop iptables reboot # file date_ctt2.sh # ctt /etc/pdsh # ctt:/root/scripts # ctt2:/usr/local/bin/rslurm2022.sh # Put eth0 fingerprints in cottontail/greentail52 known hosts # test slurm unmunge and update slurm.conf file