User Tools

Site Tools


cluster:224


Back

Recipe for n38-n45 conversion of openlava/centos6 to slurm/centos7.

First install “server with GUI” via USB installation media. Enter BIOS (delete key). Set Date&Time and boot order (Removable, USB, Cd&DVD, Hdd). Reclaim disk space. Kdump disabled.

# first step
yum update -y  # get to the latest
reboot

# IP ranges
192.168.102.48 n38 n38-eth0
10.10.102.48 n38-eth1
10.11.103.48 n38-ib0
DEFROUTE=yes # others no
GATEWAY=192.168.102.251 # greentail52

cd /etc/sysconfig/network-scripts/
vi ifcfg-eth0 # 192.168.102.x
vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid

# or via  rc.local? see n102
vi ifcfg-ib0
DEVICE=ib0
ONBOOT=yes
MTU=65520
CONNECTED_MODE=yes
BOOTPROTO=none
IPADDR=10.11.103.48
PREFIX=16
# check with ibstat
	Port 1:
		State: Active
		Physical state: LinkUp
# check with ethtool ib0
	Speed: 40000Mb/s

# root: sync cottontail's master and known_hosts (tails+stores)
ssh-keygen -t rsa
scp 10.10.102.250:/root/.ssh/authorized_keys /root/.ssh/ #ctt2
/etc/ssh/sshd_config (PermitRootLogin)
vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!

# make internet connection for yum

# iptables
systemctl stop firewalld
systemctl disable firewalld

yum install -y iptables-services
# port 22 wesleyan only 
-A INPUT -i eth1 -m state --state NEW -m tcp -p tcp -s 129.133.0.0/17 -d 129.133.52.222 --dport 22 -j ACCEPT
-A INPUT -i eth1 -m state --state NEW -m tcp -p tcp -s 129.133.128.0/18 -d 129.133.52.222 --dport 22 -j ACCEPT
systemctl start iptables 

yum install bind-utils -y
dig google.com
iptables -L # check!


# CentOS 7
yum-complete-transaction --cleanup-only
yum install epel-release -y
yum install netcdf netcdf-devel -y
yum install yum-utils -y # yumdownloader
yum install ddd grace gnuplot alpine -y # pico

yum install munge munge-devel -y
scp -rp cottontail2.wesleyan.edu:/etc/munge /etc/
ls -ld /etc/munge # check
chown munge:munge /etc/munge/munge.key 
ls -l  /etc/munge/munge.key # check
# test unmunge both ways before starting slurmd

# done via media
#yum groupinstall "Server" 
# server for compute nodes "Server with GUI"

# add packages and update
yum install epel-release -y
yum install flex bison -y 
yum install tcl tcl-devel dmtcp dmtcp-devel -y
yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y
yum install freeglut-devel libXi-devel libXmu-devel -y
yum install blas blas-devel lapack lapack-devel boost boost-devel -y
yum install lm_sensors lm_sensors-libs -y
yum install zlib-devel bzip2-devel -y
yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y
yum install cmake -y
yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y

# check first
systemctl status rdma # loaded/active
rpm -qa | egrep  "libibverbs|libibverbs-devel"
# no # yum groupinstall "Infiniband Support" # ib already working
yum install libibverbs-devel ibutils infiniband-diags perftest qperf -y

# amber20 cmake readline error fix needs
yum install ncurses-devel readline-devel -y

# amber20
yum -y install tcsh make \
               gcc gcc-gfortran gcc-c++ \
               which flex bison patch bc \
               libXt-devel libXext-devel \
               perl perl-ExtUtils-MakeMaker util-linux wget \
               bzip2 bzip2-devel zlib-devel tar 
               
# FINISH native vanilla installs
# R version 4.1.2 (2021-11-01) -- "Bird Hippie"
yum install R R-devel -y
# openjdk version "1.8.0_322"
rpm -qa | grep ^java  # check, else
yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \
java-1.8.0-openjdk-headless  -y 
# python v 2.7.5
# fftw 3.3.5-11.el8
yum install fftw fftw-devel -y 
#gnu scientific libraries
yum install gsl gsl-devel -y 
# ruby 2.5.9-109.module+el8.5.0
yum install ruby ruby-devel -y 
# obabel chem file formats
yum install openbabel openbabel-devel -y

# compute nodes old level 3
systemctl set-default multi-user.target

### centos7 so not an OpenHPC environment

# other configs
umount /home
cd /; mv home /usr/local/
mkdir /home
ln -s /home /share
vi /etc/passwd (/usr/local/home)
vi /etc/fstab  (same)
mount -a

# postfix
yum install postfix mailx
echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf
systemctl enable postfix
systemctl restart postfix

### DONE
undo vlan52, down iptables, reboot, 
test you can get to it via private networks
### REST AT HOME


# /etc/fstab
/dev/sdb		/localscratch5tb        ext4    defaults        0 0

# if sdb present
mkdir /sanscratch /localscratch5tb
chmod ugo+rwx /sanscratch /localscratch5tb
chmod o+t /sanscratch /localscratch5tb
ln -s /localscratch5tb /localscratch

cd /home 
ln -s /zfshomes/apps
ln -s /zfshomes/tmp
ln -s /zfshomes/csmith06
ls -l

# ADD all the NFS mounts see greentail52:/root/n45.fstab

# fstab file mounts
mkdir -p /zfshomes /home66 /home33 /mindstore /astrostore
mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l
mount -a # via 10.10

# compute nodes /etc/chrony.conf
#pool 2.pool.ntp.org iburst
Server 192.168.102.250
Server 192.168.102.251
# check
systemctl restart chronyd
chronyc sources


# edit /etc/snmp/snmpd.conf, enable and start
rocommunity public
dontLogTCPWrappersConnects yes
# add to zenoss
systemctl enable snmpd
systemctl start snmpd



# compute nodes only
# scp from n79:/usr/local/
# amber16/  amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/
scp -rp 10.10.102.89:/usr/local/amber16 .

# copy slurm from n79
scp -rp 10.10.102.89:/usr/local/slurm-22.05.2 .
ln -s /usr/local/slurm-22.05.2 /usr/local/slurm


# backup and update passwd, shadow, group and hosts files
# scp from n79 or n45

# slurm config
echo SLURMD_OPTIONS="--conf-server 192.168.102.250" > /etc/sysconfig/slurmd
  mkdir /var/log/slurm 
  chown slurm:munge /var/log/slurm 
  mkdir /var/spool/slurm 
  chown slurm:munge /var/spool/slurm 
# check
chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge
chown -R slurm:munge /var/log/slurm /var/spool/slurm
systemctl enable munge
systemctl start munge
# test unmunge
/usr/local/slurm/sbin/slurmd
# check log

# /etc/bashrc (login node)
export PATH=/usr/local/slurm/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH

# crontab

# ionice gaussian
0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1

# cpu temps
40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1

on compute node /etc/security/limits.conf
*                -       memlock         270039400


/etc/rc.local
#timing issue with munge
#sleep 15
#/usr/local/slurm/sbin/slurmd
chmod +x /etc/rc.d/rc.local

# important!! put private back in place
systemctl disable iptables
systemctl stop iptables
reboot

# file date_ctt2.sh

# ctt /etc/pdsh

# ctt:/root/scripts

# ctt2:/usr/local/bin/rslurm2022.sh

# Put eth0 fingerprints in cottontail/greentail52 known hosts

# test slurm unmunge and update slurm.conf file


Back

cluster/224.txt · Last modified: 2024/01/12 09:36 by hmeij07