User Tools

Site Tools


cluster:224

Warning: Undefined array key -1 in /usr/share/dokuwiki/inc/html.php on line 1458

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
cluster:224 [2023/10/13 09:39]
hmeij07 a
cluster:224 [2024/01/12 09:36] (current)
hmeij07
Line 7: Line 7:
  
 <code> <code>
 +
 +# first step
 +yum update -y  # get to the latest
 +reboot
  
 # IP ranges # IP ranges
Line 12: Line 16:
 10.10.102.48 n38-eth1 10.10.102.48 n38-eth1
 10.11.103.48 n38-ib0 10.11.103.48 n38-ib0
 +DEFROUTE=yes # others no
 +GATEWAY=192.168.102.251 # greentail52
  
 cd /etc/sysconfig/network-scripts/ cd /etc/sysconfig/network-scripts/
 vi ifcfg-eth0 # 192.168.102.x vi ifcfg-eth0 # 192.168.102.x
 vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid
 +
 +# or via  rc.local? see n102
 +vi ifcfg-ib0
 +DEVICE=ib0
 +ONBOOT=yes
 +MTU=65520
 +CONNECTED_MODE=yes
 +BOOTPROTO=none
 +IPADDR=10.11.103.48
 +PREFIX=16
 +# check with ibstat
 + Port 1:
 + State: Active
 + Physical state: LinkUp
 +# check with ethtool ib0
 + Speed: 40000Mb/s
  
 # root: sync cottontail's master and known_hosts (tails+stores) # root: sync cottontail's master and known_hosts (tails+stores)
 ssh-keygen -t rsa ssh-keygen -t rsa
-scp 10.10.102.253:/root/.ssh/authorized_keys /root/.ssh/+scp 10.10.102.250:/root/.ssh/authorized_keys /root/.ssh/ #ctt2
 /etc/ssh/sshd_config (PermitRootLogin) /etc/ssh/sshd_config (PermitRootLogin)
 vi /etc/selinux/config # disabled, do not mistype, kernel will not boot! vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!
Line 35: Line 57:
 systemctl start iptables  systemctl start iptables 
  
-dnf install bind-utils+yum install bind-utils -y
 dig google.com dig google.com
 iptables -L # check! iptables -L # check!
  
  
-Rocky8 ??? +CentOS 7 
-dnf config-manager --set-enabled powertools -y +yum-complete-transaction --cleanup-only 
-dnf install epel-release -y +yum install epel-release -y 
-dnf install netcdf netcdf-devel -y +yum install netcdf netcdf-devel -y 
-dnf install yum-utils # yumdownloader -y +yum install yum-utils -y # yumdownloader 
-dnf install ddd grace gnuplot alpine -y # pico+yum install ddd grace gnuplot alpine -y # pico
  
-yum install munge munge-devel+yum install munge munge-devel -y 
 +scp -rp cottontail2.wesleyan.edu:/etc/munge /etc/ 
 +ls -ld /etc/munge # check 
 +chown munge:munge /etc/munge/munge.key  
 +ls -l  /etc/munge/munge.key # check 
 +# test unmunge both ways before starting slurmd
  
 # done via media # done via media
Line 66: Line 93:
 yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y
  
-#easybuild +check first 
-yum install libibverbs libibverbs-devel+systemctl status rdma # loaded/active 
 +rpm -qa | egrep  "libibverbs|libibverbs-devel" 
 +# no # yum groupinstall "Infiniband Support" # ib already working 
 +yum install libibverbs-devel ibutils infiniband-diags perftest qperf -y
  
 # amber20 cmake readline error fix needs # amber20 cmake readline error fix needs
-yum install ncurses-c++-libs-6.1-9.20180224.el8.x86_64.rpm \ +yum install ncurses-devel readline-devel -y
-            ncurses-devel-6.1-9.20180224.el8.x86_64.rpm \ +
-            readline-devel-7.0-10.el8.x86_64.rpm+
  
 # amber20 # amber20
Line 84: Line 112:
 # FINISH native vanilla installs # FINISH native vanilla installs
 # R version 4.1.2 (2021-11-01) -- "Bird Hippie" # R version 4.1.2 (2021-11-01) -- "Bird Hippie"
-yum install R R-devel+yum install R R-devel -y
 # openjdk version "1.8.0_322" # openjdk version "1.8.0_322"
-rpm -qa | grep ^java  # check+rpm -qa | grep ^java  # check, else
 yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \ yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \
-java-1.8.0-openjdk-headless javapackages-filesystem +java-1.8.0-openjdk-headless  -y 
-# python v 3.+# python v 2.7.5
-yum install python39 python39-devel +
-ln -s /usr/bin/python3.9 /usr/bin/python+
 # fftw 3.3.5-11.el8 # fftw 3.3.5-11.el8
-yum install fftw fftw-devel+yum install fftw fftw-devel -y 
 #gnu scientific libraries #gnu scientific libraries
-yum install gsl gsl-devel+yum install gsl gsl-devel -y 
 # ruby 2.5.9-109.module+el8.5.0 # ruby 2.5.9-109.module+el8.5.0
-yum install ruby ruby-devel+yum install ruby ruby-devel -y 
 # obabel chem file formats # obabel chem file formats
-yum install openbabel openbabel-devel+yum install openbabel openbabel-devel -y
  
 # compute nodes old level 3 # compute nodes old level 3
 systemctl set-default multi-user.target systemctl set-default multi-user.target
 +
 +### centos7 so not an OpenHPC environment
  
 # other configs # other configs
 +umount /home
 cd /; mv home /usr/local/ cd /; mv home /usr/local/
 mkdir /home mkdir /home
 ln -s /home /share ln -s /home /share
 vi /etc/passwd (/usr/local/home) vi /etc/passwd (/usr/local/home)
- +vi /etc/fstab  (same) 
- +mount -a
-# other configs +
-mv /home /usr/local/ +
-mkdir /home +
-ln -s /home /share +
-vi /etc/passwd (/usr/local/home)+
  
 # postfix # postfix
-dnf install postfix +yum install postfix mailx 
-dnf install mailx+echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf
 systemctl enable postfix systemctl enable postfix
-echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf+systemctl restart postfix
  
 ### DONE ### DONE
-undo vlan52, down iptables, reboot, test you can it via privs+undo vlan52, down iptables, reboot,  
 +test you can get to it via private networks
 ### REST AT HOME ### REST AT HOME
  
-# or via  rc.local? 
-vi ifcfg-ib0 
-DEVICE=ib0 
-ONBOOT=yes 
-MTU=65520 
-CONNECTED_MODE=yes 
-BOOTPROTO=none 
-IPADDR=10.11.103.48 
-PREFIX=16 
  
-mkdir /sanscratch /localscratch +# /etc/fstab 
-chmod ugo+rwx /sanscratch /home/localscratch +/dev/sdb /localscratch5tb        ext4    defaults        0 0 
-chmod o+t /sanscratch /home/localscratch  + 
-ln -s /localscratch /localscratch5tb+# if sdb present 
 +mkdir /sanscratch /localscratch5tb 
 +chmod ugo+rwx /sanscratch /localscratch5tb 
 +chmod o+t /sanscratch /localscratch5tb 
 +ln -s /localscratch5tb /localscratch
  
 cd /home  cd /home 
Line 148: Line 168:
 ls -l ls -l
  
-cat /sanscratch/tmp/fstab.tmp >> /etc/fstab; mkdir /astrostore; mount -a; df -h+# ADD all the NFS mounts see greentail52:/root/n45.fstab
  
 # fstab file mounts # fstab file mounts
-mkdir -p /zfshomes /home66 /home33 /mindstore /opt/ohpc/pub /opt/intel+mkdir -p /zfshomes /home66 /home33 /mindstore /astrostore
 mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel07 opt; ls -l
 +mount -a # via 10.10
  
 # compute nodes /etc/chrony.conf # compute nodes /etc/chrony.conf
Line 162: Line 182:
 systemctl restart chronyd systemctl restart chronyd
 chronyc sources chronyc sources
- 
-cat /sanscratch/tmp/fstab.tmp >> /etc/fstab; mkdir /astrostore; mount -a; df -h; cd /smithlab/;  ln -s /smithlab/home/opt/rhel08 opt; ls -l 
  
  
Line 169: Line 187:
 rocommunity public rocommunity public
 dontLogTCPWrappersConnects yes dontLogTCPWrappersConnects yes
-enable, start, add to zenoss +# add to zenoss 
 +systemctl enable snmpd 
 +systemctl start snmpd 
  
  
 # compute nodes only # compute nodes only
-# scp fron n79:/usr/local/ +# scp from n79:/usr/local/ 
-amber16/  amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/+amber16/  amber20/ fsl-5.0.10/ gromacs-2018/ lammps-22Aug18/ 
 +scp -rp 10.10.102.89:/usr/local/amber16 . 
 + 
 +# copy slurm from n79 
 +scp -rp 10.10.102.89:/usr/local/slurm-22.05.2 . 
 +ln -s /usr/local/slurm-22.05.2 /usr/local/slurm 
 + 
 + 
 +# backup and update passwd, shadow, group and hosts files 
 +# scp from n79 or n45 
 + 
 +# slurm config 
 +echo SLURMD_OPTIONS="--conf-server 192.168.102.250" > /etc/sysconfig/slurmd 
 +  mkdir /var/log/slurm  
 +  chown slurm:munge /var/log/slurm  
 +  mkdir /var/spool/slurm  
 +  chown slurm:munge /var/spool/slurm  
 +# check 
 +chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge 
 +chown -R slurm:munge /var/log/slurm /var/spool/slurm 
 +systemctl enable munge 
 +systemctl start munge 
 +# test unmunge 
 +/usr/local/slurm/sbin/slurmd 
 +# check log 
 + 
 +# /etc/bashrc (login node) 
 +export PATH=/usr/local/slurm/bin:$PATH 
 +export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH 
 + 
 +# crontab 
 + 
 +# ionice gaussian 
 +0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1
 + 
 +# cpu temps 
 +40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1
 + 
 +on compute node /etc/security/limits.conf 
 +*                -       memlock         270039400 
 + 
 + 
 +/etc/rc.local 
 +#timing issue with munge 
 +#sleep 15 
 +#/usr/local/slurm/sbin/slurmd 
 +chmod +x /etc/rc.d/rc.local 
 + 
 +# important!! put private back in place 
 +systemctl disable iptables 
 +systemctl stop iptables 
 +reboot 
 + 
 +# file date_ctt2.sh 
 + 
 +# ctt /etc/pdsh 
 + 
 +# ctt:/root/scripts 
 + 
 +# ctt2:/usr/local/bin/rslurm2022.sh
  
-# Put the warewulf cluster key in authorized_keys 
 # Put eth0 fingerprints in cottontail/greentail52 known hosts # Put eth0 fingerprints in cottontail/greentail52 known hosts
-add to relevant known_hosts_servername file+ 
 +test slurm unmunge and update slurm.conf file
  
 </code> </code>
cluster/224.1697204347.txt.gz · Last modified: 2023/10/13 09:39 by hmeij07