This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
cluster:213 [2022/03/16 13:25] hmeij07 |
cluster:213 [2022/05/10 13:31] hmeij07 |
||
---|---|---|---|
Line 42: | Line 42: | ||
scp 10.10.102.253:/ | scp 10.10.102.253:/ | ||
/ | / | ||
+ | |||
+ | # Put the warewulf cluster key in authorized_keys | ||
+ | # Put eth0 fingerprints in cottontail/ | ||
+ | # add to relevant known_hosts_servername file | ||
# configure private subnets and ping file server | # configure private subnets and ping file server | ||
Line 53: | Line 57: | ||
# make internet connection for yum | # make internet connection for yum | ||
- | # eth3 for ctt2 or eth1 for n100-101 | ||
- | dnf install bind-utils | ||
- | dig google.com | ||
- | |||
- | #rocky8 | ||
- | # https:// | ||
- | dnf config-manager --set-enabled powertools | ||
- | dnf install epel-release | ||
- | dnf install netcdf netcdf-devel | ||
- | dnf install yum-utils # yumdownloader | ||
- | dnf install ddd | ||
- | dnf install grace | ||
- | dnf install gnuplot | ||
- | dnf install alpine # pico | ||
# iptables | # iptables | ||
Line 77: | Line 67: | ||
systemctl disable firewalld | systemctl disable firewalld | ||
- | # other configs | ||
- | vi / | ||
- | mv /home /usr/local/ | ||
- | vi /etc/passwd (exx, dockeruser $HOME) | ||
- | ## edit passwd, shadow, group, hosts files ## | ||
- | ## make -orig backups and stage in / | ||
- | ## cottontail2 = greentail52 sections | ||
- | |||
- | mkdir /sanscratch / | ||
- | chmod ugo+rwx /sanscratch / | ||
- | chmod o+t /sanscratch / | ||
- | # link localscratch in 1.4T /home to / | ||
- | mkdir /home | ||
- | cd /home # local dir | ||
- | ln -s / | ||
- | ln -s / | ||
- | ln -s / | ||
- | ln -s /zfshomes /share | ||
- | |||
- | # fstab file mounts | ||
- | # cottontail2 = greentail52 | ||
- | # n100-n101 = n79 | ||
- | |||
- | # postfix | ||
- | dnf install postfix | ||
- | dnf install mailx | ||
- | systemctl enable postfix | ||
- | echo " | ||
- | |||
- | # compute nodes / | ||
- | #pool 2.pool.ntp.org iburst | ||
- | Server 192.168.102.250 | ||
- | Server 192.168.102.251 | ||
- | |||
- | |||
- | # add packages and update | ||
- | yum install epel-release -y | ||
- | yum install flex bison -y | ||
- | yum install tcl tcl-devel dmtcp -y | ||
- | yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y | ||
- | yum install freeglut-devel libXi-devel libXmu-devel -y | ||
- | yum install blas blas-devel lapack lapack-devel boost boost-devel -y | ||
- | yum install lm_sensors lm_sensors-libs -y | ||
- | yum install zlib-devel bzip2-devel -y | ||
- | yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y | ||
- | yum install cmake -y | ||
- | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | ||
- | # amber | ||
- | yum -y install tcsh make \ | ||
- | gcc gcc-gfortran gcc-c++ \ | ||
- | which flex bison patch bc \ | ||
- | | ||
- | perl perl-ExtUtils-MakeMaker util-linux wget \ | ||
- | bzip2 bzip2-devel zlib-devel tar | ||
- | yum update -y | ||
- | yum clean all | ||
- | |||
- | # CENTOS7 pick the kernel vendor used for now | ||
- | grep ^menuentry / | ||
- | grub2-set-default 1 | ||
- | ls -d / | ||
- | # | ||
- | # | ||
- | |||
- | # compute nodes old level 3 | ||
- | systemctl set-default multi-user.target | ||
- | # remove internet, bring private back up | ||
- | reboot | ||
- | |||
- | # compute nodes only | ||
- | # leave old cuda versions behind (9.2 | 10.2) | ||
- | cd usr/local/ | ||
- | # scp from n79:/ | ||
- | amber16/ | ||
- | |||
- | # compute nodes only / | ||
- | # copy scripts: gpu-free, gpu-info, gpu-process | ||
- | # copy 10.10.102.89:/ | ||
- | # done | ||
- | |||
- | # FINISH native vanilla installs | ||
- | # R version 4.1.2 (2021-11-01) -- "Bird Hippie" | ||
- | yum install R R-devel | ||
- | # openjdk version " | ||
- | rpm -qa | grep ^java # check | ||
- | # python v 3.9 | ||
- | yum install python39 python39-devel | ||
- | # fftw 3.3.5-11.el8 | ||
- | yum install fftw fftw-devel | ||
- | #gnu scientific libraries | ||
- | yum install gsl gsl-devel | ||
- | # ruby 2.5.9-109.module+el8.5.0 | ||
- | yum install ruby ruby-devel | ||
- | # obabel chem file formats | ||
- | yum install openbabel openbabel-devel | ||
- | # dmtcp | ||
- | yum install dmtcp dmtcp-devel | ||
- | |||
- | yum clean all | ||
- | reboot | ||
- | |||
- | |||
- | </ | ||
- | |||
- | ==== Configure Recipe ==== | ||
- | |||
- | Steps. "Ala n37" ... so the RTX nodes are similar to the K20 nodes and we can put the local software in place. See [[cluster: | ||
- | |||
- | New recipe for n100-n101 sporting Rocky 8.5 on '' | ||
- | Put node on internet...first though | ||
- | |||
- | * ** Vanilla Backups** using Warewulf and plain rsync (--exclude=[proc/, | ||
- | |||
- | < | ||
- | |||
- | # login as root check some things out... | ||
- | free -g | ||
- | nvidia-smi # if gpus | ||
- | cat / | ||
- | |||
- | # check and set local time zone | ||
- | mv / | ||
- | ln -s / | ||
- | |||
- | # change passwords for root and vendor account | ||
- | passwd | ||
- | passwd microway | ||
- | # set hostname | ||
- | hostnamectl set-hostname cottontail2 | ||
- | |||
- | # root: sync cottontail' | ||
- | ssh-keygen -t rsa | ||
- | scp 10.10.102.253:/ | ||
- | / | ||
- | |||
- | # configure private subnets and ping file server | ||
- | cd / | ||
- | vi ifcfg-eth0 # 192.168.102.x | ||
- | vi ifcfg-eth1 # 10.10.102.x | ||
- | vi ifcfg-eth3 # 129.133.52.x | ||
- | systemctl restart network | ||
- | ping -c 3 192.168.102.42 | ||
- | ping -c 3 10.10.102.42 | ||
- | |||
- | # make internet connection for yum | ||
# eth3 for ctt2 or eth1 for n100-101 | # eth3 for ctt2 or eth1 for n100-101 | ||
dnf install bind-utils | dnf install bind-utils | ||
dig google.com | dig google.com | ||
+ | iptables -L # check! | ||
- | #rocky8 | + | # Rocky8 |
# https:// | # https:// | ||
dnf config-manager --set-enabled powertools | dnf config-manager --set-enabled powertools | ||
Line 237: | Line 83: | ||
dnf install gnuplot | dnf install gnuplot | ||
dnf install alpine # pico | dnf install alpine # pico | ||
- | + | yum groupinstall " | |
- | # iptables | + | |
- | dnf install -y iptables-services | + | |
- | vi / | + | |
- | # add 'local allow' ports --dport 0:65535 | + | |
- | systemctl start iptables # and enable | + | |
- | iptables -L | + | |
- | systemctl stop firewalld | + | |
- | systemctl disable firewalld | + | |
# other configs | # other configs | ||
Line 275: | Line 113: | ||
dnf install mailx | dnf install mailx | ||
systemctl enable postfix | systemctl enable postfix | ||
- | echo " | + | echo " |
+ | # on head node / | ||
+ | allow 192.168.0.0/ | ||
# compute nodes / | # compute nodes / | ||
#pool 2.pool.ntp.org iburst | #pool 2.pool.ntp.org iburst | ||
Server 192.168.102.250 | Server 192.168.102.250 | ||
Server 192.168.102.251 | Server 192.168.102.251 | ||
+ | # check | ||
+ | chronyc sources | ||
+ | |||
+ | |||
+ | # on head node install from epel repo | ||
+ | yum install slurm-openlava | ||
+ | # error on conflicting libs, too bad! | ||
Line 295: | Line 142: | ||
yum install cmake -y | yum install cmake -y | ||
yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | ||
- | # amber | + | |
+ | #easybuild | ||
+ | yum install libibverbs libibverbs-devel | ||
+ | |||
+ | # amber20 cmake readline error fix needs | ||
+ | yum install ncurses-c++-libs-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | ncurses-devel-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | readline-devel-7.0-10.el8.x86_64.rpm | ||
+ | |||
+ | # amber20 | ||
yum -y install tcsh make \ | yum -y install tcsh make \ | ||
gcc gcc-gfortran gcc-c++ \ | gcc gcc-gfortran gcc-c++ \ | ||
Line 302: | Line 158: | ||
perl perl-ExtUtils-MakeMaker util-linux wget \ | perl perl-ExtUtils-MakeMaker util-linux wget \ | ||
bzip2 bzip2-devel zlib-devel tar | bzip2 bzip2-devel zlib-devel tar | ||
- | yum update -y | ||
- | yum clean all | ||
# CENTOS7 pick the kernel vendor used for now | # CENTOS7 pick the kernel vendor used for now | ||
Line 314: | Line 168: | ||
# compute nodes old level 3 | # compute nodes old level 3 | ||
systemctl set-default multi-user.target | systemctl set-default multi-user.target | ||
- | # remove internet, bring private back up | ||
- | reboot | ||
# compute nodes only | # compute nodes only | ||
Line 335: | Line 187: | ||
# python v 3.9 | # python v 3.9 | ||
yum install python39 python39-devel | yum install python39 python39-devel | ||
+ | ln -s / | ||
# fftw 3.3.5-11.el8 | # fftw 3.3.5-11.el8 | ||
yum install fftw fftw-devel | yum install fftw fftw-devel | ||
Line 347: | Line 200: | ||
yum clean all | yum clean all | ||
+ | # eth3 onboot=no, private networks only | ||
+ | systemctl disable iptables | ||
reboot | reboot | ||
+ | # now make it an ohpc compute node | ||
+ | yum repolist | ||
+ | yum install ohpc-base-compute | ||
+ | | ||
+ | scp cottontail2:/ | ||
+ | yum install ohpc-slurm-client | ||
+ | systemctl enable munge | ||
+ | systemctl start munge | ||
+ | scp cottontail2:/ | ||
+ | echo SLURMD_OPTIONS=" | ||
+ | yum install --allowerasing lmod-ohpc | ||
+ | grep '/ | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | scp cottontail2:/ | ||
+ | scp cottontail2:/ | ||
+ | #test | ||
+ | / | ||
+ | | ||
+ | # start via rc.local | ||
+ | chmod +x / | ||
+ | #timing issue with munge | ||
+ | sleep 15 | ||
+ | / | ||
+ | | ||
+ | # slurmd ??? | ||
+ | libhwloc.so.15 => / | ||
+ | |||
+ | # add to zenoss edit / | ||
+ | rocommunity public | ||
+ | dontLogTCPWrappersConnects yes | ||
</ | </ | ||
- | ==== Configure Recipe | + | ==== Pics ==== |
- | Steps. "Ala n37" ... so the RTX nodes are similar to the K20 nodes and we can put the local software in place. See [[cluster: | ||
- | New recipe for n100-n101 sporting Rocky 8.5 on '' | + | My data center robot thingie and node n100's gpus\\ |
- | Put node on internet...first though | + | |
- | * ** Vanilla Backups** using Warewulf and plain rsync (--exclude=[proc/, | + | \\ |
- | < | + | {{: |
+ | \\ | ||
+ | {{: | ||
+ | \\ | ||
- | # login as root check some things out... | + | ==== Amber20 ==== |
- | free -g | + | |
- | nvidia-smi # if gpus | + | |
- | cat / | + | |
- | # check and set local time zone | + | OpenHPC |
- | mv / | + | |
- | ln -s / | + | |
- | # change passwords for root and vendor account | + | < |
- | passwd | + | |
- | passwd microway | + | |
- | # set hostname | + | |
- | hostnamectl set-hostname cottontail2 | + | |
- | # root: sync cottontail' | + | |
- | ssh-keygen -t rsa | + | 989 tar xvfj ../Amber20.tar.bz2 |
- | scp 10.10.102.253:/ | + | 993 cd amber20_src/ |
- | /etc/ | + | |
+ | 996 vi run_cmake | ||
- | # configure private subnets and ping file server | + | # |
- | cd / | + | |
- | vi ifcfg-eth0 # 192.168.102.x | + | |
- | vi ifcfg-eth1 # 10.10.102.x | + | |
- | vi ifcfg-eth3 # 129.133.52.x | + | |
- | systemctl restart network | + | |
- | ping -c 3 192.168.102.42 | + | |
- | ping -c 3 10.10.102.42 | + | |
- | # make internet connection for yum | + | # serial, do on head node, pull down miniconda, compile, install |
- | # eth3 for ctt2 or eth1 for n100-101 | + | cmake $AMBER_PREFIX/ |
- | dnf install bind-utils | + | |
- | dig google.com | + | |
+ | -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \ | ||
+ | -DDOWNLOAD_MINICONDA=TRUE | ||
+ | | ||
- | #rocky8 | + | # Env |
- | # https:// | + | |
- | dnf config-manager --set-enabled powertools | + | |
- | dnf install epel-release | + | |
- | dnf install netcdf netcdf-devel | + | |
- | dnf install yum-utils # yumdownloader | + | |
- | dnf install ddd | + | |
- | dnf install grace | + | |
- | dnf install gnuplot | + | |
- | dnf install alpine # pico | + | |
- | # iptables | + | [hmeij@n100 ~]$ module load cuda/11.6 |
- | dnf install -y iptables-services | + | |
- | vi /etc/ | + | |
- | # add 'local allow' ports --dport 0:65535 | + | |
- | systemctl start iptables # and enable | + | |
- | iptables -L | + | |
- | systemctl stop firewalld | + | |
- | systemctl disable firewalld | + | |
- | # other configs | + | [hmeij@n100 ~]$ echo $CUDA_HOME |
- | vi / | + | /usr/local/cuda |
- | mv /home /usr/local/ | + | |
- | vi /etc/passwd (exx, dockeruser $HOME) | + | |
- | ## edit passwd, shadow, group, hosts files ## | + | [hmeij@n100 ~]$ which nvcc mpicc gcc |
- | ## make -orig backups and stage in /home/tmp/global | + | / |
- | ## cottontail2 = greentail52 sections | + | / |
+ | / | ||
- | mkdir /sanscratch / | + | # [FIXED] cmake error on conda install, set to FALSE |
- | chmod ugo+rwx /sanscratch / | + | # OS native python, install on n[100-101] |
- | chmod o+t /sanscratch / | + | -- Python version 3.9 -- OK |
- | # link localscratch in 1.4T /home to / | + | -- Found PythonLibs: |
- | mkdir /home | + | -- Checking for Python package numpy -- not found |
- | cd /home # local dir | + | -- Checking for Python package scipy -- not found |
- | ln -s / | + | -- Checking for Python package matplotlib -- not found |
- | ln -s /zfshomes/tmp | + | -- Checking for Python package setuptools -- found |
- | ln -s / | + | [END FIXED] |
- | ln -s /zfshomes /share | + | |
- | # fstab file mounts | + | # mpi & cuda FALSE builds serial |
- | # cottontail2 = greentail52 | + | ./ |
- | # n100-n101 = n79 | + | make install |
+ | # lots and lots of warnings | ||
- | # postfix | + | # then |
- | dnf install postfix | + | source / |
- | dnf install mailx | + | |
- | systemctl enable postfix | + | |
- | echo " | + | |
- | # compute nodes / | + | # on n100 now, parallel, set miniconda flags to FALSE |
- | #pool 2.pool.ntp.org iburst | + | -DMPI=TRUE |
- | Server 192.168.102.250 | + | ./run_cmake |
- | Server 192.168.102.251 | + | make install |
+ | # on n100 just change cuda flag | ||
+ | -DCUDA=TRUE | ||
+ | ./run_cmake | ||
+ | make install | ||
- | # add packages and update | + | #tests |
- | yum install epel-release -y | + | cd $AMBERHOME |
- | yum install flex bison -y | + | make test.serial |
- | yum install tcl tcl-devel dmtcp -y | + | export DO_PARALLEL=" |
- | yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y | + | make test.parallel |
- | yum install freeglut-devel libXi-devel libXmu-devel -y | + | export CUDA_VISIBLE_DEVICES=0 |
- | yum install blas blas-devel lapack lapack-devel boost boost-devel -y | + | make test.cuda.serial |
- | yum install lm_sensors lm_sensors-libs -y | + | make test.cuda.parallel |
- | yum install zlib-devel bzip2-devel -y | + | |
- | yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y | + | |
- | yum install cmake -y | + | |
- | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | + | |
- | # amber | + | |
- | yum -y install tcsh make \ | + | |
- | gcc gcc-gfortran gcc-c++ \ | + | |
- | which flex bison patch bc \ | + | |
- | | + | |
- | perl perl-ExtUtils-MakeMaker util-linux wget \ | + | |
- | bzip2 bzip2-devel zlib-devel tar | + | |
- | yum update -y | + | |
- | yum clean all | + | |
- | + | ||
- | # CENTOS7 pick the kernel vendor used for now | + | |
- | grep ^menuentry /etc/grub2.cfg | + | |
- | grub2-set-default 1 | + | |
- | ls -d / | + | |
- | # | + | |
- | # | + | |
- | + | ||
- | # compute nodes old level 3 | + | |
- | systemctl set-default multi-user.target | + | |
- | # remove internet, bring private back up | + | |
- | reboot | + | |
- | + | ||
- | # compute nodes only | + | |
- | # leave old cuda versions behind (9.2 | 10.2) | + | |
- | cd usr/ | + | |
- | # scp from n79:/ | + |
- | amber16/ | + | |
- | + | ||
- | # compute nodes only / | + | |
- | # copy scripts: gpu-free, gpu-info, gpu-process | + | |
- | # copy 10.10.102.89:/ | + | |
- | # done | + | |
- | + | ||
- | # FINISH native vanilla installs | + | |
- | # R version 4.1.2 (2021-11-01) -- "Bird Hippie" | + | |
- | yum install R R-devel | + | |
- | # openjdk version " | + | |
- | rpm -qa | grep ^java # check | + | |
- | # python v 3.9 | + | |
- | yum install python39 python39-devel | + | |
- | # fftw 3.3.5-11.el8 | + | |
- | yum install fftw fftw-devel | + | |
- | #gnu scientific libraries | + | |
- | yum install gsl gsl-devel | + | |
- | # ruby 2.5.9-109.module+el8.5.0 | + | |
- | yum install ruby ruby-devel | + | |
- | # obabel chem file formats | + | |
- | yum install openbabel openbabel-devel | + | |
- | # dmtcp | + | |
- | yum install dmtcp dmtcp-devel | + | |
- | + | ||
- | yum clean all | + | |
- | reboot | + | |
</ | </ | ||
- | ==== Pics ==== | ||
- | |||
- | |||
- | \\ | ||
**[[cluster: | **[[cluster: | ||