This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
cluster:213 [2022/03/16 13:27] hmeij07 |
cluster:213 [2023/02/20 18:20] hmeij07 |
||
---|---|---|---|
Line 42: | Line 42: | ||
scp 10.10.102.253:/ | scp 10.10.102.253:/ | ||
/ | / | ||
+ | |||
+ | # Put the warewulf cluster key in authorized_keys | ||
+ | # Put eth0 fingerprints in cottontail/ | ||
+ | # add to relevant known_hosts_servername file | ||
# configure private subnets and ping file server | # configure private subnets and ping file server | ||
cd / | cd / | ||
vi ifcfg-eth0 # 192.168.102.x | vi ifcfg-eth0 # 192.168.102.x | ||
- | vi ifcfg-eth1 # 10.10.102.x | + | vi ifcfg-eth1 # 10.10.102.x |
vi ifcfg-eth3 # 129.133.52.x | vi ifcfg-eth3 # 129.133.52.x | ||
+ | scp 192.168.102.112:/ | ||
+ | |||
systemctl restart network | systemctl restart network | ||
ping -c 3 192.168.102.42 | ping -c 3 192.168.102.42 | ||
Line 53: | Line 59: | ||
# make internet connection for yum | # make internet connection for yum | ||
- | # eth3 for ctt2 or eth1 for n100-101 | ||
- | dnf install bind-utils | ||
- | dig google.com | ||
- | |||
- | #rocky8 | ||
- | # https:// | ||
- | dnf config-manager --set-enabled powertools | ||
- | dnf install epel-release | ||
- | dnf install netcdf netcdf-devel | ||
- | dnf install yum-utils # yumdownloader | ||
- | dnf install ddd | ||
- | dnf install grace | ||
- | dnf install gnuplot | ||
- | dnf install alpine # pico | ||
# iptables | # iptables | ||
Line 74: | Line 66: | ||
systemctl start iptables # and enable | systemctl start iptables # and enable | ||
iptables -L | iptables -L | ||
- | systemctl stop firewalld | + | |
- | systemctl disable firewalld | + | |
+ | # eth3 for ctt2 or eth1 for n100-101 | ||
+ | dnf install bind-utils | ||
+ | dig google.com | ||
+ | iptables -L # check! | ||
+ | |||
# other configs | # other configs | ||
vi / | vi / | ||
mv /home /usr/local/ | mv /home /usr/local/ | ||
+ | cd /;ln -s / | ||
+ | cd /; ln -s /home /share | ||
vi /etc/passwd (exx, dockeruser $HOME) | vi /etc/passwd (exx, dockeruser $HOME) | ||
- | ## edit passwd, shadow, group, hosts files ## | ||
- | ## make -orig backups and stage in / | ||
- | ## cottontail2 = greentail52 sections | ||
+ | #exx96 | ||
mkdir /sanscratch / | mkdir /sanscratch / | ||
chmod ugo+rwx /sanscratch / | chmod ugo+rwx /sanscratch / | ||
chmod o+t /sanscratch / | chmod o+t /sanscratch / | ||
+ | # exx96 | ||
# link localscratch in 1.4T /home to / | # link localscratch in 1.4T /home to / | ||
- | mkdir /home | + | |
- | cd /home # local dir | + | cd /home |
ln -s / | ln -s / | ||
ln -s / | ln -s / | ||
ln -s / | ln -s / | ||
- | ln -s /zfshomes | + | ls -l |
+ | |||
+ | cat / | ||
# fstab file mounts | # fstab file mounts | ||
+ | mkdir -p /zfshomes /home66 /home33 /mindstore / | ||
+ | mkdir -p / | ||
# cottontail2 = greentail52 | # cottontail2 = greentail52 | ||
# n100-n101 = n79 | # n100-n101 = n79 | ||
- | # postfix | ||
- | dnf install postfix | ||
- | dnf install mailx | ||
- | systemctl enable postfix | ||
- | echo " | ||
+ | # on head node / | ||
+ | allow 192.168.0.0/ | ||
# compute nodes / | # compute nodes / | ||
#pool 2.pool.ntp.org iburst | #pool 2.pool.ntp.org iburst | ||
Server 192.168.102.250 | Server 192.168.102.250 | ||
Server 192.168.102.251 | Server 192.168.102.251 | ||
+ | # check | ||
+ | systemctl restart chronyd | ||
+ | chronyc sources | ||
+ | |||
+ | # Rocky8 | ||
+ | # https:// | ||
+ | dnf config-manager --set-enabled powertools -y | ||
+ | dnf install epel-release -y | ||
+ | dnf install netcdf netcdf-devel -y | ||
+ | dnf install yum-utils -y # yumdownloader | ||
+ | dnf install ddd grace gnuplot alpine -y # pico | ||
+ | |||
+ | yum groupinstall " | ||
+ | |||
+ | |||
+ | # on head node install from epel repo | ||
+ | ### yum install slurm-openlava | ||
+ | # error on conflicting libs, too bad! | ||
Line 116: | Line 134: | ||
yum install epel-release -y | yum install epel-release -y | ||
yum install flex bison -y | yum install flex bison -y | ||
- | yum install tcl tcl-devel dmtcp -y | + | yum install tcl tcl-devel |
yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y | yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y | ||
yum install freeglut-devel libXi-devel libXmu-devel -y | yum install freeglut-devel libXi-devel libXmu-devel -y | ||
Line 125: | Line 143: | ||
yum install cmake -y | yum install cmake -y | ||
yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | ||
- | # amber | + | |
+ | #easybuild | ||
+ | yum install libibverbs libibverbs-devel | ||
+ | |||
+ | # amber20 cmake readline error fix needs | ||
+ | yum install ncurses-c++-libs-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | ncurses-devel-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | readline-devel-7.0-10.el8.x86_64.rpm | ||
+ | |||
+ | # amber20 | ||
yum -y install tcsh make \ | yum -y install tcsh make \ | ||
gcc gcc-gfortran gcc-c++ \ | gcc gcc-gfortran gcc-c++ \ | ||
Line 132: | Line 159: | ||
perl perl-ExtUtils-MakeMaker util-linux wget \ | perl perl-ExtUtils-MakeMaker util-linux wget \ | ||
bzip2 bzip2-devel zlib-devel tar | bzip2 bzip2-devel zlib-devel tar | ||
- | yum update -y | ||
- | yum clean all | ||
# CENTOS7 pick the kernel vendor used for now | # CENTOS7 pick the kernel vendor used for now | ||
Line 144: | Line 169: | ||
# compute nodes old level 3 | # compute nodes old level 3 | ||
systemctl set-default multi-user.target | systemctl set-default multi-user.target | ||
- | # remove internet, bring private back up | + | |
- | reboot | + | |
+ | # postfix | ||
+ | dnf install postfix | ||
+ | dnf install mailx | ||
+ | systemctl enable postfix | ||
+ | echo " | ||
+ | |||
+ | |||
+ | # edit / | ||
+ | rocommunity public | ||
+ | dontLogTCPWrappersConnects yes | ||
+ | # enable, start, add to zenoss | ||
# compute nodes only | # compute nodes only | ||
Line 163: | Line 200: | ||
# openjdk version " | # openjdk version " | ||
rpm -qa | grep ^java # check | rpm -qa | grep ^java # check | ||
+ | yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \ | ||
+ | java-1.8.0-openjdk-headless javapackages-filesystem | ||
# python v 3.9 | # python v 3.9 | ||
yum install python39 python39-devel | yum install python39 python39-devel | ||
+ | ln -s / | ||
# fftw 3.3.5-11.el8 | # fftw 3.3.5-11.el8 | ||
yum install fftw fftw-devel | yum install fftw fftw-devel | ||
Line 173: | Line 213: | ||
# obabel chem file formats | # obabel chem file formats | ||
yum install openbabel openbabel-devel | yum install openbabel openbabel-devel | ||
- | # dmtcp | + | |
- | yum install dmtcp dmtcp-devel | + | |
yum clean all | yum clean all | ||
- | reboot | + | # eth3 onboot=no, private networks only |
+ | systemctl disable iptables | ||
+ | # now make it an ohpc compute node | ||
+ | yum repolist | ||
+ | yum install singularity-ohpc | ||
+ | yum install ohpc-base-compute --nobest | ||
+ | | ||
+ | scp cottontail2:/ | ||
+ | yum install ohpc-slurm-client | ||
+ | # check status of service munge | ||
+ | systemctl enable munge | ||
+ | systemctl start munge | ||
+ | scp cottontail2:/ | ||
+ | echo SLURMD_OPTIONS=" | ||
+ | yum install --allowerasing lmod-ohpc | ||
+ | grep '/ | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | mkdir / | ||
+ | chown slurm:munge / | ||
+ | scp cottontail2:/ | ||
+ | scp cottontail2:/ | ||
+ | scp cottontail2:/ | ||
+ | | ||
+ | # /etc/bashrc add | ||
+ | # ohpc lmod gcc mpicc | ||
+ | export PATH=/ | ||
+ | export LD_LIBRARY_PATH=/ | ||
+ | |||
+ | | ||
+ | # / | ||
+ | / | ||
+ | |||
+ | #test | ||
+ | / | ||
+ | | ||
+ | # start via rc.local (already copied) | ||
+ | #chmod +x / | ||
+ | #timing issue with munge | ||
+ | #sleep 15 | ||
+ | #/ | ||
+ | |||
+ | systemctl stop firewalld | ||
+ | systemctl disable firewalld | ||
+ | |||
+ | |||
+ | ## edit passwd, shadow, group, hosts files ## | ||
+ | ## make -orig backups and stage in / | ||
+ | ## cottontail2 = greentail52 sections | ||
+ | |||
+ | | ||
+ | # slurmd ??? | ||
+ | libhwloc.so.15 => / | ||
+ | |||
+ | # crontab | ||
+ | |||
+ | # ionice gaussian | ||
+ | 0,15,30,45 * * * * / | ||
+ | |||
+ | # cpu temps | ||
+ | 40 * * * * / | ||
+ | |||
+ | on compute node / | ||
+ | * - | ||
+ | |||
+ | |||
+ | |||
+ | # file date_ctt2.sh | ||
+ | |||
+ | # ctt /etc/pdsh | ||
+ | |||
+ | # ctt:/ | ||
</ | </ | ||
- | ==== Configure Recipe ==== | ||
- | Steps. "Ala n37" ... so the RTX nodes are similar to the K20 nodes and we can put the local software in place. See [[cluster: | + | ==== Pics ==== |
- | New recipe for n100-n101 sporting Rocky 8.5 on '' | ||
- | Put node on internet...first though | ||
- | * ** Vanilla Backups** using Warewulf | + | My data center robot thingie |
+ | |||
+ | \\ | ||
+ | |||
+ | {{: | ||
+ | \\ | ||
+ | {{: | ||
+ | \\ | ||
+ | |||
+ | ==== Amber20 ==== | ||
+ | |||
+ | OpenHPC | ||
< | < | ||
- | # login as root check some things out... | + | # First **all the necessary packages ** (yum install...) |
- | free -g | + | |
- | nvidia-smi # if gpus | + | |
- | cat / | + | |
- | # check and set local time zone | + | |
- | mv /etc/ | + | |
- | ln -s /usr/share/ | + | |
+ | 994 cd build/ | ||
+ | 996 vi run_cmake | ||
- | # change passwords for root and vendor account | + | # |
- | passwd | + | |
- | passwd microway | + | |
- | # set hostname | + | |
- | hostnamectl set-hostname cottontail2 | + | |
- | # root: sync cottontail' | + | # serial, do on head node, with miniconda true, compile, install |
- | ssh-keygen -t rsa | + | cmake $AMBER_PREFIX/ |
- | scp 10.10.102.253: | + | |
- | /etc/ | + | -DCOMPILER=GNU |
+ | -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \ | ||
+ | -DDOWNLOAD_MINICONDA=TRUE -DMINICONDA_USE_PY3=TRUE \ | ||
+ | 2>&1 | tee cmake.log | ||
- | # configure private subnets and ping file server | + | # Env |
- | cd / | + | |
- | vi ifcfg-eth0 # 192.168.102.x | + | |
- | vi ifcfg-eth1 # 10.10.102.x | + | |
- | vi ifcfg-eth3 # 129.133.52.x | + | |
- | systemctl restart network | + | |
- | ping -c 3 192.168.102.42 | + | |
- | ping -c 3 10.10.102.42 | + | |
- | # make internet connection for yum | + | [hmeij@n100 ~]$ module load cuda/11.6 |
- | # eth3 for ctt2 or eth1 for n100-101 | + | |
- | dnf install bind-utils | + | |
- | dig google.com | + | |
- | #rocky8 | + | [hmeij@n100 ~]$ echo $CUDA_HOME |
- | # https://docs.fedoraproject.org/en-US/ | + | /usr/local/cuda |
- | dnf config-manager --set-enabled powertools | + | |
- | dnf install epel-release | + | |
- | dnf install netcdf netcdf-devel | + | |
- | dnf install yum-utils # yumdownloader | + | |
- | dnf install ddd | + | |
- | dnf install grace | + | |
- | dnf install gnuplot | + | |
- | dnf install alpine # pico | + | |
- | # iptables | + | [hmeij@n100 ~]$ which nvcc mpicc gcc |
- | dnf install -y iptables-services | + | /usr/local/cuda/ |
- | vi /etc/sysconfig/iptables | + | / |
- | # add 'local allow' ports --dport 0:65535 | + | / |
- | systemctl start iptables # and enable | + | |
- | iptables -L | + | |
- | systemctl stop firewalld | + | |
- | systemctl disable firewalld | + | |
- | # other configs | + | # [FIXED} cmake error on conda install, set to FALSE |
- | vi / | + | # OS native python, install on n[100-101] |
- | mv /home /usr/local/ | + | -- Python version 3.9 -- OK |
- | vi / | + | -- Found PythonLibs: |
+ | -- Checking for Python package numpy -- not found | ||
+ | -- Checking for Python package scipy -- not found | ||
+ | -- Checking for Python package matplotlib -- not found | ||
+ | -- Checking for Python package setuptools -- found | ||
+ | [END FIXED] | ||
- | ## edit passwd, shadow, group, hosts files ## | + | # mpi & cuda FALSE builds serial |
- | ## make -orig backups and stage in /home/ | + | ./run_cmake |
- | ## cottontail2 = greentail52 sections | + | make install |
+ | # lots and lots of warnings | ||
- | mkdir /sanscratch / | + | # then |
- | chmod ugo+rwx /sanscratch / | + | source |
- | chmod o+t /sanscratch / | + | |
- | # link localscratch in 1.4T /home to / | + | |
- | mkdir /home | + | |
- | cd /home # local dir | + | |
- | ln -s /zfshomes/apps | + | |
- | ln -s /zfshomes/tmp | + | |
- | ln -s /zfshomes/csmith06 | + | |
- | ln -s /zfshomes | + | |
- | # fstab file mounts | + | # on n100 now, parallel, set miniconda flags to FALSE |
- | # cottontail2 = greentail52 | + | -DMPI=TRUE |
- | # n100-n101 = n79 | + | ./ |
+ | make install | ||
- | # postfix | + | # on n100 just change cuda flag |
- | dnf install postfix | + | -DCUDA=TRUE |
- | dnf install mailx | + | ./run_cmake |
- | systemctl enable postfix | + | make install |
- | echo " | + | |
- | # compute nodes / | + | #tests |
- | #pool 2.pool.ntp.org iburst | + | cd $AMBERHOME |
- | Server 192.168.102.250 | + | make test.serial |
- | Server 192.168.102.251 | + | export DO_PARALLEL=" |
+ | make test.parallel | ||
+ | export CUDA_VISIBLE_DEVICES=0 | ||
+ | make test.cuda.serial | ||
+ | make test.cuda.parallel | ||
+ | </ | ||
- | # add packages and update | + | ==== Amber22 ==== |
- | yum install epel-release -y | + | |
- | yum install flex bison -y | + | |
- | yum install tcl tcl-devel dmtcp -y | + | |
- | yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y | + | |
- | yum install freeglut-devel libXi-devel libXmu-devel -y | + | |
- | yum install blas blas-devel lapack lapack-devel boost boost-devel -y | + | |
- | yum install lm_sensors lm_sensors-libs -y | + | |
- | yum install zlib-devel bzip2-devel -y | + | |
- | yum install openmpi openmpi-devel perl-ExtUtils-MakeMaker -y | + | |
- | yum install cmake -y | + | |
- | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | + | |
- | # amber | + | |
- | yum -y install tcsh make \ | + | |
- | gcc gcc-gfortran gcc-c++ \ | + | |
- | which flex bison patch bc \ | + | |
- | | + | |
- | perl perl-ExtUtils-MakeMaker util-linux wget \ | + | |
- | bzip2 bzip2-devel zlib-devel tar | + | |
- | yum update -y | + | |
- | yum clean all | + | |
- | # CENTOS7 pick the kernel vendor used for now | + | OpenHPC |
- | grep ^menuentry / | + | |
- | grub2-set-default 1 | + | |
- | ls -d / | + | |
- | # | + | |
- | # | + | |
- | # compute nodes old level 3 | + | < |
- | systemctl set-default multi-user.target | + | |
- | # remove internet, bring private back up | + | |
- | reboot | + | |
- | # compute nodes only | + | # First **all the necessary packages ** (yum install...) |
- | # leave old cuda versions behind | + | |
- | cd usr/ | + | |
- | # scp from n79:/ | + | |
- | amber16/ | + | |
- | # compute nodes only / | + | |
- | # copy scripts: gpu-free, gpu-info, gpu-process | + | |
- | # copy 10.10.102.89:/usr/ | + | 993 cd amber20_src/ |
- | # done | + | 994 cd build/ |
+ | | ||
- | # FINISH native vanilla installs | + | # |
- | # R version 4.1.2 (2021-11-01) -- "Bird Hippie" | + | |
- | yum install R R-devel | + | |
- | # openjdk version " | + | |
- | rpm -qa | grep ^java # check | + | |
- | # python v 3.9 | + | |
- | yum install python39 python39-devel | + | |
- | # fftw 3.3.5-11.el8 | + | |
- | yum install fftw fftw-devel | + | |
- | #gnu scientific libraries | + | |
- | yum install gsl gsl-devel | + | |
- | # ruby 2.5.9-109.module+el8.5.0 | + | |
- | yum install ruby ruby-devel | + | |
- | # obabel chem file formats | + | |
- | yum install openbabel openbabel-devel | + | |
- | # dmtcp | + | |
- | yum install dmtcp dmtcp-devel | + | |
- | yum clean all | + | # serial, do on head node, with miniconda true, compile, install |
- | reboot | + | cmake $AMBER_PREFIX/ |
+ | -DCMAKE_INSTALL_PREFIX=/ | ||
+ | -DCOMPILER=GNU | ||
+ | -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \ | ||
+ | -DDOWNLOAD_MINICONDA=TRUE \ | ||
+ | 2>&1 | tee cmake.log | ||
+ | ./ | ||
+ | make install | ||
+ | |||
+ | # Note !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||
+ | The OpenMPI and MPICH system installations provided by CentOS | ||
+ | (i.e., through yum install) | ||
+ | are known to be somehow incompatible with Amber22. | ||
+ | # OUCH !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||
+ | |||
+ | |||
+ | # GO TO node n100 | ||
+ | |||
+ | # copy head node's amber22_src/ | ||
+ | |||
+ | |||
+ | source / | ||
+ | echo $AMBERHOME | ||
+ | |||
+ | # install latest openmpi version | ||
+ | cd amber_src/ | ||
+ | tar xvfj ../ | ||
+ | |||
+ | ./ | ||
+ | |||
+ | |||
+ | # on n100 now, parallel, set | ||
+ | -DMPI=TRUE | ||
+ | -DDOWNLOAD_MINICONDA=FALSE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | # on n100 just change cuda flag | ||
+ | |||
+ | [hmeij@n100 build]$ module load cuda/11.6 | ||
+ | [hmeij@n100 build]$ which gcc mpicc nvcc | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | [hmeij@n100 ~]$ echo $CUDA_HOME | ||
+ | / | ||
+ | |||
+ | -DMPI=TRUE | ||
+ | -DCUDA=TRUE | ||
+ | -DDOWNLOAD_MINICONDA=FALSE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | |||
+ | [hmeij@n100 ~]$ which nvcc mpicc gcc | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
+ | #tests | ||
+ | cd $AMBERHOME | ||
+ | make test.serial | ||
+ | export DO_PARALLEL=" | ||
+ | make test.parallel | ||
+ | export CUDA_VISIBLE_DEVICES=0 | ||
+ | make test.cuda.serial | ||
+ | make test.cuda.parallel | ||
</ | </ | ||
- | ==== Pics ==== | ||
- | |||
- | \\ | ||
**[[cluster: | **[[cluster: | ||