scp 10.10.102.253:/root/.ssh/authorized_keys /root/.ssh/
/etc/ssh/sshd_config (PermitRootLogin)

# Put the warewulf cluster key in authorized_keys
# Put eth0 fingerprints in cottontail/greentail52 known_hosts
# add to relevant known_hosts_servername file
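# a minimal sketch of those two steps; the key file name and the node
# name below are assumptions, not taken from this page:
cat /root/.ssh/cluster.pub >> /root/.ssh/authorized_keys
ssh-keyscan n100 >> /root/.ssh/known_hosts_n100   # eth0 fingerprint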
  
# configure private subnets and ping file server
Line 53: Line 57:
  
# make internet connection for yum

# iptables
dnf install -y iptables-services
vi /etc/sysconfig/iptables
# add 'local allow' ports  --dport 0:65535
systemctl start iptables # and enable
iptables -L
systemctl stop firewalld
systemctl disable firewalld
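# example 'local allow' rule for /etc/sysconfig/iptables (a sketch;
# the private subnet below is an assumption based on this page):
-A INPUT -s 192.168.0.0/16 -p tcp -m tcp --dport 0:65535 -j ACCEPT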
# eth3 for ctt2 or eth1 for n100-101
dnf install bind-utils
dig google.com
iptables -L # check!
  
# Rocky8
dnf install gnuplot
dnf install alpine # pico
yum groupinstall "Server"  # for compute nodes; "Server with GUI" where a desktop is wanted
  
# other configs
vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!
mv /home /usr/local/
cd /; ln -s /usr/local/home
cd /; ln -s /home /share
vi /etc/passwd (exx, dockeruser $HOME)
  
## cottontail2 = greentail52 sections
  
# exx96
mkdir /sanscratch /home/localscratch
chmod ugo+rwx /sanscratch /home/localscratch
chmod o+t /sanscratch /home/localscratch

# exx96
# link localscratch in 1.4T /home to /
cd /home
ln -s /zfshomes/apps
ln -s /zfshomes/tmp
ln -s /zfshomes/csmith06
  
# fstab file mounts
mkdir -p /zfshomes /home66 /home33 /mindstore /opt/ohpc/pub /opt/intel
# cottontail2 = greentail52
# n100-n101 = n79
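# sketch of matching /etc/fstab NFS entries; the export paths and mount
# options are assumptions, only the server alias comes from this page:
greentail52:/zfshomes      /zfshomes      nfs  defaults  0 0
greentail52:/opt/ohpc/pub  /opt/ohpc/pub  nfs  defaults  0 0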
  
# add to zenoss: edit /etc/snmp/snmpd.conf, enable and start snmpd
rocommunity public
dontLogTCPWrappersConnects yes
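# a sketch of that enable/start step (the net-snmp package name is an
# assumption, not taken from this page):
dnf install net-snmp
systemctl enable --now snmpd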
  
# on head node /etc/chrony.conf
allow 192.168.0.0/16
# compute nodes /etc/chrony.conf
#pool 2.pool.ntp.org iburst
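# compute node sketch: sync to the head node instead; the IP is an
# assumption, taken from the slurmd --conf-server address further down
server 192.168.102.250 iburst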
# check
chronyc sources

# on head node install from epel repo
yum install slurm-openlava
# error on conflicting libs, too bad!
  
  
yum install cmake -y
yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y

# easybuild
yum install libibverbs libibverbs-devel

# amber20 cmake readline error fix needs
yum install ncurses-c++-libs-6.1-9.20180224.el8.x86_64.rpm \
            ncurses-devel-6.1-9.20180224.el8.x86_64.rpm \
            readline-devel-7.0-10.el8.x86_64.rpm

# amber20
yum -y install tcsh make \
               gcc gcc-gfortran gcc-c++ \
# compute nodes old level 3
systemctl set-default multi-user.target

# postfix
dnf install postfix
dnf install mailx
systemctl enable postfix
echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf
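# quick relay test (a sketch; the recipient address is hypothetical):
echo "relay test" | mail -s "relay test" someuser@example.com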
  
# compute nodes only
# openjdk version "1.8.0_322"
rpm -qa | grep ^java  # check
yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \
            java-1.8.0-openjdk-headless javapackages-filesystem
# python v 3.9
yum install python39 python39-devel
ln -s /usr/bin/python3.9 /usr/bin/python
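# alternative to the symlink (a sketch; assumes python39 registered
# itself with the alternatives framework, as on RHEL/Rocky 8):
alternatives --set python /usr/bin/python3.9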
# fftw 3.3.5-11.el8
yum install fftw fftw-devel
# dmtcp
yum install dmtcp dmtcp-devel

# check status of service munge
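# a minimal sketch of that check (head node name from this page):
systemctl status munge
munge -n | unmunge                  # local encode/decode
munge -n | ssh cottontail2 unmunge  # key matches across nodes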
  
yum clean all
systemctl disable iptables
reboot

# now make it an ohpc compute node
yum repolist
yum install ohpc-base-compute

scp cottontail2:/etc/resolv.conf /etc/resolv.conf
yum install ohpc-slurm-client
systemctl enable munge
systemctl start munge
scp cottontail2:/etc/munge/munge.key /etc/munge/munge.key
echo 'SLURMD_OPTIONS="--conf-server 192.168.102.250"' > /etc/sysconfig/slurmd
yum install --allowerasing lmod-ohpc
grep '/var' /etc/slurm/slurm.conf
mkdir /var/log/slurm
chown slurm:munge /var/log/slurm
mkdir /var/spool/slurm
chown slurm:munge /var/spool/slurm
scp cottontail2:/etc/slurm/slurm.conf /etc/slurm/slurm.conf
scp cottontail2:/etc/slurm/gres.conf /etc/slurm/gres.conf
scp cottontail2:/etc/profile.d/lmod.sh /etc/profile.d/

# /var/[log|spool|run] need to be removed from
# /usr/libexec/warewulf/wwmkchroot/gold-template

# test
/usr/sbin/slurmd -D

# start via rc.local, there is a timing issue with munge
chmod +x /etc/rc.d/rc.local
# append to /etc/rc.d/rc.local:
sleep 15
/usr/sbin/slurmd
# note: slurmd links hwloc from the OpenHPC tree
ldd /usr/sbin/slurmd | grep hwloc
  libhwloc.so.15 => /opt/ohpc/pub/libs/hwloc/lib/libhwloc.so.15 (0x00007fd6e5684000)
  
  
  
My data center robot thingie and node n100's gpus\\
\\
{{:cluster:dcrobot.jpg?400|}}
\\
{{:cluster:n100.jpg?400|}}\\
\\
  
==== Amber20 ====

OpenHPC

<code>

# First: install all the necessary packages (yum install ...)

tar xvfj ../AmberTools21.tar.bz2
tar xvfj ../Amber20.tar.bz2
cd amber20_src/
cd build/
vi run_cmake

# Assume this is Linux:

# serial, do on head node, with miniconda true, compile, install
cmake $AMBER_PREFIX/amber20_src \
    -DCMAKE_INSTALL_PREFIX=/share/apps/CENTOS8/ohpc/software/amber/20 \
    -DCOMPILER=GNU \
    -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \
    -DDOWNLOAD_MINICONDA=TRUE -DMINICONDA_USE_PY3=TRUE \
    2>&1 | tee cmake.log

# Env

[hmeij@n100 ~]$ module load cuda/11.6

[hmeij@n100 ~]$ echo $CUDA_HOME
/usr/local/cuda

[hmeij@n100 ~]$ which nvcc mpicc gcc
/usr/local/cuda/bin/nvcc
/opt/ohpc/pub/mpi/openmpi4-gnu9/4.1.1/bin/mpicc
/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc
# [FIXED] cmake error on conda install, set to FALSE
# OS native python, install on n[100-101]
-- Python version 3.9 -- OK
-- Found PythonLibs: /usr/lib64/libpython3.9.so (found version "3.9.6")
-- Checking for Python package numpy -- not found
-- Checking for Python package scipy -- not found
-- Checking for Python package matplotlib -- not found
-- Checking for Python package setuptools -- found
# [END FIXED]

# mpi & cuda FALSE builds serial
./run_cmake
make install
# lots and lots of warnings

# then
source /share/apps/CENTOS8/ohpc/software/amber/20/amber.sh

# on n100 now, parallel, set miniconda flags to FALSE and
-DMPI=TRUE
./run_cmake
make install

# on n100 just change cuda flag
-DCUDA=TRUE
./run_cmake
make install

# tests
cd $AMBERHOME
make test.serial
export DO_PARALLEL="mpirun -np 6"
make test.parallel
export CUDA_VISIBLE_DEVICES=0
make test.cuda.serial
make test.cuda.parallel

</code>
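A minimal sketch of running the finished CUDA build under Slurm; the resource flags and input file names are assumptions, not taken from this page:

<code>
#!/bin/bash
#SBATCH -N 1 -n 6
#SBATCH --gres=gpu:1
# same toolchain and environment as the build
module load cuda/11.6
source /share/apps/CENTOS8/ohpc/software/amber/20/amber.sh
# hypothetical inputs: mdin, prmtop, inpcrd in the submit directory
mpirun -np 6 pmemd.cuda.MPI -O -i mdin -p prmtop -c inpcrd -o mdout
</code>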

==== Amber22 ====

OpenHPC

<code>

# First: install all the necessary packages (yum install ...)

tar xvfj ../AmberTools22.tar.bz2
tar xvfj ../Amber22.tar.bz2
cd amber22_src/
cd build/
vi run_cmake

# Assume this is Linux:

# serial, do on head node, with miniconda true, compile, install
cmake $AMBER_PREFIX/amber22_src \
    -DCMAKE_INSTALL_PREFIX=/share/apps/CENTOS8/ohpc/software/amber/22 \
    -DCOMPILER=GNU \
    -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \
    -DDOWNLOAD_MINICONDA=TRUE \
    2>&1 | tee cmake.log
./run_cmake
make install

# Note !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# The OpenMPI and MPICH system installations provided by CentOS
# (i.e., through yum install)
# are known to be somehow incompatible with Amber22.
# OUCH !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# GO TO node n100

# copy head node's amber22_src/ to n100:/usr/local/src/tmp/

source /share/apps/CENTOS8/ohpc/software/amber/22/amber.sh
echo $AMBERHOME

# install latest openmpi version
cd amber22_src/AmberTools/src
tar xvfj ../../../../openmpi-4.1.4.tar.bz2

./configure_openmpi gnu # openhpc gcc/gfortran

# on n100 now, parallel, set
-DMPI=TRUE
-DDOWNLOAD_MINICONDA=FALSE
./run_cmake
make install

# on n100 just change cuda flag

[hmeij@n100 build]$ module load cuda/11.6
[hmeij@n100 build]$ which gcc mpicc nvcc
/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc
/share/apps/CENTOS8/ohpc/software/amber/22/bin/mpicc
/usr/local/cuda/bin/nvcc
[hmeij@n100 ~]$ echo $CUDA_HOME
/usr/local/cuda

-DMPI=TRUE
-DCUDA=TRUE
-DDOWNLOAD_MINICONDA=FALSE
./run_cmake
make install

[hmeij@n100 ~]$ which nvcc mpicc gcc
/usr/local/cuda/bin/nvcc
/opt/ohpc/pub/mpi/openmpi4-gnu9/4.1.1/bin/mpicc
/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc

# tests
cd $AMBERHOME
make test.serial
export DO_PARALLEL="mpirun -np 6"
make test.parallel
export CUDA_VISIBLE_DEVICES=0
make test.cuda.serial
make test.cuda.parallel

</code>

**[[cluster:0|Back]]**
  