This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision Next revision Both sides next revision | ||
cluster:213 [2022/03/21 16:06] hmeij07 |
cluster:213 [2022/07/25 13:34] hmeij07 [Amber22] |
||
---|---|---|---|
Line 44: | Line 44: | ||
# Put the warewulf cluster key in authorized_keys | # Put the warewulf cluster key in authorized_keys | ||
+ | # Put eth0 fingerprints in cottontail/ | ||
+ | # add to relevant known_hosts_servername file | ||
# configure private subnets and ping file server | # configure private subnets and ping file server | ||
Line 55: | Line 57: | ||
# make internet connection for yum | # make internet connection for yum | ||
+ | |||
+ | # iptables | ||
+ | dnf install -y iptables-services | ||
+ | vi / | ||
+ | # add 'local allow' ports --dport 0:65535 | ||
+ | systemctl start iptables # and enable | ||
+ | iptables -L | ||
+ | systemctl stop firewalld | ||
+ | systemctl disable firewalld | ||
+ | |||
+ | |||
# eth3 for ctt2 or eth1 for n100-101 | # eth3 for ctt2 or eth1 for n100-101 | ||
dnf install bind-utils | dnf install bind-utils | ||
dig google.com | dig google.com | ||
+ | iptables -L # check! | ||
# Rocky8 | # Rocky8 | ||
Line 69: | Line 83: | ||
dnf install gnuplot | dnf install gnuplot | ||
dnf install alpine # pico | dnf install alpine # pico | ||
- | + | yum groupinstall " | |
- | # iptables | + | |
- | dnf install -y iptables-services | + | |
- | vi / | + | |
- | # add 'local allow' ports --dport 0:65535 | + | |
- | systemctl start iptables # and enable | + | |
- | iptables -L | + | |
- | systemctl stop firewalld | + | |
- | systemctl disable firewalld | + | |
# other configs | # other configs | ||
Line 110: | Line 116: | ||
# on head node / | # on head node / | ||
- | allow 192.1668.0.0/6 | + | allow 192.168.0.0/16 |
# compute nodes / | # compute nodes / | ||
#pool 2.pool.ntp.org iburst | #pool 2.pool.ntp.org iburst | ||
Line 117: | Line 123: | ||
# check | # check | ||
chronyc sources | chronyc sources | ||
+ | |||
+ | |||
+ | # on head node install from epel repo | ||
+ | yum install slurm-openlava | ||
+ | # error on conflicting libs, too bad! | ||
Line 131: | Line 142: | ||
yum install cmake -y | yum install cmake -y | ||
yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | yum install libjpeg libjpeg-devel libjpeg-turbo-devel -y | ||
- | # amber | + | |
+ | #easybuild | ||
+ | yum install libibverbs libibverbs-devel | ||
+ | |||
+ | # amber20: fixing the cmake readline error needs these packages | ||
+ | yum install ncurses-c++-libs-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | ncurses-devel-6.1-9.20180224.el8.x86_64.rpm \ | ||
+ | readline-devel-7.0-10.el8.x86_64.rpm | ||
+ | |||
+ | # amber20 | ||
yum -y install tcsh make \ | yum -y install tcsh make \ | ||
gcc gcc-gfortran gcc-c++ \ | gcc gcc-gfortran gcc-c++ \ | ||
Line 165: | Line 185: | ||
# openjdk version " | # openjdk version " | ||
rpm -qa | grep ^java # check | rpm -qa | grep ^java # check | ||
+ | yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel \ | ||
+ | java-1.8.0-openjdk-headless javapackages-filesystem | ||
# python v 3.9 | # python v 3.9 | ||
yum install python39 python39-devel | yum install python39 python39-devel | ||
+ | ln -s / | ||
# fftw 3.3.5-11.el8 | # fftw 3.3.5-11.el8 | ||
yum install fftw fftw-devel | yum install fftw fftw-devel | ||
Line 177: | Line 200: | ||
# dmtcp | # dmtcp | ||
yum install dmtcp dmtcp-devel | yum install dmtcp dmtcp-devel | ||
+ | |||
+ | # check status of service munge | ||
yum clean all | yum clean all | ||
Line 190: | Line 215: | ||
yum install ohpc-slurm-client | yum install ohpc-slurm-client | ||
systemctl enable munge | systemctl enable munge | ||
- | scp cottontail2:/ | + | |
+ | | ||
echo SLURMD_OPTIONS=" | echo SLURMD_OPTIONS=" | ||
yum install --allowerasing lmod-ohpc | yum install --allowerasing lmod-ohpc | ||
Line 198: | Line 224: | ||
mkdir / | mkdir / | ||
chown slurm:munge / | chown slurm:munge / | ||
+ | scp cottontail2:/ | ||
+ | scp cottontail2:/ | ||
+ | scp cottontail2:/ | ||
+ | | ||
+ | # / | ||
+ | / | ||
+ | |||
+ | #test | ||
+ | / | ||
| | ||
# start via rc.local | # start via rc.local | ||
Line 208: | Line 243: | ||
libhwloc.so.15 => / | libhwloc.so.15 => / | ||
+ | # add to zenoss edit / | ||
+ | rocommunity public | ||
+ | dontLogTCPWrappersConnects yes | ||
</ | </ | ||
Line 215: | Line 253: | ||
My data center robot thingie and node n100's gpus\\ | My data center robot thingie and node n100's gpus\\ | ||
- | **[[cluster: | + | |
\\ | \\ | ||
- | {{: | + | |
+ | {{: | ||
\\ | \\ | ||
- | {{: | + | {{: |
\\ | \\ | ||
+ | ==== Amber20 ==== | ||
+ | |||
+ | OpenHPC | ||
+ | |||
+ | < | ||
+ | |||
+ | # First install **all the necessary packages** (yum install...) | ||
+ | |||
+ | | ||
+ | 989 tar xvfj ../ | ||
+ | 993 cd amber20_src/ | ||
+ | 994 cd build/ | ||
+ | 996 vi run_cmake | ||
+ | |||
+ | # Assume this is Linux: | ||
+ | |||
+ | # serial, do on head node, with miniconda true, compile, install | ||
+ | cmake $AMBER_PREFIX/ | ||
+ | -DCMAKE_INSTALL_PREFIX=/ | ||
+ | -DCOMPILER=GNU \ | ||
+ | -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \ | ||
+ | -DDOWNLOAD_MINICONDA=TRUE -DMINICONDA_USE_PY3=TRUE \ | ||
+ | 2>&1 | tee cmake.log | ||
+ | |||
+ | # Env | ||
+ | |||
+ | [hmeij@n100 ~]$ module load cuda/11.6 | ||
+ | |||
+ | [hmeij@n100 ~]$ echo $CUDA_HOME | ||
+ | / | ||
+ | |||
+ | [hmeij@n100 ~]$ which nvcc mpicc gcc | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
+ | # [FIXED] cmake error on conda install, set to FALSE | ||
+ | # OS native python, install on n[100-101] | ||
+ | -- Python version 3.9 -- OK | ||
+ | -- Found PythonLibs: / | ||
+ | -- Checking for Python package numpy -- not found | ||
+ | -- Checking for Python package scipy -- not found | ||
+ | -- Checking for Python package matplotlib -- not found | ||
+ | -- Checking for Python package setuptools -- found | ||
+ | [END FIXED] | ||
+ | |||
+ | # mpi & cuda FALSE builds serial | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | # lots and lots of warnings | ||
+ | |||
+ | # then | ||
+ | source / | ||
+ | |||
+ | # on n100 now, parallel, set miniconda flags to FALSE | ||
+ | -DMPI=TRUE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | # on n100 just change cuda flag | ||
+ | -DCUDA=TRUE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | #tests | ||
+ | cd $AMBERHOME | ||
+ | make test.serial | ||
+ | export DO_PARALLEL=" | ||
+ | make test.parallel | ||
+ | export CUDA_VISIBLE_DEVICES=0 | ||
+ | make test.cuda.serial | ||
+ | make test.cuda.parallel | ||
+ | |||
+ | </ | ||
+ | |||
+ | ==== Amber22 ==== | ||
+ | |||
+ | OpenHPC | ||
+ | |||
+ | < | ||
+ | |||
+ | # First install **all the necessary packages** (yum install...) | ||
+ | |||
+ | | ||
+ | 989 tar xvfj ../ | ||
+ | 993 cd amber22_src/ | ||
+ | 994 cd build/ | ||
+ | 996 vi run_cmake | ||
+ | |||
+ | # Assume this is Linux: | ||
+ | |||
+ | # serial, do on head node, with miniconda true, compile, install | ||
+ | cmake $AMBER_PREFIX/ | ||
+ | -DCMAKE_INSTALL_PREFIX=/ | ||
+ | -DCOMPILER=GNU \ | ||
+ | -DMPI=FALSE -DCUDA=FALSE -DINSTALL_TESTS=TRUE \ | ||
+ | -DDOWNLOAD_MINICONDA=TRUE \ | ||
+ | 2>&1 | tee cmake.log | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | |||
+ | # Note !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||
+ | The OpenMPI and MPICH system installations provided by CentOS | ||
+ | (i.e., through yum install) | ||
+ | are known to be somehow incompatible with Amber22. | ||
+ | # OUCH !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | ||
+ | |||
+ | |||
+ | # GO TO node n100 | ||
+ | |||
+ | # install latest openmpi version | ||
+ | |||
+ | |||
+ | |||
+ | # Env | ||
+ | |||
+ | [hmeij@n100 ~]$ module load cuda/11.6 | ||
+ | |||
+ | [hmeij@n100 ~]$ echo $CUDA_HOME | ||
+ | / | ||
+ | |||
+ | [hmeij@n100 ~]$ which nvcc mpicc gcc | ||
+ | / | ||
+ | / | ||
+ | / | ||
+ | |||
+ | # [FIXED] cmake error on conda install, set to FALSE | ||
+ | # OS native python, install on n[100-101] | ||
+ | -- Python version 3.9 -- OK | ||
+ | -- Found PythonLibs: / | ||
+ | -- Checking for Python package numpy -- not found | ||
+ | -- Checking for Python package scipy -- not found | ||
+ | -- Checking for Python package matplotlib -- not found | ||
+ | -- Checking for Python package setuptools -- found | ||
+ | [END FIXED] | ||
+ | |||
+ | # mpi & cuda FALSE builds serial | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | # lots and lots of warnings | ||
+ | |||
+ | # then | ||
+ | source / | ||
+ | |||
+ | # on n100 now, parallel, set miniconda flags to FALSE | ||
+ | -DMPI=TRUE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | # on n100 just change cuda flag | ||
+ | -DCUDA=TRUE | ||
+ | ./run_cmake | ||
+ | make install | ||
+ | |||
+ | #tests | ||
+ | cd $AMBERHOME | ||
+ | make test.serial | ||
+ | export DO_PARALLEL=" | ||
+ | make test.parallel | ||
+ | export CUDA_VISIBLE_DEVICES=0 | ||
+ | make test.cuda.serial | ||
+ | make test.cuda.parallel | ||
+ | |||
+ | </ | ||
+ | |||
+ | |||
+ | **[[cluster: | ||