User Tools

Site Tools


cluster:213

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
cluster:213 [2022/07/25 09:51]
hmeij07 [Amber22]
cluster:213 [2024/01/12 10:09] (current)
hmeij07
Line 50: Line 50:
 cd /etc/sysconfig/network-scripts/ cd /etc/sysconfig/network-scripts/
 vi ifcfg-eth0 # 192.168.102.x vi ifcfg-eth0 # 192.168.102.x
-vi ifcfg-eth1 # 10.10.102.x+vi ifcfg-eth1 # 10.10.102.x   # 'uuidgen eth1' to get uuid
 vi ifcfg-eth3 # 129.133.52.x vi ifcfg-eth3 # 129.133.52.x
 +scp 192.168.102.112:/etc/rc.d/rc.local /etc/rc.d/  # check +x, edit ib0, start
 +
 systemctl restart network systemctl restart network
 ping -c 3 192.168.102.42 ping -c 3 192.168.102.42
Line 64: Line 66:
 systemctl start iptables # and enable systemctl start iptables # and enable
 iptables -L iptables -L
-systemctl stop firewalld 
-systemctl disable firewalld 
  
  
Line 73: Line 73:
 iptables -L # check! iptables -L # check!
  
-# Rocky8 +
-# https://docs.fedoraproject.org/en-US/epel/#Quickstart +
-dnf config-manager --set-enabled powertools +
-dnf install epel-release +
-dnf install netcdf netcdf-devel +
-dnf install yum-utils # yumdownloader +
-dnf install ddd  +
-dnf install grace +
-dnf install gnuplot +
-dnf install alpine # pico +
-yum groupinstall "Server" # server for compute nodes "Server with GUI"+
  
 # other configs # other configs
 vi /etc/selinux/config # disabled, do not mistype, kernel will not boot! vi /etc/selinux/config # disabled, do not mistype, kernel will not boot!
 mv /home /usr/local/ mv /home /usr/local/
 +cd /;ln -s /usr/local/home 
 +cd /; ln -s /home /share
 vi /etc/passwd (exx, dockeruser $HOME) vi /etc/passwd (exx, dockeruser $HOME)
  
-## edit passwd, shadow, group, hosts files ## 
-## make -orig backups and stage in /home/tmp/global 
-## cottontail2 = greentail52 sections 
  
 +#exx96
 mkdir /sanscratch /home/localscratch mkdir /sanscratch /home/localscratch
 chmod ugo+rwx /sanscratch /home/localscratch chmod ugo+rwx /sanscratch /home/localscratch
 chmod o+t /sanscratch /home/localscratch  chmod o+t /sanscratch /home/localscratch 
 +# exx96
 # link localscratch in 1.4T /home to / # link localscratch in 1.4T /home to /
-mkdir /home  + 
-cd /home # local dir+cd /home 
 ln -s /zfshomes/apps ln -s /zfshomes/apps
 ln -s /zfshomes/tmp ln -s /zfshomes/tmp
 ln -s /zfshomes/csmith06 ln -s /zfshomes/csmith06
-ln -s /zfshomes /share+ls -l 
 + 
 +cat /sanscratch/tmp/fstab.tmp >> /etc/fstab; mkdir /astrostore; mount -a; df -h; cd /smithlab/;  ln -s /smithlab/home/opt/rhel08 opt; ls -l
  
 # fstab file mounts # fstab file mounts
 +mkdir -p /zfshomes /home66 /home33 /mindstore /opt/ohpc/pub /opt/intel
 +mkdir -p /smithlab/home;cd /smithlab;ln -s /smithlab/home/opt/rhel08 opt; ls -l
 # cottontail2 = greentail52 # cottontail2 = greentail52
 # n100-n101 = n79 # n100-n101 = n79
  
-# postfix 
-dnf install postfix 
-dnf install mailx 
-systemctl enable postfix 
-echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf 
  
 # on head node /etc/chrony.conf # on head node /etc/chrony.conf
Line 122: Line 112:
 Server 192.168.102.251 Server 192.168.102.251
 # check # check
 +systemctl restart chronyd
 chronyc sources chronyc sources
 +
 +# Rocky8
 +# https://docs.fedoraproject.org/en-US/epel/#Quickstart
 +dnf config-manager --set-enabled powertools -y
 +dnf install epel-release -y
 +dnf install netcdf netcdf-devel -y
 +dnf install yum-utils -y # yumdownloader
 +dnf install ddd grace gnuplot alpine -y # pico
 +
 +yum groupinstall "Server" # server for compute nodes "Server with GUI"
  
  
 # on head node install from epel repo # on head node install from epel repo
-yum install slurm-openlava+### yum install slurm-openlava
 # error on conflicting libs, too bad! # error on conflicting libs, too bad!
  
Line 133: Line 134:
 yum install epel-release -y yum install epel-release -y
 yum install flex bison -y  yum install flex bison -y 
-yum install tcl tcl-devel dmtcp -y+yum install tcl tcl-devel dmtcp dmtcp-devel -y
 yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y yum install net-snmp net-snmp-libs net-tools net-snmp-utils -y
 yum install freeglut-devel libXi-devel libXmu-devel -y yum install freeglut-devel libXi-devel libXmu-devel -y
Line 168: Line 169:
 # compute nodes old level 3 # compute nodes old level 3
 systemctl set-default multi-user.target systemctl set-default multi-user.target
 +
 +
 +# postfix
 +dnf install postfix
 +dnf install mailx
 +systemctl enable postfix
 +echo "relayhost = 192.168.102.251" >> /etc/postfix/main.cf
 +
 +
 +# edit /etc/snmp/snmpd.conf, enable and start
 +rocommunity public
 +dontLogTCPWrappersConnects yes
 +# enable, start, add to zenoss 
 +
  
 # compute nodes only # compute nodes only
Line 198: Line 213:
 # obabel chem file formats # obabel chem file formats
 yum install openbabel openbabel-devel yum install openbabel openbabel-devel
-# dmtcp 
-yum install dmtcp dmtcp-devel 
  
-# check status of service munge+ 
  
 yum clean all yum clean all
 # eth3 onboot=no, private networks only # eth3 onboot=no, private networks only
 systemctl disable iptables systemctl disable iptables
-reboot+
  
 # now make it an ohpc compute node # now make it an ohpc compute node
 +# DO THIS on compute nodes BEFORE mounting ctt2:/opt
 +# pulls in newer version, potential problem later on
   yum repolist   yum repolist
-  yum  install ohpc-base-compute+  rpm -ivh ohpc-release-2-1.el8.x86_64.rpm  
 +  yum install singularity-ohpc 
 +  yum  install ohpc-base-compute --nobest 
 +    yum  install ohpc-slurm-client
      
   scp cottontail2:/etc/resolv.conf /etc/resolv.conf   scp cottontail2:/etc/resolv.conf /etc/resolv.conf
-  yum  install ohpc-slurm-client+ 
 +   
 +  # check status of service munge 
 +  rpm -ivh /sanscratch/tmp/rpms/munge-devel-0.5.13-2.el8.x86_64.rpm
   systemctl enable munge   systemctl enable munge
   systemctl start munge   systemctl start munge
Line 227: Line 249:
   scp cottontail2:/etc/slurm/gres.conf /etc/slurm/gres.conf   scp cottontail2:/etc/slurm/gres.conf /etc/slurm/gres.conf
   scp cottontail2:/etc/profile.d/lmod.sh /etc/profile.d/   scp cottontail2:/etc/profile.d/lmod.sh /etc/profile.d/
 +  
 +# /etc/bashrc add
 +# ohpc lmod gcc mpicc
 +export PATH=/usr/local/slurm/bin:$PATH
 +export LD_LIBRARY_PATH=/usr/local/slurm/lib:$LD_LIBRARY_PATH
 +
      
 # /var/[log|spool|run] need to be removed from # /var/[log|spool|run] need to be removed from
Line 234: Line 262:
   /usr/sbin/slurmd -D    /usr/sbin/slurmd -D 
      
-# start via rc.local +# start via rc.local (already copied) 
-chmod +x /etc/rc.d/rc.local+#chmod +x /etc/rc.d/rc.local
 #timing issue with munge #timing issue with munge
-sleep 15 +#sleep 15 
-/usr/sbin/slurmd+#/usr/sbin/slurmd 
 + 
 +systemctl stop firewalld 
 +systemctl disable firewalld 
 + 
 +systemctl  disable dnf-makecache.timer 
 +systemctl stop dnf-makecache.timer 
 + 
 + mv /etc/issue.d/cockpit.issue /root/etc_issue.d_cockpit.issue 
 + mv /etc/motd.d/cockpit /root/etc_motd.d_cockpit 
 + 
 + 
 +## edit passwd, shadow, group, hosts files ## 
 +## make -orig backups and stage in /home/tmp/global 
 +## cottontail2 = greentail52 sections 
 +chown -R munge:munge /etc/munge /var/log/munge /var/lib/munge /var/run/munge 
 +chown -R slurm:munge /var/log/slurm /var/spool/slurm 
 + 
      
 # slurmd ??? # slurmd ???
  libhwloc.so.15 => /opt/ohpc/pub/libs/hwloc/lib/libhwloc.so.15 (0x00007fd6e5684000)  libhwloc.so.15 => /opt/ohpc/pub/libs/hwloc/lib/libhwloc.so.15 (0x00007fd6e5684000)
  
-add to zenoss edit /etc/snmp/snmpd.conf, enable and start +crontab 
-rocommunity public + 
-dontLogTCPWrappersConnects yes+# ionice gaussian 
 +0,15,30,45 * * * * /share/apps/scripts/ionice_lexes.sh  > /dev/null 2>&1
 + 
 +# cpu temps 
 +40 * * * * /share/apps/scripts/lm_sensors.sh > /dev/null 2>&1
 + 
 +on compute node /etc/security/limits.conf 
 +*                -       memlock         270039400 
 + 
 + 
 +# file date_ctt2.sh 
 + 
 +# ctt /etc/pdsh 
 + 
 +# ctt:/root/scripts 
 + 
 +# ctt2:/usr/local/bin/rslurm2022.sh
  
 </code> </code>
Line 372: Line 434:
 # GO TO node n100 # GO TO node n100
  
-# install latest openmpi version +# copy head node's amber22_src/ to n100:/usr/local/src/tmp/
-# copy head node'amber22_source/ to n100+
  
  
 +source /share/apps/CENTOS8/ohpc/software/amber/22/amber.sh
 +echo $AMBERHOME
  
 +# install latest openmpi version
 +cd amber_src/Ambertools/src
 +tar xvfj ../../../../openmpi-4.1.4.tar.bz2 
  
-Env+./configure_openmpi gnu openhpc gcc/gfortran 
  
-[hmeij@n100 ~]$ module load cuda/11.6 
  
-[hmeij@n100 ~]$ echo $CUDA_HOME +# on n100 now, parallel, set  
-/usr/local/cuda +-MPI=TRUE 
- +-DDOWNLOAD_MINICONDA=FALSE
-[hmeij@n100 ~]$ which nvcc mpicc gcc +
-/usr/local/cuda/bin/nvcc +
-/opt/ohpc/pub/mpi/openmpi4-gnu9/4.1.1/bin/mpicc +
-/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc +
- +
-[FIXED] cmake error on conda install, set to FALSE +
-# OS native python, install on n[100-101] +
--- Python version 3.9 -- OK +
--- Found PythonLibs: /usr/lib64/libpython3.9.so (found version "3.9.6")  +
--- Checking for Python package numpy -- not found +
--- Checking for Python package scipy -- not found +
--- Checking for Python package matplotlib -- not found +
--- Checking for Python package setuptools -- found +
-[END FIXED] +
- +
-# mpi & cuda FALSE builds serial+
 ./run_cmake ./run_cmake
 make install make install
-# lots and lots of warnings 
  
-then +on n100 just change cuda flag 
-source /share/apps/CENTOS8/ohpc/software/amber/20/amber.sh+ 
 +[hmeij@n100 build]$ module load cuda/11.6 
 +[hmeij@n100 build]$ which gcc mpicc nvcc 
 +/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc 
 +/share/apps/CENTOS8/ohpc/software/amber/22/bin/mpicc 
 +/usr/local/cuda/bin/nvcc 
 +[hmeij@n100 ~]$ echo $CUDA_HOME 
 +/usr/local/cuda
  
-# on n100 now, parallel, set miniconda flags to FALSE 
 -MPI=TRUE -MPI=TRUE
 +-CUDA=TRUE
 +-DDOWNLOAD_MINICONDA=FALSE
 ./run_cmake ./run_cmake
 make install make install
  
-# on n100 just change cuda flag + 
--CUDA=TRUE +[hmeij@n100 ~]$ which nvcc mpicc gcc 
-./run_cmake +/usr/local/cuda/bin/nvcc 
-make install+/opt/ohpc/pub/mpi/openmpi4-gnu9/4.1.1/bin/mpicc 
 +/opt/ohpc/pub/compiler/gcc/9.4.0/bin/gcc
  
 #tests #tests
cluster/213.1658757063.txt.gz · Last modified: 2022/07/25 09:51 by hmeij07