Table of Contents


Back

OpenHPC page 1

             
[root@ohpc0-test ~]# systemctl disable NetworkManager 
             
[root@ohpc0-test ~]# systemctl disable firewalld         
     
[root@ohpc0-test ~]#  yum install iptables-services -y   
                       
[root@ohpc0-test ~]# systemctl enable iptables

[root@ohpc0-test ~]# vi /etc/sysconfig/iptables

# lock up port 22: note "eth1"
-A INPUT -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT

# local allow: note "eth0"
-A INPUT -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT
-A INPUT -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT

[root@ohpc0-test ~]# reboot

# check firewall
[root@ohpc0-test ~]# iptables -L
Chain INPUT (policy ACCEPT)
...
ACCEPT     tcp  --  129.133.0.0/16       anywhere             state NEW tcp dpt:ssh
ACCEPT     tcp  --  anywhere             192.168.0.0/16       tcp
ACCEPT     udp  --  anywhere             192.168.0.0/16       udp
REJECT     all  --  anywhere             anywhere             reject-with icmp-host-prohibited

# copy global hpc /etc/hosts in place
# check hostname is on provisioning network
[root@ohpc0-test ~]# ping `hostname`
PING ohpc0-test (192.168.1.249) 56(84) bytes of data.
64 bytes from ohpc0-test (192.168.1.249): icmp_seq=1 ttl=64 time=0.043 ms
[root@ohpc0-test ~]# yum install http://build.openhpc.community/OpenHPC:/1.2/CentOS_7.2/x86_64/ohpc-release-1.2-1.x86_64.rpm

Installed:
  ohpc-release.x86_64 0:1.2-1
Dependency Installed:
  epel-release.noarch 0:7-9

[root@ohpc0-test ~]# yum repolist
repo id                       repo name
OpenHPC                       OpenHPC-1.2 - Base
OpenHPC-updates               OpenHPC-1.2 - Updates
base/7/x86_64                 CentOS-7 - Base
*epel/x86_64                  Extra Packages for Enterprise Linux 7 - x86_64
extras/7/x86_64               CentOS-7 - Extras
updates/7/x86_64              CentOS-7 - Updates
 yum -y groupinstall ohpc-base
 yum -y groupinstall ohpc-warewulf
# for openlava if we decide not slurm
 yum install tcl-devel

 systemctl enable ntpd.service
 systemctl start ntpd
 systemctl status ntpd

 yum -y groupinstall ohpc-slurm-server

Configure

 
perl -pi -e "s/device = eth1/device = enp4s0/" /etc/warewulf/provision.conf

perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp
   
perl -pi -e "s/cgi-bin>\$/cgi-bin>\n Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf
perl -pi -e "s/Allow from all/Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf

  
# -ni not -pi
perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf

# the recipe does not set a mysql root password but we will
[root@ohpc0-test]# vi /etc/warewulf/database-root.conf 

mysql> set password for 'root'@'localhost' = PASSWORD('some_string');
Query OK, 0 rows affected (0.00 sec)                                

[root@ohpc0-test]# chmod o-r /etc/warewulf/database-root.conf 

# restart/enable services
systemctl restart xinetd
systemctl enable mariadb.service
systemctl restart mariadb
systemctl enable httpd.service
systemctl restart httpd

# defined repo
less /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl

# admin area
ls -R /opt/ohpc/admin/

# use another disk for images
mkdir /data
mkfs.xfs -f /dev/sdb1
mount /data
mkdir -p /data/ohpc/images/centos7.2

# this yields an error as 7.2.1511 is already deprecated
wwmkchroot centos-7 /data/ohpc/images/centos7.2
"This directory (and version of CentOS) is deprecated.  For normal users,
you should use /7/ and not /7.2.1511/ in your path."
# so we follow their advice and edit the repo destination s/7.2.1511/7/
vi /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl

# try again
wwmkchroot centos-7 /data/ohpc/images/centos7.2
# ls /data/ohpc/images/centos7.2/
bin  boot  dev  etc  fastboot  home  lib  lib64  media  mnt  opt  
proc  root  run  sbin  srv  sys  tmp  usr  var
# du -hs /data/ohpc/images/centos7.2/
490M    /data/ohpc/images/centos7.2/
cp -p /etc/resolv.conf /data/ohpc/images/centos7.2/etc/
yum -y --installroot=/data/ohpc/images/centos7.2 groupinstall ohpc-slurm-client
yum -y --installroot=/data/ohpc/images/centos7.2 install kernel
yum -y --installroot=/data/ohpc/images/centos7.2 install ntp
yum -y --installroot=/data/ohpc/images/centos7.2 install lmod-ohpc
# pass on infiniband

# if it does not exist on **master** issue command ''wwinit ssh''
cat ~/.ssh/cluster.pub
cat ~/.ssh/cluster.pub >> /data/ohpc/images/centos7.2/root/.ssh/authorized_keys

echo "192.168.1.249:/home /home nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0 " \
      >> /data/ohpc/images/centos7.2/etc/fstab
echo "192.168.1.249:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3 0 0 " \
      >> /data/ohpc/images/centos7.2/etc/fstab

chroot /data/ohpc/images/centos7.2 systemctl enable ntpd
echo "server 192.168.1.249" >> /data/ohpc/images/centos7.2/etc/ntp.conf

# finally on **master** issue
perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf
# this turned out to be wrong; first I changed the hostname in ControlMachine= to ''ohpc0-slurm'',
# added a line in /etc/hosts pointing this to 192.168.1.249, then I defined the nodes in ''/etc/slurm/slurm.conf''

NodeName=ohpc0-slurm NodeAddr=192.168.1.249 CPUs=2 \
        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
NodeName=n29 NodeAddr=192.168.102.38 CPUs=2 \
        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
NodeName=n31 NodeAddr=192.168.102.40 CPUs=2 \
        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
PartitionName=test Nodes=n29,n31 Default=YES MaxTime=INFINITE State=UP





echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
exportfs -ra
systemctl restart nfs
systemctl enable nfs-server

Deploy

# Both are loaded in database; back up if production
echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf
wwbootstrap `uname -r`
# Bootstrap image '3.10.0-327.el7.x86_64' is ready
wwvnfs -y --chroot /data/ohpc/images/centos7.2
# VNFS 'centos7.2' has been imported
# Wrote a new configuration file at: /etc/warewulf/vnfs/centos7.2.conf

wwsh -y file import /tmp/network.12501 --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0 
wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:5F -D enp4s0

wwsh -y file import /etc/passwd
wwsh -y file import /etc/group
wwsh -y file import /etc/shadow
wwsh -y file import /etc/slurm/slurm.conf
wwsh -y file import /etc/munge/munge.key
wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \
     --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network

wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd
yum -y --installroot=/data/ohpc/images/centos7.2 install grub2
wwvnfs -y --chroot /data/ohpc/images/centos7.2

wwsh -y object modify -s bootloader=sda -t node ohpc0
wwsh -y object modify -s diskpartition=sda -t node ohpc0
wwsh -y object modify -s diskformat=sda1,sda2,sda3 -t node ohpc0
wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=500,\
                         dev=sda2:type=swap:size=32768,\
                         mountpoint=/:dev=sda3:type=ext3:size=fill" -t node ohpc0
wwsh -y object modify -s bootlocal=UNDEF -t node ohpc0 

wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd


[root@ohpc0-test ~]# wwsh -y object print ohpc0 -p :all
#### node ohpc0 ###############################################################
       4: NAME       = ohpc0
       4: BOOTLOADER = sda
       4: BOOTLOCAL = UNDEF  
       4: BOOTSTRAPID = 1
       4: DISKFORMAT = sda1,sda2,sda3
       4: DISKPARTITION = sda
       4: FILEIDS    = 10,3,5,6,7,8,9
       4: FILESYSTEMS = dev=sda2:type=swap:size=32768,
                        mountpoint=/:dev=sda3:type=ext3:size=fill,
                        mountpoint=/boot:dev=sda1:type=ext3:size=500
       4: NETDEVS    = ObjectSet
            NETDEVS.enp4s0.NAME       = enp8s0
            NETDEVS.enp4s0.HWADDR     = 00:15:c5:ef:08:5F
            NETDEVS.enp4s0.IPADDR     = 192.168.1.248
       4: NODENAME   = ohpc0
       4: VNFSID     = 2
       
# Strange that netmask is not listed, but when I recreated the object I used
wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:c5:ef:0c:bf \
     --netdev=enp4s0 --netmask=255.255.0.0  --network=255.255.0.0

[root@ohpc0-test ~]# wwsh -y bootstrap list
BOOTSTRAP NAME            SIZE (M)
3.10.0-327.el7.x86_64     26.4

[root@ohpc0-test ~]# wwsh -y vnfs list
VNFS NAME            SIZE (M) CHROOT LOCATION
centos7.2            278.2    /data/ohpc/images/centos7.2

Final notes. I now have a 3-node OpenHPC cluster up using CentOS 7.3.1611 … because of the edit mentioned above to the provisioning template URL, the CHROOT is at the latest version of CentOS. I therefore updated my SMS master too, so that the construct bootstrap=`uname -r` builds an image compatible between SMS and CHROOT.

On towards testing the tools.

I made a little script to recreate nodes, as we'll do this often. I also hit a weird situation where eth0/eth1 change NIC location during PXE boot, so the script works around it.

#!/bin/bash
#
# Recreate the Warewulf node objects for the test compute nodes.
#
# For every entry in node_specs the existing object is deleted and a fresh
# one is created, attached to the centos7.2 VNFS and the bootstrap matching
# the running kernel, and given the sda disk layout. Afterwards the PXE and
# DHCP configuration is regenerated and dhcpd/httpd are restarted.
#
# Each IP address appears twice in node_specs (nXX and nXXe) with two
# different MAC addresses, so a machine is registered under both hardware
# addresses -- presumably one per NIC, per the operator notes below.
#
# Operator notes:
#   enable both NIC to boot from 501/500
#   provision black on bottom, red on top (handler switches to this)
#   set bootlocal to EXIT reboot, handler exits
#   switch black to top reboot (no media, fails to hdd)
#   insane

# Abort on typos in variable names. 'set -e' is deliberately NOT used:
# 'wwsh object delete' may fail when the object does not exist yet and the
# script must carry on and create it.
set -u

readonly vnfs_name=centos7.2
readonly provision_files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
readonly net_mask=255.255.0.0
readonly net_network=255.255.0.0
readonly disk_layout="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+"

# One entry per node object: "<name> <ip address> <hardware address>".
readonly -a node_specs=(
  "n31  192.168.102.40 1c:c1:de:19:40:6f"
  "n31e 192.168.102.40 1c:c1:de:19:40:6e"
  "n29  192.168.102.38 1c:c1:de:1c:88:c3"
  "n29e 192.168.102.38 1c:c1:de:1c:88:c2"
)

#######################################
# Delete and recreate a single Warewulf node object.
# Globals:   vnfs_name, provision_files, net_mask, net_network,
#            disk_layout (all read)
# Arguments: $1 - node name
#            $2 - IP address for eth0
#            $3 - hardware (MAC) address for eth0
#######################################
recreate_node() {
  local node=$1
  local ipaddr0=$2
  local hwaddr0=$3
  local kernel
  kernel=$(uname -r)

  wwsh object delete "$node" -y
  wwsh node new "$node" --netdev=eth0 --hwaddr="$hwaddr0" --ipaddr="$ipaddr0" \
    --netmask="$net_mask" --network="$net_network" -y
  # --vnfs is spelled out in full, matching the manual 'provision set'
  # command used for ohpc0, instead of the abbreviated --vnf.
  wwsh -y provision set "$node" --vnfs="$vnfs_name" --bootstrap="$kernel" \
    --files="$provision_files"
  wwsh object modify -s bootloader=sda "$node" -y
  wwsh object modify -s diskpartition=sda "$node" -y
  wwsh object modify -s diskformat=sda1,sda2,sda3 "$node" -y
  wwsh object modify -s filesystems="$disk_layout" "$node" -y
  wwsh provision set --bootlocal=UNDEF "$node" -y
}

node_names=()
for spec in "${node_specs[@]}"; do
  # Split the whitespace-separated triple into its three fields.
  read -r node ipaddr0 hwaddr0 <<<"$spec"
  recreate_node "$node" "$ipaddr0" "$hwaddr0"
  node_names+=("$node")
done

wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd

# Print the follow-up command for every recreated node, not just the last one.
for node in "${node_names[@]}"; do
  echo "after first boot: wwsh provision set --bootlocal=EXIT $node"
done

page 1 - OpenHPC page 2 - OpenHPC page 3 - OpenHPC page 4
Back