[root@ohpc0-test ~]# systemctl disable NetworkManager [root@ohpc0-test ~]# systemctl disable firewalld [root@ohpc0-test ~]# yum install iptables-services -y [root@ohpc0-test ~]# systemctl enable iptables [root@ohpc0-test ~]# vi /etc/sysconfig/iptables # lock up port 22: note "eth1" -A INPUT -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT # local allow: note "eth0" -A INPUT -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT -A INPUT -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT [root@ohpc0-test ~]# reboot # check firewall [root@ohpc0-test ~]# iptables -L Chain INPUT (policy ACCEPT) ... ACCEPT tcp -- 129.133.0.0/16 anywhere state NEW tcp dpt:ssh ACCEPT tcp -- anywhere 192.168.0.0/16 tcp ACCEPT udp -- anywhere 192.168.0.0/16 udp REJECT all -- anywhere anywhere reject-with icmp-host-prohibited # copy global hpc /etc/hosts in place # check hostname is on provisioning network [root@ohpc0-test ~]# ping `hostname` PING ohpc0-test (192.168.1.249) 56(84) bytes of data. 64 bytes from ohpc0-test (192.168.1.249): icmp_seq=1 ttl=64 time=0.043 ms
[root@ohpc0-test ~]# yum install http://build.openhpc.community/OpenHPC:/1.2/CentOS_7.2/x86_64/ohpc-release-1.2-1.x86_64.rpm Installed: ohpc-release.x86_64 0:1.2-1 Dependency Installed: epel-release.noarch 0:7-9 [root@ohpc0-test ~]# yum repolist repo id repo name OpenHPC OpenHPC-1.2 - Base OpenHPC-updates OpenHPC-1.2 - Updates base/7/x86_64 CentOS-7 - Base *epel/x86_64 Extra Packages for Enterprise Linux 7 - x86_64 extras/7/x86_64 CentOS-7 - Extras updates/7/x86_64 CentOS-7 - Updates
yum -y groupinstall ohpc-base yum -y groupinstall ohpc-warewulf # for openlava if we decide not slurm yum install tcl-devel systemctl enable ntpd.service systemctl start ntpd systemctl status ntpd yum -y groupinstall ohpc-slurm-server
perl -pi -e "s/device = eth1/device = enp4s0/" /etc/warewulf/provision.conf perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp perl -pi -e "s/cgi-bin>\$/cgi-bin>\n Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf perl -pi -e "s/Allow from all/Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf # -ni not -pi perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf # the recipe does not set a mysql root password but we will [root@ohpc0-test]# vi /etc/warewulf/database-root.conf mysql> set password for 'root'@'localhost' = PASSWORD('some_string'); Query OK, 0 rows affected (0.00 sec) [root@ohcp0-test]# chmod o-r /etc/warewulf/database-root.conf # restart/enable services systemctl restart xinetd systemctl enable mariadb.service systemctl restart mariadb systemctl enable httpd.service systemctl restart httpd
# defined repo less /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl # admin area ls -R /opt/ohpc/admin/ # use another disk for images mkdir /data mkfs.xfs -f /dev/sdb1 mount /data mkdir -p /data/ohpc/images/centos7.2 # this yields an error as 7.2.1511 is already deprecated wwmkchroot centos-7 /data/ohpc/images/centos7.2 "This directory (and version of CentOS) is deprecated. For normal users, you should use /7/ and not /7.2.1511/ in your path." # so we follow their advice and edit the repo destination s/7.2.1511/7/ vi /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl # try again wwmkchroot centos-7 /data/ohpc/images/centos7.2 # ls /data/ohpc/images/centos7.2/ bin boot dev etc fastboot home lib lib64 media mnt opt proc root run sbin srv sys tmp usr var # du -hs /data/ohpc/images/centos7.2/ 490M /data/ohpc/images/centos7.2/
cp -p /etc/resolv.conf /data/ohpc/images/centos7.2/etc/ yum -y --installroot=/data/ohpc/images/centos7.2 groupinstall ohpc-slurm-client yum -y --installroot=/data/ohpc/images/centos7.2 install kernel yum -y --installroot=/data/ohpc/images/centos7.2 install ntp yum -y --installroot=/data/ohpc/images/centos7.2 install lmod-ohpc # pass on infiniband # if it does not exist on **master** issue command ''wwinit ssh'' cat ~/.ssh/cluster.pub cat ~/.ssh/cluster.pub >> /data/ohpc/images/centos7.2/root/.ssh/authorized_keys echo "192.168.1.249:/home /home nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0 " \ >> /data/ohpc/images/centos7.2/etc/fstab echo "192.168.1.249:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3 0 0 " \ >> /data/ohpc/images/centos7.2/etc/fstab chroot /data/ohpc/images/centos7.2 systemctl enable ntpd echo "server 192.168.1.249" >> /data/ohpc/images/centos7.2/etc/ntp.conf # finally on **master** issue perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf # this turned out to be wrong, first I changed the hostname to ''ohpc0-slurm'' ControllerMachine= # added line in /etc/hosts pointing this to 192.168.1.249, then I defined ''/etc/slurm/slurm.conf'' NodeName=ohpc0-slurm NodeAddr=192.168.1.249 CPUs=2 \ RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN NodeName=n29 NodeAddr=192.168.102.38 CPUs=2 \ RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN NodeName=n31 NodeAddr=192.168.102.40 CPUs=2 RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN PartitionName=test Nodes=n29,n31 Default=YES MaxTime=INFINITE State=UP echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports exportfs -ra systemctl restart nfs systemctl enable nfs-server
# Both are loaded in database; back up if production echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf wwbootstrap `uname -r` # Bootstrap image '3.10.0-327.el7.x86_64' is ready wwvnfs -y --chroot /data/ohpc/images/centos7.2 # VNFS 'centos7.2' has been imported # Wrote a new configuration file at: /etc/warewulf/vnfs/centos7.2.conf wwsh -y file import /tmp/network.12501 --name network wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0 wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:5F -D enp4s0 wwsh -y file import /etc/passwd wwsh -y file import /etc/group wwsh -y file import /etc/shadow wwsh -y file import /etc/slurm/slurm.conf wwsh -y file import /etc/munge/munge.key wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \ --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network wwsh pxe update wwsh dhcp update systemctl restart dhcpd systemctl restart httpd
stateless
(in memory), but I'd rather deploy stateful
in which VNFS is written to disk. The advantage is that if the node crashes, it will reboot without the help of the master, and Linux typically survives crashes. Actually I like the golden image
even better, customize a node, then create the image. More details at Warewulf Golden Image. yum -y --installroot=/data/ohpc/images/centos7.2 install grub2 wwvnfs -y --chroot /data/ohpc/images/centos7.2 wwsh -y object modify -s bootloader=sda -t node ohpc0 wwsh -y object modify -s diskpartition=sda -t node ohpc0 wwsh -y object modify -s diskformat=sda1,sda2,sda3 -t node ohpc0 wwsh -y object modify -s filesystems=\ "mountpoint=/boot:dev=sda1:type=ext3:size=500,\ dev=sda2:type=swap:size=32768,\ mountpoint=/:dev=sda3:type=ext3:size=fill" -t node ohpc0 wwsh -y object modify -s bootlocal=UNDEF -t node ohpc0 wwsh pxe update wwsh dhcp update systemctl restart dhcpd systemctl restart httpd [root@ohpc0-test ~]# wwsh -y object print ohpc0 -p :all #### node ohpc0 ############################################################### 4: NAME = ohpc0 4: BOOTLOADER = sda 4: BOOTLOCAL = UNDEF 4: BOOTSTRAPID = 1 4: DISKFORMAT = sda1,sda2,sda3 4: DISKPARTITION = sda 4: FILEIDS = 10,3,5,6,7,8,9 4: FILESYSTEMS = dev=sda2:type=swap:size=32768, mountpoint=/:dev=sda3:type=ext3:size=fill, mountpoint=/boot:dev=sda1:type=ext3:size=500 4: NETDEVS = ObjectSet NETDEVS.enp4s0.NAME = enp8s0 NETDEVS.enp4s0.HWADDR = 00:15:c5:ef:08:5F NETDEVS.enp4s0.IPADDR = 192.168.1.248 4: NODENAME = ohpc0 4: VNFSID = 2 # Strange that netmask is not listed, but when I recreated the object I used wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:c5:ef:0c:bf \ --netdev=enp4s0 --netmask=255.255.0.0 --network=255.255.0.0 [root@ohpc0-test ~]# wwsh -y bootstrap list BOOTSTRAP NAME SIZE (M) 3.10.0-327.el7.x86_64 26.4 [root@ohpc0-test ~]# wwsh -y vnfs list VNFS NAME SIZE (M) CHROOT LOCATION centos7.2 278.2 /data/ohpc/images/centos7.2
Final notes. I now have a 3-node OpenHPC cluster up using CentOS 7.3.1611 … because of the edit mentioned above to the provisioning template URL, the CHROOT is at the latest version of CentOS. Thus I updated my SMS master too, so that the construct bootstrap=`uname -r` builds an image compatible between SMS and CHROOT.
On towards testing the tools.
I made a little script to recreate nodes, as we'll do this often. I also ran into a weird situation where eth0/1 change NIC location during PXE boot, so the script works around it.
deploy.sh
#!/bin/bash
#
# deploy.sh - delete and recreate the Warewulf node objects for the compute
# nodes, configure them for provisioning to local disk (bootloader, partition
# table, filesystems), then regenerate the PXE/DHCP configuration and restart
# dhcpd and httpd.
#
# Usage: ./deploy.sh        (no arguments; edit the NODES table below)
#
# Operator notes (kept from the original script):
# enable both NIC to boot from 501/500
# provision black on bottom, red on top (handler switches to this)
# set bootlocal to EXIT reboot, handler exits
# switch black to top reboot (no media, fails to hdd)
# insane

# Deliberately no `set -e`: every node is deleted before it is recreated, and
# that delete may fail when the node does not exist yet; later steps must
# still run.
set -u

# Settings shared by every node.
readonly NETDEV=eth0
readonly NETMASK=255.255.0.0
readonly NETWORK=255.255.0.0
readonly VNFS=centos7.2
readonly FILES=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
readonly FILESYSTEMS="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+"

# The bootstrap image is named after the master's running kernel; look it up
# once instead of once per node.
BOOTSTRAP=$(uname -r)
readonly BOOTSTRAP

# One entry per node object: "<name> <ipaddr> <hwaddr>".
# The "...e" entries reuse the IP of their sibling with a neighbouring MAC;
# presumably they register the second NIC of the same machine so that either
# NIC can PXE boot -- confirm against the hardware before changing.
readonly NODES=(
  "n31  192.168.102.40 1c:c1:de:19:40:6f"
  "n31e 192.168.102.40 1c:c1:de:19:40:6e"
  "n29  192.168.102.38 1c:c1:de:1c:88:c3"
  "n29e 192.168.102.38 1c:c1:de:1c:88:c2"
)

#######################################
# Delete and recreate one Warewulf node object with disk provisioning settings.
# Globals:   NETDEV, NETMASK, NETWORK, VNFS, BOOTSTRAP, FILES, FILESYSTEMS (read)
# Arguments: $1 - node name
#            $2 - IP address
#            $3 - hardware (MAC) address
# Returns:   status of the last wwsh command
#######################################
provision_node() {
  local node=$1
  local ipaddr=$2
  local hwaddr=$3

  # Start from a clean slate.
  wwsh object delete "$node" -y
  wwsh node new "$node" --netdev="$NETDEV" --hwaddr="$hwaddr" \
    --ipaddr="$ipaddr" --netmask="$NETMASK" --network="$NETWORK" -y
  wwsh -y provision set "$node" --vnfs="$VNFS" --bootstrap="$BOOTSTRAP" \
    --files="$FILES"

  # Disk layout: /boot on sda1, swap on sda2, / on sda3.
  wwsh object modify -s bootloader=sda "$node" -y
  wwsh object modify -s diskpartition=sda "$node" -y
  wwsh object modify -s diskformat=sda1,sda2,sda3 "$node" -y
  wwsh object modify -s filesystems="$FILESYSTEMS" "$node" -y

  # Leave bootlocal unset for now; it is switched to EXIT after the first
  # boot (see the reminder printed by main).
  wwsh provision set --bootlocal=UNDEF "$node" -y
}

#######################################
# Provision every node in NODES, refresh PXE/DHCP, restart the services and
# print the follow-up command for each node.
# Globals:   NODES (read)
# Outputs:   one "after first boot" reminder per node on stdout
# Returns:   1 if wwsh is not available, otherwise status of the last command
#######################################
main() {
  local entry node ipaddr hwaddr

  if ! command -v wwsh >/dev/null 2>&1; then
    printf 'deploy.sh: wwsh not found; run this on the Warewulf master\n' >&2
    return 1
  fi

  for entry in "${NODES[@]}"; do
    read -r node ipaddr hwaddr <<<"$entry"
    provision_node "$node" "$ipaddr" "$hwaddr"
  done

  wwsh pxe update
  wwsh dhcp update
  systemctl restart dhcpd
  systemctl restart httpd

  # Remind the operator about every node, not only the last one handled.
  for entry in "${NODES[@]}"; do
    read -r node ipaddr hwaddr <<<"$entry"
    printf 'after first boot: wwsh provision set --bootlocal=EXIT %s\n' "$node"
  done
}

main
page 1 - OpenHPC page 2 - OpenHPC page 3 - OpenHPC page 4
Back