Warning: Undefined array key "DOKU_PREFS" in /usr/share/dokuwiki/inc/common.php on line 2082
cluster:154 [DokuWiki]

User Tools

Site Tools


cluster:154

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Last revision Both sides next revision
cluster:154 [2017/03/08 15:21]
hmeij07 [Deploy]
cluster:154 [2018/07/31 12:38]
hmeij07
Line 2: Line 2:
 **[[cluster:0|Back]]** **[[cluster:0|Back]]**
  
-==== OpenHPC ====+==== OpenHPC page 1====
  
   * install vanilla CentOS 7.2 on //master//   * install vanilla CentOS 7.2 on //master//
Line 10: Line 10:
  
 <code> <code>
 +
 +
 +[root@ohpc0-test ~]# systemctl stop NetworkManager                
 +[root@ohpc0-test ~]# systemctl disable NetworkManager 
  
 [root@ohpc0-test ~]# systemctl stop firewalld                 [root@ohpc0-test ~]# systemctl stop firewalld                
Line 20: Line 24:
 [root@ohpc0-test ~]# vi /etc/sysconfig/iptables [root@ohpc0-test ~]# vi /etc/sysconfig/iptables
  
-# lock up port 22: note "eth0+# lock up port 22: note "eth1
--A INPUT -i enp4s0 -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT+-A INPUT -i enp8s0 -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT
  
-# local allow: note "eth1+# local allow: note "eth0
--A INPUT -i enp8s0 -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT +-A INPUT -i enp4s0 -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT 
--A INPUT -i enp8s0 -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT+-A INPUT -i enp4s0 -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT
  
 [root@ohpc0-test ~]# vi /etc/sysconfig/ip6tables [root@ohpc0-test ~]# vi /etc/sysconfig/ip6tables
Line 50: Line 54:
 Chain OUTPUT (policy ACCEPT) Chain OUTPUT (policy ACCEPT)
 target     prot opt source               destination target     prot opt source               destination
 +
 +# copy global hpc /etc/hosts in place
 +# check hostname is on provisioning network
 +[root@ohpc0-test ~]# ping `hostname`
 +PING ohpc0-test (192.168.1.249) 56(84) bytes of data.
 +64 bytes from ohpc0-test (192.168.1.249): icmp_seq=1 ttl=64 time=0.043 ms
  
 [root@ohpc0-test ~]# reboot [root@ohpc0-test ~]# reboot
Line 102: Line 112:
 <code> <code>
    
-perl -pi -e "s/device = eth1/device = enp8s0/" /etc/warewulf/provision.conf+perl -pi -e "s/device = eth1/device = enp4s0/" /etc/warewulf/provision.conf
  
 perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp
Line 112: Line 122:
 # -ni not -pi # -ni not -pi
 perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf
 +
 +# the recipe does not set a mysql root password but we will
 +[root@ohpc0-test]# vi /etc/warewulf/database-root.conf 
 +
 +mysql> set password for 'root'@'localhost' = PASSWORD('some_string');
 +Query OK, 0 rows affected (0.00 sec)                                
 +
 +[root@ohpc0-test]# chmod o-r /etc/warewulf/database-root.conf 
  
 # restart/enable services # restart/enable services
Line 181: Line 199:
 # finally on **master** issue # finally on **master** issue
 perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf
 +# this turned out to be wrong: first I changed the hostname to ''ohpc0-slurm'' for ControlMachine=,
 +# added a line in /etc/hosts pointing it to 192.168.1.249, then I defined the nodes in ''/etc/slurm/slurm.conf''
 +
 +NodeName=ohpc0-slurm NodeAddr=192.168.1.249 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +NodeName=n29 NodeAddr=192.168.102.38 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +NodeName=n31 NodeAddr=192.168.102.40 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +PartitionName=test Nodes=n29,n31 Default=YES MaxTime=INFINITE State=UP
 +
 +
 +
 +
 +
 echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
 echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
Line 195: Line 228:
 <code> <code>
  
-# Both are loaded in database+# Both are loaded in database; back up if production
 echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf
 wwbootstrap `uname -r` wwbootstrap `uname -r`
Line 205: Line 238:
 wwsh -y file import /tmp/network.12501 --name network wwsh -y file import /tmp/network.12501 --name network
 wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0  wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0 
-wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:61 -D enp8s0+wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:5F -D enp4s0
  
 wwsh -y file import /etc/passwd wwsh -y file import /etc/passwd
Line 214: Line 247:
 wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \ wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \
      --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network      --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +
 +wwsh pxe update
 +wwsh dhcp update
 systemctl restart dhcpd systemctl restart dhcpd
 +systemctl restart httpd
 +
 +</code>
 +
 +  * Note: the next part is optional but I recommend it. Warewulf by default deploys ''stateless'' (in memory), but I'd rather deploy ''stateful'', in which the VNFS is written to disk. The advantage is that if the node crashes, it will reboot without the help of the **master**, and Linux typically survives crashes. Actually I like the ''golden image'' even better: customize a node, then create the image. More details at [[cluster:144|Warewulf Golden Image]]
 +
 +<code>
 +
 +yum -y --installroot=/data/ohpc/images/centos7.2 install grub2
 +wwvnfs -y --chroot /data/ohpc/images/centos7.2
 +
 +wwsh -y object modify -s bootloader=sda -t node ohpc0
 +wwsh -y object modify -s diskpartition=sda -t node ohpc0
 +wwsh -y object modify -s diskformat=sda1,sda2,sda3 -t node ohpc0
 +wwsh -y object modify -s filesystems=\    "mountpoint=/boot:dev=sda1:type=ext3:size=500,\
 +                         dev=sda2:type=swap:size=32768,\
 +                         mountpoint=/:dev=sda3:type=ext3:size=fill" -t node ohpc0
 +wwsh -y object modify -s bootlocal=UNDEF -t node ohpc0 
 +
 wwsh pxe update wwsh pxe update
 +wwsh dhcp update
 +systemctl restart dhcpd
 +systemctl restart httpd
 +
 +
 +[root@ohpc0-test ~]# wwsh -y object print ohpc0 -p :all
 +#### node ohpc0 ###############################################################
 +       4: NAME       = ohpc0
 +       4: BOOTLOADER = sda
 +       4: BOOTLOCAL = UNDEF  
 +       4: BOOTSTRAPID = 1
 +       4: DISKFORMAT = sda1,sda2,sda3
 +       4: DISKPARTITION = sda
 +       4: FILEIDS    = 10,3,5,6,7,8,9
 +       4: FILESYSTEMS = dev=sda2:type=swap:size=32768,
 +                        mountpoint=/:dev=sda3:type=ext3:size=fill,
 +                        mountpoint=/boot:dev=sda1:type=ext3:size=500
 +       4: NETDEVS    = ObjectSet
 +            NETDEVS.enp4s0.NAME       = enp8s0
 +            NETDEVS.enp4s0.HWADDR     = 00:15:c5:ef:08:5F
 +            NETDEVS.enp4s0.IPADDR     = 192.168.1.248
 +       4: NODENAME   = ohpc0
 +       4: VNFSID     = 2
 +       
 +# Strange that netmask is not listed, but when I recreated the object I used
 +wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:c5:ef:0c:bf \
 +     --netdev=enp4s0 --netmask=255.255.0.0  --network=255.255.0.0
 +
 +[root@ohpc0-test ~]# wwsh -y bootstrap list
 +BOOTSTRAP NAME            SIZE (M)
 +3.10.0-327.el7.x86_64     26.4
 +
 +[root@ohpc0-test ~]# wwsh -y vnfs list
 +VNFS NAME            SIZE (M) CHROOT LOCATION
 +centos7.2            278.2    /data/ohpc/images/centos7.2
  
 </code> </code>
 +
 +Final notes. I now have a 3 node OpenHPC cluster up using CentOS 7.3.1611 ... because of the edit mentioned above of the provision template URL, the CHROOT is at the latest version of CentOS. Thus I updated my SMS master too so that the construct bootstrap=`uname -r` builds an image compatible between SMS and CHROOT.
 +
 +On towards testing the tools.
 +
 +Made a little script to recreate nodes as we'll do this often. I also ran into a weird situation where eth0/1 change NIC location during PXE boot, so I work around it.
 +
 +  * ''deploy.sh''
 +
 +<code>
 +
 +#!/bin/bash
 +# enable both NIC to boot from 501/500
 +# provision black on bottom, red on top (handler switches to this)
 +# set bootlocal to EXIT reboot, handler exits
 +# switch black to top reboot (no media, fails to hdd)
 +# insane
 +
 +node=n31
 +ipaddr0=192.168.102.40
 +hwaddr0=1c:c1:de:19:40:6f
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n31e
 +ipaddr0=192.168.102.40
 +hwaddr0=1c:c1:de:19:40:6e
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n29
 +ipaddr0=192.168.102.38
 +hwaddr0=1c:c1:de:1c:88:c3
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n29e
 +ipaddr0=192.168.102.38
 +hwaddr0=1c:c1:de:1c:88:c2
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +wwsh pxe update
 +wwsh dhcp update
 +systemctl restart dhcpd 
 +systemctl restart httpd 
 +echo "after first boot: wwsh provision set --bootlocal=EXIT $node"
 +
 +</code>
 +
 +page 1 - [[cluster:155|OpenHPC page 2]] - [[cluster:156|OpenHPC page 3]] - [[cluster:160|OpenHPC page 4]]
 \\ \\
 **[[cluster:0|Back]]** **[[cluster:0|Back]]**
cluster/154.txt · Last modified: 2018/08/17 08:48 by hmeij07