User Tools

Site Tools


cluster:154

Warning: Undefined array key -1 in /usr/share/dokuwiki/inc/html.php on line 1458

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
cluster:154 [2017/03/08 15:21]
hmeij07 [Deploy]
cluster:154 [2018/08/17 08:48] (current)
hmeij07
Line 2: Line 2:
 **[[cluster:0|Back]]** **[[cluster:0|Back]]**
  
-==== OpenHPC ====+==== OpenHPC page 1====
  
   * install vanilla CentOS 7.2 on //master//   * install vanilla CentOS 7.2 on //master//
Line 10: Line 10:
  
 <code> <code>
- +              
-[root@ohpc0-test ~]# systemctl stop firewalld                +[root@ohpc0-test ~]# systemctl disable NetworkManager  
 +             
 [root@ohpc0-test ~]# systemctl disable firewalld          [root@ohpc0-test ~]# systemctl disable firewalld         
            
-[root@ohpc0-test ~]#  yum install iptables-services -y                          +[root@ohpc0-test ~]#  yum install iptables-services -y    
 +                       
 [root@ohpc0-test ~]# systemctl enable iptables [root@ohpc0-test ~]# systemctl enable iptables
-[root@ohpc0-test ~]# systemctl enable ip6tables 
  
 [root@ohpc0-test ~]# vi /etc/sysconfig/iptables [root@ohpc0-test ~]# vi /etc/sysconfig/iptables
  
-# lock up port 22: note "eth0+# lock up port 22: note "eth1
--A INPUT -i enp4s0 -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT+-A INPUT -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT
  
-# local allow: note "eth1+# local allow: note "eth0
--A INPUT -i enp8s0 -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT +-A INPUT -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT 
--A INPUT -i enp8s0 -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT+-A INPUT -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT
  
-[root@ohpc0-test ~]# vi /etc/sysconfig/ip6tables+[root@ohpc0-test ~]# reboot
  
-comment out port 22 +check firewall
- +
-[root@ohpc0-test ~]# systemctl restart iptables +
-[root@ohpc0-test ~]# systemctl restart ip6tables+
 [root@ohpc0-test ~]# iptables -L [root@ohpc0-test ~]# iptables -L
 Chain INPUT (policy ACCEPT) Chain INPUT (policy ACCEPT)
-target     prot opt source               destination +...
-ACCEPT     all  --  anywhere             anywhere             state RELATED,ESTABLISHED +
-ACCEPT     icmp --  anywhere             anywhere +
-ACCEPT     all  --  anywhere             anywhere+
 ACCEPT     tcp  --  129.133.0.0/16       anywhere             state NEW tcp dpt:ssh ACCEPT     tcp  --  129.133.0.0/16       anywhere             state NEW tcp dpt:ssh
 ACCEPT     tcp  --  anywhere             192.168.0.0/16       tcp ACCEPT     tcp  --  anywhere             192.168.0.0/16       tcp
Line 44: Line 39:
 REJECT     all  --  anywhere             anywhere             reject-with icmp-host-prohibited REJECT     all  --  anywhere             anywhere             reject-with icmp-host-prohibited
  
-Chain FORWARD (policy ACCEPT) +# copy global hpc /etc/hosts in place 
-target     prot opt source               destination +# check hostname is on provisioning network 
-REJECT     all  --  anywhere             anywhere             reject-with icmp-host-prohibited +[root@ohpc0-test ~]# ping `hostname` 
- +PING ohpc0-test (192.168.1.249) 56(84) bytes of data. 
-Chain OUTPUT (policy ACCEPT) +64 bytes from ohpc0-test (192.168.1.249): icmp_seq=1 ttl=64 time=0.043 ms
-target     prot opt source               destination +
- +
-[root@ohpc0-test ~]# reboot+
  
 </code> </code>
Line 102: Line 94:
 <code> <code>
    
-perl -pi -e "s/device = eth1/device = enp8s0/" /etc/warewulf/provision.conf+perl -pi -e "s/device = eth1/device = enp4s0/" /etc/warewulf/provision.conf
  
 perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp
Line 112: Line 104:
 # -ni not -pi # -ni not -pi
 perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf
 +
 +# the recipe does not set a mysql root password but we will
 +[root@ohpc0-test]# vi /etc/warewulf/database-root.conf 
 +
 +mysql> set password for 'root'@'localhost' = PASSWORD('some_string');
 +Query OK, 0 rows affected (0.00 sec)                                
 +
 +[root@ohpc0-test]# chmod o-r /etc/warewulf/database-root.conf
  
 # restart/enable services # restart/enable services
Line 181: Line 181:
 # finally on **master** issue # finally on **master** issue
 perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf
 +# this turned out to be wrong: first I changed the hostname in ControlMachine= to ''ohpc0-slurm'',
 +# added a line in /etc/hosts pointing it to 192.168.1.249, then defined the following in ''/etc/slurm/slurm.conf''
 +
 +NodeName=ohpc0-slurm NodeAddr=192.168.1.249 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +NodeName=n29 NodeAddr=192.168.102.38 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +NodeName=n31 NodeAddr=192.168.102.40 CPUs=2 \
 +        RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
 +PartitionName=test Nodes=n29,n31 Default=YES MaxTime=INFINITE State=UP
 +
 +
 +
 +
 +
 echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
 echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
Line 195: Line 210:
 <code> <code>
  
-# Both are loaded in database+# Both are loaded in database; back up if production
 echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf
 wwbootstrap `uname -r` wwbootstrap `uname -r`
Line 205: Line 220:
 wwsh -y file import /tmp/network.12501 --name network wwsh -y file import /tmp/network.12501 --name network
 wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0  wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0 
-wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:61 -D enp8s0+wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:5F -D enp4s0
  
 wwsh -y file import /etc/passwd wwsh -y file import /etc/passwd
Line 214: Line 229:
 wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \ wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \
      --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network      --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +
 +wwsh pxe update
 +wwsh dhcp update
 systemctl restart dhcpd systemctl restart dhcpd
 +systemctl restart httpd
 +
 +</code>
 +
 +  * Note: the next part is optional but I recommend it. Warewulf by default deploys ''stateless'' (in memory), but I'd rather deploy ''stateful'', in which the VNFS is written to disk. The advantage is that if the node crashes, it will reboot without the help of the **master**, and Linux typically survives crashes. Actually I like the ''golden image'' even better: customize a node, then create the image. More details at [[cluster:144|Warewulf Golden Image]]
 +
 +<code>
 +
 +yum -y --installroot=/data/ohpc/images/centos7.2 install grub2
 +wwvnfs -y --chroot /data/ohpc/images/centos7.2
 +
 +wwsh -y object modify -s bootloader=sda -t node ohpc0
 +wwsh -y object modify -s diskpartition=sda -t node ohpc0
 +wwsh -y object modify -s diskformat=sda1,sda2,sda3 -t node ohpc0
 +wwsh -y object modify -s filesystems=\    "mountpoint=/boot:dev=sda1:type=ext3:size=500,\
 +                         dev=sda2:type=swap:size=32768,\
 +                         mountpoint=/:dev=sda3:type=ext3:size=fill" -t node ohpc0
 +wwsh -y object modify -s bootlocal=UNDEF -t node ohpc0 
 +
 wwsh pxe update wwsh pxe update
 +wwsh dhcp update
 +systemctl restart dhcpd
 +systemctl restart httpd
 +
 +
 +[root@ohpc0-test ~]# wwsh -y object print ohpc0 -p :all
 +#### node ohpc0 ###############################################################
 +       4: NAME       = ohpc0
 +       4: BOOTLOADER = sda
 +       4: BOOTLOCAL = UNDEF  
 +       4: BOOTSTRAPID = 1
 +       4: DISKFORMAT = sda1,sda2,sda3
 +       4: DISKPARTITION = sda
 +       4: FILEIDS    = 10,3,5,6,7,8,9
 +       4: FILESYSTEMS = dev=sda2:type=swap:size=32768,
 +                        mountpoint=/:dev=sda3:type=ext3:size=fill,
 +                        mountpoint=/boot:dev=sda1:type=ext3:size=500
 +       4: NETDEVS    = ObjectSet
 +            NETDEVS.enp4s0.NAME       = enp8s0
 +            NETDEVS.enp4s0.HWADDR     = 00:15:c5:ef:08:5F
 +            NETDEVS.enp4s0.IPADDR     = 192.168.1.248
 +       4: NODENAME   = ohpc0
 +       4: VNFSID     = 2
 +       
 +# Strange that netmask is not listed, but when I recreated the object I used
 +wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:c5:ef:0c:bf \
 +     --netdev=enp4s0 --netmask=255.255.0.0  --network=255.255.0.0
 +
 +[root@ohpc0-test ~]# wwsh -y bootstrap list
 +BOOTSTRAP NAME            SIZE (M)
 +3.10.0-327.el7.x86_64     26.4
 +
 +[root@ohpc0-test ~]# wwsh -y vnfs list
 +VNFS NAME            SIZE (M) CHROOT LOCATION
 +centos7.2            278.2    /data/ohpc/images/centos7.2
  
 </code> </code>
 +
 +Final notes. I now have a 3 node OpenHPC cluster up using CentOS 7.3.1611 ... because of the edit mentioned above of the provision template URL, the CHROOT is at the latest version of CentOS. Thus I updated my SMS master too so that the construct bootstrap=`uname -r` builds an image compatible between SMS and CHROOT.
 +
 +On towards testing the tools.
 +
 +Made a little script to recreate nodes as we'll do this often. I also ran into a weird situation where eth0/1 change NIC location during PXE boot, so I work around it.
 +
 +  * ''deploy.sh''
 +
 +<code>
 +
 +#!/bin/bash
 +# enable both NIC to boot from 501/500
 +# provision black on bottom, red on top (handler switches to this)
 +# set bootlocal to EXIT reboot, handler exits
 +# switch black to top reboot (no media, fails to hdd)
 +# insane
 +
 +node=n31
 +ipaddr0=192.168.102.40
 +hwaddr0=1c:c1:de:19:40:6f
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n31e
 +ipaddr0=192.168.102.40
 +hwaddr0=1c:c1:de:19:40:6e
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n29
 +ipaddr0=192.168.102.38
 +hwaddr0=1c:c1:de:1c:88:c3
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +node=n29e
 +ipaddr0=192.168.102.38
 +hwaddr0=1c:c1:de:1c:88:c2
 +wwsh object delete $node -y 
 +wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0  --network=255.255.0.0 -y
 +wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
 +wwsh object modify -s bootloader=sda $node -y
 +wwsh object modify -s diskpartition=sda $node -y
 +wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
 +wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
 +wwsh provision set --bootlocal=UNDEF $node -y
 +
 +wwsh pxe update
 +wwsh dhcp update
 +systemctl restart dhcpd 
 +systemctl restart httpd 
 +echo "after first boot: wwsh provision set --bootlocal=EXIT $node"
 +
 +</code>
 +
 +page 1 - [[cluster:155|OpenHPC page 2]] - [[cluster:156|OpenHPC page 3]] - [[cluster:160|OpenHPC page 4]]
 \\ \\
 **[[cluster:0|Back]]** **[[cluster:0|Back]]**
cluster/154.1489004511.txt.gz · Last modified: 2017/03/08 15:21 by hmeij07