\\
**[[cluster:0|Back]]**
==== OpenHPC page 1 ====
* install vanilla CentOS 7.2 on //master//
* find the Install_guide-CentOS7.2-SLURM-1.2.1-x86_64.pdf recipe guide on http://openhpc.community
* turn SELinux off (a minimal sketch follows this list)
* next switch from firewalld to iptables
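# the SELinux step itself is not captured in the transcript; a minimal sketch
# (sets permissive immediately, fully disabled after the next reboot):
setenforce 0
perl -pi -e "s/^SELINUX=.*/SELINUX=disabled/" /etc/selinux/config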
[root@ohpc0-test ~]# systemctl disable NetworkManager
[root@ohpc0-test ~]# systemctl disable firewalld
[root@ohpc0-test ~]# yum install iptables-services -y
[root@ohpc0-test ~]# systemctl enable iptables
[root@ohpc0-test ~]# vi /etc/sysconfig/iptables
# lock down port 22: note "eth1"
-A INPUT -p tcp -m state --state NEW -m tcp -s 129.133.0.0/16 --dport 22 -j ACCEPT
# local allow: note "eth0"
-A INPUT -d 192.168.0.0/16 -p tcp --dport 0:65535 -j ACCEPT
-A INPUT -d 192.168.0.0/16 -p udp --dport 0:65535 -j ACCEPT
[root@ohpc0-test ~]# reboot
# check the firewall
[root@ohpc0-test ~]# iptables -L
Chain INPUT (policy ACCEPT)
...
ACCEPT tcp -- 129.133.0.0/16 anywhere state NEW tcp dpt:ssh
ACCEPT tcp -- anywhere 192.168.0.0/16 tcp
ACCEPT udp -- anywhere 192.168.0.0/16 udp
REJECT all -- anywhere anywhere reject-with icmp-host-prohibited
# copy the global hpc /etc/hosts into place
# check that the hostname resolves to an address on the provisioning network
[root@ohpc0-test ~]# ping `hostname`
PING ohpc0-test (192.168.1.249) 56(84) bytes of data.
64 bytes from ohpc0-test (192.168.1.249): icmp_seq=1 ttl=64 time=0.043 ms
* next add the OpenHPC components: install the release RPM, which also enables the EPEL repo
[root@ohpc0-test ~]# yum install http://build.openhpc.community/OpenHPC:/1.2/CentOS_7.2/x86_64/ohpc-release-1.2-1.x86_64.rpm
Installed:
ohpc-release.x86_64 0:1.2-1
Dependency Installed:
epel-release.noarch 0:7-9
[root@ohpc0-test ~]# yum repolist
repo id repo name
OpenHPC OpenHPC-1.2 - Base
OpenHPC-updates OpenHPC-1.2 - Updates
base/7/x86_64 CentOS-7 - Base
*epel/x86_64 Extra Packages for Enterprise Linux 7 - x86_64
extras/7/x86_64 CentOS-7 - Extras
updates/7/x86_64 CentOS-7 - Updates
* Next, for provisioning, pull down a suite of packages
yum -y groupinstall ohpc-base
yum -y groupinstall ohpc-warewulf
# for openlava, in case we decide against slurm
yum install tcl-devel
systemctl enable ntpd.service
systemctl start ntpd
systemctl status ntpd
yum -y groupinstall ohpc-slurm-server
* Configure ''ib0'' and ''IPoIB'' if needed; consult [[cluster:145|Infiniband]] and the PDF recipe guide.
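If IPoIB is wanted on the master, the interface file looks roughly like this (a sketch only; the address below is a placeholder, take the real values from the Infiniband page):
# /etc/sysconfig/network-scripts/ifcfg-ib0
DEVICE=ib0
BOOTPROTO=static
IPADDR=192.168.5.249
NETMASK=255.255.0.0
ONBOOT=yes
# then bring it up
ifup ib0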
==== Configure ====
* Warewulf (do yourself a favor and check the changes in each file to avoid typos; a quick check is sketched after the one-liners)
perl -pi -e "s/device = eth1/device = enp4s0/" /etc/warewulf/provision.conf
perl -pi -e "s/^\s+disable\s+= yes/ disable = no /" /etc/xinetd.d/tftp
perl -pi -e "s/cgi-bin>\$/cgi-bin>\n Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf
perl -pi -e "s/Allow from all/Require all granted/" /etc/httpd/conf.d/warewulf-httpd.conf
# -ni not -pi
perl -ni -e "print unless /^\s+Order allow,deny/" /etc/httpd/conf.d/warewulf-httpd.conf
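# quick sanity check of the edits above (not part of the recipe), e.g.:
grep "device =" /etc/warewulf/provision.conf
grep "disable" /etc/xinetd.d/tftp
grep -A1 "cgi-bin>" /etc/httpd/conf.d/warewulf-httpd.conf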
# the recipe does not set a mysql root password but we will
[root@ohpc0-test]# vi /etc/warewulf/database-root.conf
mysql> set password for 'root'@'localhost' = PASSWORD('some_string');
Query OK, 0 rows affected (0.00 sec)
[root@ohpc0-test]# chmod o-r /etc/warewulf/database-root.conf
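# for reference, database-root.conf then needs the matching credentials, roughly
# (check the comments in the file itself for the exact directive names):
#   database user     = root
#   database password = some_string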
# restart/enable services
systemctl restart xinetd
systemctl enable mariadb.service
systemctl restart mariadb
systemctl enable httpd.service
systemctl restart httpd
* Now let's get ready to provision a node. First we need to build a CHROOT environment.
# the repo used by wwmkchroot is defined in this template
less /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl
# admin area
ls -R /opt/ohpc/admin/
# use another disk for images
mkdir /data
mkfs.xfs -f /dev/sdb1
mount /data
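# (assumes /dev/sdb1 has an /etc/fstab entry for /data)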
mkdir -p /data/ohpc/images/centos7.2
# this yields an error as 7.2.1511 is already deprecated
wwmkchroot centos-7 /data/ohpc/images/centos7.2
"This directory (and version of CentOS) is deprecated. For normal users,
you should use /7/ and not /7.2.1511/ in your path."
# so we follow their advice and edit the repo destination: s/7.2.1511/7/
vi /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl
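# equivalently, something like this, assuming the mirror URL in the template carries the 7.2.1511 path:
perl -pi -e "s|7\.2\.1511|7|g" /usr/libexec/warewulf/wwmkchroot/centos-7.tmpl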
# try again
wwmkchroot centos-7 /data/ohpc/images/centos7.2
# ls /data/ohpc/images/centos7.2/
bin boot dev etc fastboot home lib lib64 media mnt opt
proc root run sbin srv sys tmp usr var
# du -hs /data/ohpc/images/centos7.2/
490M /data/ohpc/images/centos7.2/
* Next customize the CHROOT environment
cp -p /etc/resolv.conf /data/ohpc/images/centos7.2/etc/
yum -y --installroot=/data/ohpc/images/centos7.2 groupinstall ohpc-slurm-client
yum -y --installroot=/data/ohpc/images/centos7.2 install kernel
yum -y --installroot=/data/ohpc/images/centos7.2 install ntp
yum -y --installroot=/data/ohpc/images/centos7.2 install lmod-ohpc
# pass on infiniband
# if ~/.ssh/cluster.pub does not exist on the **master**, issue the command ''wwinit ssh'' first
cat ~/.ssh/cluster.pub
cat ~/.ssh/cluster.pub >> /data/ohpc/images/centos7.2/root/.ssh/authorized_keys
echo "192.168.1.249:/home /home nfs nfsvers=3,rsize=1024,wsize=1024,cto 0 0 " \
>> /data/ohpc/images/centos7.2/etc/fstab
echo "192.168.1.249:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3 0 0 " \
>> /data/ohpc/images/centos7.2/etc/fstab
chroot /data/ohpc/images/centos7.2 systemctl enable ntpd
echo "server 192.168.1.249" >> /data/ohpc/images/centos7.2/etc/ntp.conf
# finally on **master** issue
perl -pi -e "s/ControlMachine=\S+/ControlMachine=ohpc0-test/" /etc/slurm/slurm.conf
# this turned out to be wrong: first I changed ControlMachine= to the hostname ''ohpc0-slurm'',
# added a line in /etc/hosts pointing that name to 192.168.1.249, then I defined the nodes in ''/etc/slurm/slurm.conf''
NodeName=ohpc0-slurm NodeAddr=192.168.1.249 CPUs=2 \
RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
NodeName=n29 NodeAddr=192.168.102.38 CPUs=2 \
RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
NodeName=n31 NodeAddr=192.168.102.40 CPUs=2 \
RealMemory=8 Sockets=2 CoresPerSocket=4 ThreadsPerCore=2 State=UNKNOWN
PartitionName=test Nodes=n29,n31 Default=YES MaxTime=INFINITE State=UP
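# not shown above: a quick sanity check of the server side, assuming munge and
# slurmctld came in with ohpc-slurm-server (nodes stay down until provisioned)
systemctl enable munge slurmctld
systemctl start munge slurmctld
sinfo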
echo "/home *(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
echo "/opt/ohpc/pub *(ro,no_subtree_check,fsid=11)" >> /etc/exports
exportfs -ra
systemctl restart nfs
systemctl enable nfs-server
==== Deploy ====
* Next we PXE boot the compute node for imaging (after building the bootstrap image and VNFS)
# both are loaded into the database; back it up first if this is production
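# e.g. dump the warewulf database first (name assumed to be the default "warewulf"):
mysqldump -u root -p warewulf > /root/warewulf-backup.sql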
echo "drivers += updates/kernel" >> /etc/warewulf/bootstrap.conf
wwbootstrap `uname -r`
# Bootstrap image '3.10.0-327.el7.x86_64' is ready
wwvnfs -y --chroot /data/ohpc/images/centos7.2
# VNFS 'centos7.2' has been imported
# Wrote a new configuration file at: /etc/warewulf/vnfs/centos7.2.conf
wwsh -y file import /tmp/network.12501 --name network
wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:C5:EF:08:5F -D enp4s0
wwsh -y file import /etc/passwd
wwsh -y file import /etc/group
wwsh -y file import /etc/shadow
wwsh -y file import /etc/slurm/slurm.conf
wwsh -y file import /etc/munge/munge.key
wwsh -y provision set ohpc0 --vnfs=centos7.2 --bootstrap=`uname -r` \
--files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd
* Note: the next part is optional but I recommend it. Warewulf by default deploys ''stateless'' (in memory), but I'd rather deploy ''stateful'', in which the VNFS is written to disk. The advantage is that if the node crashes it will reboot without the help of the **master**, and Linux typically survives crashes. Actually I like the ''golden image'' approach even better: customize a node, then create the image from it. More details at [[cluster:144|Warewulf Golden Image]]
yum -y --installroot=/data/ohpc/images/centos7.2 install grub2
wwvnfs -y --chroot /data/ohpc/images/centos7.2
wwsh -y object modify -s bootloader=sda -t node ohpc0
wwsh -y object modify -s diskpartition=sda -t node ohpc0
wwsh -y object modify -s diskformat=sda1,sda2,sda3 -t node ohpc0
wwsh -y object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=500,\
dev=sda2:type=swap:size=32768,\
mountpoint=/:dev=sda3:type=ext3:size=fill" -t node ohpc0
wwsh -y object modify -s bootlocal=UNDEF -t node ohpc0
wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd
[root@ohpc0-test ~]# wwsh -y object print ohpc0 -p :all
#### node ohpc0 ###############################################################
4: NAME = ohpc0
4: BOOTLOADER = sda
4: BOOTLOCAL = UNDEF
4: BOOTSTRAPID = 1
4: DISKFORMAT = sda1,sda2,sda3
4: DISKPARTITION = sda
4: FILEIDS = 10,3,5,6,7,8,9
4: FILESYSTEMS = dev=sda2:type=swap:size=32768,
mountpoint=/:dev=sda3:type=ext3:size=fill,
mountpoint=/boot:dev=sda1:type=ext3:size=500
4: NETDEVS = ObjectSet
NETDEVS.enp4s0.NAME = enp8s0
NETDEVS.enp4s0.HWADDR = 00:15:c5:ef:08:5F
NETDEVS.enp4s0.IPADDR = 192.168.1.248
4: NODENAME = ohpc0
4: VNFSID = 2
# strange that the netmask is not listed; when I recreated the object I used
wwsh -y node new ohpc0 --ipaddr=192.168.1.248 --hwaddr=00:15:c5:ef:0c:bf \
--netdev=enp4s0 --netmask=255.255.0.0 --network=255.255.0.0
[root@ohpc0-test ~]# wwsh -y bootstrap list
BOOTSTRAP NAME SIZE (M)
3.10.0-327.el7.x86_64 26.4
[root@ohpc0-test ~]# wwsh -y vnfs list
VNFS NAME SIZE (M) CHROOT LOCATION
centos7.2 278.2 /data/ohpc/images/centos7.2
Final notes. I now have a 3-node OpenHPC cluster up running CentOS 7.3.1611 ... because of the edit mentioned above to the provisioning template URL, the CHROOT is at the latest version of CentOS. Thus I updated my SMS master too, so that the construct bootstrap=`uname -r` builds a bootstrap image compatible between SMS and CHROOT.
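Roughly, the update sequence I mean (my notes, not the literal recipe; paths as used above):
yum -y update                                              # update the master (SMS)
yum -y --installroot=/data/ohpc/images/centos7.2 update    # update the CHROOT
wwvnfs -y --chroot /data/ohpc/images/centos7.2             # rebuild the VNFS
reboot                                                     # so `uname -r` reports the new kernel
wwbootstrap `uname -r`                                     # then rebuild the bootstrap image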
On towards testing the tools.
Made a little script to recreate nodes, as we'll do this often. I also hit a weird situation where eth0/eth1 swap NIC locations during PXE boot, so I work around it by defining each node under both MAC addresses.
* ''deploy.sh''
#!/bin/bash
# enable both NICs to boot from 501/500
# provision black on bottom, red on top (handler switches to this)
# set bootlocal to EXIT reboot, handler exits
# switch black to top reboot (no media, fails to hdd)
# insane
node=n31
ipaddr0=192.168.102.40
hwaddr0=1c:c1:de:19:40:6f
wwsh object delete $node -y
wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0 --network=255.255.0.0 -y
wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh object modify -s bootloader=sda $node -y
wwsh object modify -s diskpartition=sda $node -y
wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
wwsh provision set --bootlocal=UNDEF $node -y
node=n31e
ipaddr0=192.168.102.40
hwaddr0=1c:c1:de:19:40:6e
wwsh object delete $node -y
wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0 --network=255.255.0.0 -y
wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh object modify -s bootloader=sda $node -y
wwsh object modify -s diskpartition=sda $node -y
wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
wwsh provision set --bootlocal=UNDEF $node -y
node=n29
ipaddr0=192.168.102.38
hwaddr0=1c:c1:de:1c:88:c3
wwsh object delete $node -y
wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0 --network=255.255.0.0 -y
wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh object modify -s bootloader=sda $node -y
wwsh object modify -s diskpartition=sda $node -y
wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
wwsh provision set --bootlocal=UNDEF $node -y
node=n29e
ipaddr0=192.168.102.38
hwaddr0=1c:c1:de:1c:88:c2
wwsh object delete $node -y
wwsh node new $node --netdev=eth0 --hwaddr=$hwaddr0 --ipaddr=$ipaddr0 --netmask=255.255.0.0 --network=255.255.0.0 -y
wwsh -y provision set $node --vnf=centos7.2 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,slurm.conf,munge.key,network
wwsh object modify -s bootloader=sda $node -y
wwsh object modify -s diskpartition=sda $node -y
wwsh object modify -s diskformat=sda1,sda2,sda3 $node -y
wwsh object modify -s filesystems="mountpoint=/boot:dev=sda1:type=ext3:size=1024,dev=sda2:type=swap:size=6144,mountpoint=/:dev=sda3:type=ext3:size=+" $node -y
wwsh provision set --bootlocal=UNDEF $node -y
wwsh pxe update
wwsh dhcp update
systemctl restart dhcpd
systemctl restart httpd
echo "after first boot: wwsh provision set --bootlocal=EXIT $node"
page 1 - [[cluster:155|OpenHPC page 2]] - [[cluster:156|OpenHPC page 3]] - [[cluster:160|OpenHPC page 4]]
\\
**[[cluster:0|Back]]**