This is an old revision of the document!
Additional tools for the OpenHPC environment. First add these two lines to /etc/security/limits.conf on the SMS and all compute nodes. Patch CHROOT as well.
# added for RLIMIT_MEMLOCK warnings with libibverbs -hmeij
* soft memlock unlimited
* hard memlock unlimited
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
known_hosts
file in ~/.ssh/
# Nagios monitoring: server packages go on the SMS, the NRPE agent and plugins
# go into the compute image under /data/ohpc/images/centos7.2.
yum -y groupinstall ohpc-nagios
yum -y --installroot=/data/ohpc/images/centos7.2 install nagios-plugins-all-ohpc nrpe-ohpc
chroot /data/ohpc/images/centos7.2 systemctl enable nrpe
# Comment out allowed_hosts in the image's nrpe.cfg; access is restricted
# through the hosts.allow entries appended below instead.
perl -pi -e "s/^allowed_hosts=/# allowed_hosts=/" /data/ohpc/images/centos7.2/etc/nagios/nrpe.cfg
echo "nrpe 5666/tcp # NRPE" >> /data/ohpc/images/centos7.2/etc/services
# Order matters: the ALLOW entry must precede the catch-all DENY.
echo "nrpe : 192.168.1.249 : ALLOW" >> /data/ohpc/images/centos7.2/etc/hosts.allow
echo "nrpe : ALL : DENY" >> /data/ohpc/images/centos7.2/etc/hosts.allow
chroot /data/ohpc/images/centos7.2 /usr/sbin/useradd -c "NRPE user for the NRPE service" \
  -d /var/run/nrpe -r -g nrpe -s /sbin/nologin nrpe
# Activate the example Nagios configs on the SMS and fill in the two compute hosts.
mv /etc/nagios/conf.d/services.cfg.example /etc/nagios/conf.d/services.cfg
mv /etc/nagios/conf.d/hosts.cfg.example /etc/nagios/conf.d/hosts.cfg
perl -pi -e "s/HOSTNAME1/n29/ || s/HOST1_IP/192.168.102.38/" /etc/nagios/conf.d/hosts.cfg
perl -pi -e "s/HOSTNAME2/n31/ || s/HOST2_IP/192.168.102.40/" /etc/nagios/conf.d/hosts.cfg
# Send notifications with mailx and address them to root on the SMS.
perl -pi -e "s/ \/bin\/mail/\/usr\/bin\/mailx/g" /etc/nagios/objects/commands.cfg
perl -pi -e "s/nagios\@localhost/root\@ohpc0-test/" /etc/nagios/objects/contacts.cfg
chkconfig nagios on
systemctl start nagios
# Set the setuid bit on ping.
chmod u+s "$(command -v ping)"
# Relay outgoing mail and forward root's mail.
echo "relayhost = 192.168.102.42" >> /etc/postfix/main.cf
echo "root: hmeij@wes..." >> /etc/aliases
newaliases
systemctl restart postfix
# recreate vnfs and reimage nodes, see page1
wwvnfs -y --chroot /data/ohpc/images/centos7.2
/root/deploy.sh
htpasswd -c /etc/nagios/passwd nagiosadmin
# Ganglia monitoring: gmetad/gmond on the SMS, gmond in the compute image
# under /data/ohpc/images/centos7.2.
yum -y groupinstall ohpc-ganglia
yum -y --installroot=/data/ohpc/images/centos7.2 install ganglia-gmond-ohpc
# import passwd, shadow and group files for new user account ganglia
# Keep the packaged gmond.conf as a backup and start from the OpenHPC example.
mv /etc/ganglia/gmond.conf /etc/ganglia/gmond.conf-orig
cp /opt/ohpc/pub/examples/ganglia/gmond.conf /etc/ganglia/
# use provision IP
perl -pi -e "s/<sms>/192.168.1.249/" /etc/ganglia/gmond.conf
# The compute image gets the same gmond.conf as the SMS.
cp /etc/ganglia/gmond.conf /data/ohpc/images/centos7.2/etc/ganglia/
echo "gridname MySite" >> /etc/ganglia/gmetad.conf
systemctl enable gmond
systemctl enable gmetad
systemctl start gmond
systemctl start gmetad
systemctl restart httpd
chroot /data/ohpc/images/centos7.2 systemctl enable gmond
# recreate vnfs and reimage nodes, see page1
wwvnfs -y --chroot /data/ohpc/images/centos7.2
/root/deploy.sh
echo export WCOLL=/etc/hosts.pdsh >> /root/.bashrc
[root@ohpc0-test ~]# pdsh uptime
n31: 10:44:25 up 19:14, 1 user, load average: 0.00, 0.01, 0.05
n29: 10:44:25 up 19:19, 0 users, load average: 0.00, 0.01, 0.05
mrsh installation
Genders installation
ConMan installation, ipmi serial consoles
rsyslog forwarding of compute node logs to SMS
ControlMachine in /etc/slurm/slurm.conf
Ran into a slurm config problem here.