User Tools

Site Tools


cluster:172

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision Both sides next revision
cluster:172 [2018/09/25 15:20]
hmeij07 [Finish]
cluster:172 [2018/10/11 14:04]
hmeij07 [Finish]
Line 34: Line 34:
 yum install kernel-devel kernel-headers (remove old headers after reboot) yum install kernel-devel kernel-headers (remove old headers after reboot)
 yum install gcc gcc-gfortran gcc-c++  # CHROOT done yum install gcc gcc-gfortran gcc-c++  # CHROOT done
 +yum install tcl tcl-devel # CHROOT done
 +
 +# /etc/modprobe.d/blacklist-nouveau.conf (new file by nvidia)
 +# reboot before driver installation # CHROOT done
 +blacklist nouveau
 +options nouveau modeset=0
 +
 +# new kernel initramfs, load
 +dracut --force
 +
 +reboot
 +
  
 # download runfiles from https://developer.nvidia.com/cuda-downloads # download runfiles from https://developer.nvidia.com/cuda-downloads
Line 54: Line 66:
 Install the CUDA 9.2 Samples? Install the CUDA 9.2 Samples?
 (y)es/(n)o/(q)uit: n (y)es/(n)o/(q)uit: n
- 
-# /etc/modprobe.d/blacklist-nouveau.conf (new file by nvidia) 
-# reboot before driver installation # CHROOT done 
-blacklist nouveau 
-options nouveau modeset=0 
-reboot 
  
 # nvidia driver # nvidia driver
-./cuda_name_of_runfile \-\-silent \-\-accept-eula driver+./cuda_name_of_runfile -silent -driver 
 + 
 +# Do the device files /dev/nvidia* exist with 0666 permissions?
 +# They were not  
 +/usr/local/src/nvidia-modprobe.sh
  
 # backup # backup
 [root@n37 src]# rpm -qf /usr/lib/libGL.so [root@n37 src]# rpm -qf /usr/lib/libGL.so
 file /usr/lib/libGL.so is not owned by any package file /usr/lib/libGL.so is not owned by any package
-cp /usr/lib/libGL.so /usr/lib/libGL.so-nvidia+cp /usr/lib/libGL.so.1.7.0   /usr/lib/libGL.so.1.7.0-nvidia 
 +cp /usr/lib64/libGL.so.1.7.0 /usr/lib64/libGL.so.1.7.0-nvidia
  
 [root@n37 src]# ls /etc/X11/xorg.conf [root@n37 src]# ls /etc/X11/xorg.conf
Line 74: Line 85:
 [root@n37 src]# [root@n37 src]#
 [root@n37 src]# scp n78:/etc/X11/xorg.conf /etc/X11/  # CHROOT done [root@n37 src]# scp n78:/etc/X11/xorg.conf /etc/X11/  # CHROOT done
- 
-# Do the device files /dev/nvidia* exist with 0666 permissions?
-# They were not  
-/usr/local/src/nvidia-modprobe.sh 
- 
-# new kernel initramfs, load 
-dracut --force 
  
 # for mapd graphics support needs to be enabled # for mapd graphics support needs to be enabled
Line 164: Line 168:
  
   * yum install freeglut-devel libX11-devel libXi-devel libXmu-devel \ make mesa-libGLU-devel # CHROOT done   * yum install freeglut-devel libX11-devel libXi-devel libXmu-devel \ make mesa-libGLU-devel # CHROOT done
 +  * yum install blas blas-devel lapack lapack-devel #CHROOT done
   * check for /usr/lib64/libvdpau_nvidia.so   * check for /usr/lib64/libvdpau_nvidia.so
 +
   * [root@n37 /]# tar -cvf /tmp/n37.chroot.ul.tar usr/local   * [root@n37 /]# tar -cvf /tmp/n37.chroot.ul.tar usr/local
   * [root@n37 /]# scp /tmp/n37.chroot.ul.tar sms_server:/var/chroots/goldimages/   * [root@n37 /]# scp /tmp/n37.chroot.ul.tar sms_server:/var/chroots/goldimages/
Line 174: Line 180:
 <code> <code>
 # As root check requirements # CHROOT done # As root check requirements # CHROOT done
-rpm -qa | grep ^gcc 
-rpm -qa | grep ^g++ 
 rpm -qa | grep ^flex rpm -qa | grep ^flex
 rpm -qa | grep ^tcsh rpm -qa | grep ^tcsh
Line 194: Line 198:
  
 # As root install missing # CHROOT done # As root install missing # CHROOT done
-yum install flex bzip2-devel libXdmcp zlib zlib-devel +# CHROOT done
-yum install tkinter openmpi perl-ExtUtils-MakeMaker patch bison+
  
 </code> </code>
Line 350: Line 353:
   javapackages-tools libxslt \   javapackages-tools libxslt \
   lksctp-tools python-javapackages \   lksctp-tools python-javapackages \
-  python-lxml tzdata-java  # CHROOT done+  python-lxml tzdata-java  nfs-utils psmisc lm_sensors 
 +  # CHROOT done
  
 yum install mapd   # n37:/usr/local/src yum install mapd   # n37:/usr/local/src
Line 369: Line 373:
  
  
-To do another node, the steps are NOT WORKING! +To do another node, the steps are
-Trying n36 with cuda rpm (local)+
  
-  * add node in deploy.txt of n37.chroot/+  * add node in deploy.txt of n36.chroot/  (centos 7.2)
   * ./deploy.txt `grep node_name deploy.txt`   * ./deploy.txt `grep node_name deploy.txt`
-  * scp in place passwd, shadow, group, hosts, fstab from global archive 
   * umount -a   * umount -a
   * ONBOOT=no, ib0 ??? connectX mlx4_0 IB interface breaks in CentOS 7.3+   * ONBOOT=no, ib0 ??? connectX mlx4_0 IB interface breaks in CentOS 7.3+
Line 381: Line 383:
   * hostnamectl set-hostname node_name (logout/login)   * hostnamectl set-hostname node_name (logout/login)
   * eth1 on 129.133   * eth1 on 129.133
-  * rpm -kernel-devel +  * yum update 
-  * rpm -i /usr/local/src/cuda-repo-rhel10-0-local-10.0.130-410.48-1.0-1.x86_64.rpm+  * yum install kernel-headers kernel-devel epel-release 
 +  * put n37 tarball in /, unpack 
 +  * remove cuda-9.
   * Nvidia install: files in /usr/local/src   * Nvidia install: files in /usr/local/src
-    * rpm -i cuda-repo-rhel7-10-0-local-10.0.130-410.48-1.0-1.x86_64.rpm+    * remove nouveau 
 +    * disable selinux 
 +    * reboot 
 +    * sh runfile 
 +    * ./runfile -silent -driver 
 +    * install all CHROOT done packages
     * yum clean all     * yum clean all
-    * yum install cuda+    * reboot 
 + 
 +  * custom fstab 
 +  * mount on 10.10 
 +  * authorized_keys 
 +  * scp in place from global archive...make backups 
 +  * passwd, shadow, group, hosts  
 +  * reboot for polkit, check /etc/ssh/ssh_host* perms/owners
  
 +  * /share/apps/src/openlava3 install in CentOS 7
 +  * systemctl enable
 +  * eth1 on 10.10, mounts ok?
 +  * /etc/default/grub add "nomodeset" and GRUB_RECORDFAIL_TIMEOUT (grub2-mkconfig -o /boot/grub2/grub.cfg)
 +    * did not help with the countdown
 +    * did fix the text console
 +  * rc.local, crontab
 +  * reboot
  
 +Finished rebuilding n33-n37 based on n37 example.
 + --- //[[hmeij@wesleyan.edu|Henk]] 2018/10/11 10:04//
  
 \\ \\
 **[[cluster:0|Back]]** **[[cluster:0|Back]]**
cluster/172.txt · Last modified: 2020/07/15 17:52 by hmeij07