Differences

This shows you the differences between two versions of the page.

--- cluster:172 [2018/09/20 12:32]
hmeij07 [Lammps]
+++ cluster:172 [2018/09/26 14:34]
hmeij07
@@ Line 34: / Line 34: @@
 yum install kernel-devel kernel-headers (remove old headers after reboot)
 yum install gcc gcc-gfortran gcc-c++  # CHROOT done
+# /etc/modprobe.d/blacklist-nouveau.conf (new file by nvidia)
+# reboot before driver installation # CHROOT done
+blacklist nouveau
+options nouveau modeset=0
+# new kernel initramfs, load
+dracut --force
+reboot
 # download runfiles from https://developer.nvidia.com/cuda-downloads
 # files in /usr/local/src
-sh cuda_name_of_runfile
+sh cuda_9.2.148_396.37_linux.run
-sh cuda_name_of_runfile_patch
 Install NVIDIA Accelerated Graphics Driver for Linux-x86_64 396.26?
@@ Line 54: / Line 65: @@
 Install the CUDA 9.2 Samples?
 (y)es/(n)o/(q)uit: n
-# /etc/modprobe.d/blacklist-nouveau.conf
-# reboot before driver installation # CHROOT done
-blacklist nouveau
-options nouveau modeset=0
-reboot
 # nvidia driver
 ./cuda_name_of_runfile -silent -driver
+# Device files /dev/nvidia* exist with 0666 permissions?
+# They were not
+/usr/local/src/nvidia-modprobe.sh
 # backup
 [root@n37 src]# rpm -qf /usr/lib/libGL.so
 file /usr/lib/libGL.so is not owned by any package
-cp /usr/lib/libGL.so /usr/lib/libGL.so-nvidia
+cp /usr/lib/libGL.so   /usr/lib/libGL.so-nvidia
+cp /usr/lib/libGL.so.1 /usr/lib/libGL.so.1-nvidia
 [root@n37 src]# ls /etc/X11/xorg.conf
@@ Line 74: / Line 84: @@
 [root@n37 src]#
 [root@n37 src]# scp n78:/etc/X11/xorg.conf /etc/X11/  # CHROOT done
-# Device files/dev/nvidia* exist with 0666 permissions?
-# They were not
-/usr/local/src/nvidia-modprobe.sh
-# new kernel initramfs, load
-dracut --force
 # for mapd graphics support needs to be enabled
@@ Line 319: / Line 322: @@
   * consulting the ARCH web page I choose -arch=sm_35 (on n37 for K20)
-Good thing we're doing this now, future versions of CUDA will not support the K20s anymore. In fact on that web site they are not mentioned, only the K40/K80 gpus. So we'll see what testing reveals.  Please double check results against previous runs. Compile as regular user and stage lmp_mpi in /usr/local/lammps-22Aug10/
+Good thing we're doing this now, future versions of CUDA will not support the K20s anymore. In fact on that web site they are not mentioned, only the K40/K80 gpus. So we'll see what testing reveals.  Please double check results against previous runs. Compile as regular user and stage lmp_mpi in /usr/local/lammps-22Aug18/
 <code>
@@ Line 325: / Line 328: @@
 [hmeij@n37 src]$ ll /usr/local/lammps-22Aug18/
 total 104356
--rwxr-xr-x 1 hmeij its 35739800 Aug 23 08:49 lmp_mpi-double-double-with-cuda
+-rwxr-xr-x 1 hmeij its 35739800 Aug 23 08:49 lmp_mpi-double-double-with-gpu
--rwxr-xr-x 1 hmeij its 35555672 Aug 23 09:11 lmp_mpi-single-double-with-cuda
+-rwxr-xr-x 1 hmeij its 35555672 Aug 23 09:11 lmp_mpi-single-double-with-gpu
--rwxr-xr-x 1 hmeij its 35559552 Aug 23 09:53 lmp_mpi-single-single-with-cuda
+-rwxr-xr-x 1 hmeij its 35559552 Aug 23 09:53 lmp_mpi-single-single-with-gpu
 </code>
@@ Line 367: / Line 370: @@
   * Make the final tar file for /usr/local and post with CHROOT # done
   * Install all the packages of this page in CHROOT # marked done
-  * Switch eth1 back to 10.10 and do NFS mounts
+To do another node, the steps are
+  * add node in deploy.txt of n36.chroot/  (centos 7.2)
+  * ./deploy.txt `grep node_name deploy.txt`
+  * scp passwd, shadow, group, hosts, fstab into place from the global archive
+  * umount -a
+  * ONBOOT=no for ib0 ??? (the ConnectX mlx4_0 IB interface breaks in CentOS 7.3+)
+  * bootlocal=EXIT then reboot then check polkit user … screws up systemd-logind
+  * hostnamectl set-hostname node_name (logout/login)
+  * eth1 on 129.133
+  * yum update
+  * yum install kernel-headers kernel-devel
+  * put n37 tarball in /, unpack, remove cuda-9.2
+  * reboot
+  * Nvidia install: files in /usr/local/src
+    * sh runfile
+    * reboot (nouveau)
+    * ./runfile -silent -driver
+    * reboot
 \\
 **[[cluster:0|Back]]**

DokuWiki

User Tools

Site Tools

Differences

Page Tools