mdadm recreate array

Something went wrong and a compute node is complaining that its array is corrupt. The array was 4x 1TB 7.2K RPM disks combined with mdadm to provide /localscratch2tb for heavy-IO Gaussian jobs.

This is the process …
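
Before rebuilding, it is worth confirming what the kernel and mdadm think happened. A quick pre-check (not part of the captured session, standard tooling only):

# inspect the current array state and recent kernel messages
cat /proc/mdstat
mdadm --detail /dev/md127
dmesg | grep -i md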

# first wipe the partition and disk signatures for sd[a-d]

[root@n74 ~]# wipefs --all --force /dev/sda1;  wipefs --all --force /dev/sda
/dev/sda1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sda: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sda: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sda: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdb1;  wipefs --all --force /dev/sdb
/dev/sdb1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdb: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdb: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdb: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdc1;  wipefs --all --force /dev/sdc
/dev/sdc1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdc: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdc: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdc: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdd1;  wipefs --all --force /dev/sdd
/dev/sdd1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdd: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdd: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdd: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
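
The four wipefs pairs above can also be written as one loop; an equivalent sketch:

# same wipe, as a loop over the four data disks
for d in /dev/sd[a-d]; do
    wipefs --all --force ${d}1
    wipefs --all --force $d
done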

# make sure the partitions exist or create them
sgdisk -n 1:0:0 /dev/sda
sgdisk -n 1:0:0 /dev/sdb
sgdisk -n 1:0:0 /dev/sdc
sgdisk -n 1:0:0 /dev/sdd
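
The -n 1:0:0 argument tells sgdisk to create partition 1 with default start and end sectors, i.e. a single partition spanning the whole disk. To double-check the new layout (not from the original session):

# print the resulting GPT on each disk
for d in /dev/sd[a-d]; do sgdisk -p $d; done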

# reboot

[root@n74 ~]# lsblk
NAME      MAJ:MIN RM   SIZE RO TYPE  MOUNTPOINT
sda         8:0    0 931.5G  0 disk
└─sda1      8:1    0 931.5G  0 part
  └─md127   9:127  0 931.4G  0 raid1
sdb         8:16   0 931.5G  0 disk
└─sdb1      8:17   0 931.5G  0 part
  └─md127   9:127  0 931.4G  0 raid1
sdc         8:32   0 931.5G  0 disk
└─sdc1      8:33   0 931.5G  0 part
  └─md127   9:127  0 931.4G  0 raid1
sdd         8:48   0 931.5G  0 disk
└─sdd1      8:49   0 931.5G  0 part
  └─md127   9:127  0 931.4G  0 raid1
sde         8:64   0  29.5G  0 disk
├─sde1      8:65   0   524M  0 part
├─sde2      8:66   0   525M  0 part
├─sde3      8:67   0   525M  0 part  [SWAP]
└─sde4      8:68   0    28G  0 part  /
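
Note that md127 came back after the reboot, assembled as raid1 on the four partitions; that is why the next step stops and removes it explicitly. Assuming the same state, /proc/mdstat would confirm how it was assembled:

cat /proc/mdstat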

# now we stop and remove the md127 device

[root@n74 ~]#  mdadm --stop /dev/md127
mdadm: stopped /dev/md127
[root@n74 ~]# mdadm --remove /dev/md127
mdadm: error opening /dev/md127: No such file or directory <-- that's ok, the device node was already removed by --stop
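
To be thorough before recreating, any leftover md superblocks on the members can also be erased; a sketch using a standard mdadm option (not part of the original session):

# erase remaining md superblocks on the member partitions
mdadm --zero-superblock /dev/sd[a-d]1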

# md127 gone

[root@n74 ~]# lsblk
NAME   MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda      8:0    0 931.5G  0 disk
└─sda1   8:1    0 931.5G  0 part
sdb      8:16   0 931.5G  0 disk
└─sdb1   8:17   0 931.5G  0 part
sdc      8:32   0 931.5G  0 disk
└─sdc1   8:33   0 931.5G  0 part
sdd      8:48   0 931.5G  0 disk
└─sdd1   8:49   0 931.5G  0 part
sde      8:64   0  29.5G  0 disk
├─sde1   8:65   0   524M  0 part
├─sde2   8:66   0   525M  0 part
├─sde3   8:67   0   525M  0 part [SWAP]
└─sde4   8:68   0    28G  0 part /

# create the array, raid 10, 4 disks

[root@n74 ~]#  mdadm --create --verbose /dev/md127 --level=10 --raid-devices=4 --force /dev/sd[a-d]1
mdadm: layout defaults to n2
mdadm: layout defaults to n2
mdadm: chunk size defaults to 512K
mdadm: size set to 976628736K
mdadm: automatically enabling write-intent bitmap on large array
mdadm: Defaulting to version 1.2 metadata
mdadm: array /dev/md127 started.
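
With the near=2 layout every block is stored on two of the four disks, so usable capacity is half the raw total: 4 x 976628736K / 2 = 1953257472K, about 1.86 TiB, matching the Array Size reported below. The initial resync runs in the background and can be watched with:

# watch the background resync progress
watch -n 5 cat /proc/mdstat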

# ok, check

[root@n74 ~]# mdadm --detail /dev/md127
/dev/md127:
           Version : 1.2
     Creation Time : Wed Mar  1 13:21:58 2023
        Raid Level : raid10
        Array Size : 1953257472 (1862.77 GiB 2000.14 GB)
     Used Dev Size : 976628736 (931.39 GiB 1000.07 GB)
      Raid Devices : 4
     Total Devices : 4
       Persistence : Superblock is persistent

     Intent Bitmap : Internal

       Update Time : Wed Mar  1 13:22:42 2023
             State : clean, resyncing
    Active Devices : 4
   Working Devices : 4
    Failed Devices : 0
     Spare Devices : 0

            Layout : near=2
        Chunk Size : 512K

Consistency Policy : bitmap

     Resync Status : 0% complete

              Name : n74:127  (local to host n74)
              UUID : 2ebc7191:8b86aeae:e1b3b05e:b7443fe8
            Events : 8

    Number   Major   Minor   RaidDevice State
       0       8        1        0      active sync set-A   /dev/sda1
       1       8       17        1      active sync set-B   /dev/sdb1
       2       8       33        2      active sync set-A   /dev/sdc1
       3       8       49        3      active sync set-B   /dev/sdd1
       
# make the array assembly persistent across boots via mdadm.conf

[root@n74 ~]# rm -f /etc/mdadm/mdadm.conf
[root@n74 ~]# mdadm --detail --scan --verbose | sudo tee -a /etc/mdadm/mdadm.conf
ARRAY /dev/md127 level=raid10 num-devices=4 metadata=1.2 name=n74:127 UUID=2ebc7191:8b86aeae:e1b3b05e:b7443fe8
   devices=/dev/sda1,/dev/sdb1,/dev/sdc1,/dev/sdd1
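
Note the path: Debian-family systems use /etc/mdadm/mdadm.conf, while RHEL/CentOS systems usually expect /etc/mdadm.conf. A sketch of the equivalent on a RHEL-family node (adjust to the distribution at hand):

# regenerate the array config at the RHEL-style location
mdadm --detail --scan --verbose > /etc/mdadm.conf
# if the array must assemble in early boot, also regenerate the initramfs
dracut -f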

# format 

[root@n74 ~]# mkfs.ext4  /dev/md127
mke2fs 1.45.6 (20-Mar-2020)
/dev/md127 contains a xfs file system
Proceed anyway? (y,N) y
Creating filesystem with 488314368 4k blocks and 122085376 inodes
Filesystem UUID: 3da42aee-c22a-45a3-b026-1ebbe4d17809
Superblock backups stored on blocks:
        32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208,
        4096000, 7962624, 11239424, 20480000, 23887872, 71663616, 78675968,
        102400000, 214990848

Allocating group tables: done
Writing inode tables: done
Creating journal (262144 blocks): done
Writing superblocks and filesystem accounting information: done
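
The mkfs defaults were used here; ext4 can optionally be told the RAID geometry so allocation aligns with the 512K chunks. A sketch (stride = 512K chunk / 4K block = 128; stripe-width = stride x 2 data-bearing disks = 256 for this 4-disk near=2 layout):

# optional alternative: align ext4 to the RAID geometry
mkfs.ext4 -E stride=128,stripe-width=256 /dev/md127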

# edit /etc/fstab and mount

# 2tb
/dev/md127            /localscratch2tb        ext4     defaults 0 0
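
With the fstab entry in place the filesystem can be mounted without rebooting (assuming the mountpoint does not exist yet):

# create the mountpoint and mount everything in fstab
mkdir -p /localscratch2tb
mount -a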

[root@n74 ~]# df -h | egrep -i "used|md127"
Filesystem      Size  Used Avail Use% Mounted on
/dev/md127      1.8T   77M  1.7T   1% /localscratch2tb

# finish with permissions
[root@n74 ~]# chmod go+rwx /localscratch2tb
[root@n74 ~]# chmod o+t /localscratch2tb
[root@n74 ~]# ls -ld /localscratch2tb/
drwxrwxrwt 3 root root 4096 Mar  1 13:26 /localscratch2tb/
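
The two chmod calls produce a world-writable sticky directory, the same mode as /tmp; an equivalent single command:

chmod 1777 /localscratch2tb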

# test
[root@n74 ~]# reboot
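
After the reboot, confirm the array assembled and mounted on its own; a minimal check (not captured in the original session):

# verify assembly and mount after reboot
cat /proc/mdstat
df -h /localscratch2tb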

