Something went wrong and a compute node is complaining that one of its arrays is corrupt. The array was four 1 TB 7.2K RPM disks assembled with mdadm to provide /localscratch2tb, the local scratch space for heavy-I/O Gaussian jobs.
This is the process …
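Before touching the disks it is worth confirming which md device claims them and what state it thinks it is in. A minimal pre-wipe sanity check (not part of the original session) could look like this:

# inspect the complaining array before wiping anything
cat /proc/mdstat                 # which md devices exist and their member/sync state
mdadm --detail /dev/md127        # array-level view (assuming the array is md127, as below)
mdadm --examine /dev/sd[a-d]1    # per-member superblock details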
# first wipe the disks and partitions for sd[a-d]
[root@n74 ~]# wipefs --all --force /dev/sda1; wipefs --all --force /dev/sda
/dev/sda1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sda: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sda: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sda: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdb1; wipefs --all --force /dev/sdb
/dev/sdb1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdb: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdb: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdb: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdc1; wipefs --all --force /dev/sdc
/dev/sdc1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdc: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdc: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdc: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa
[root@n74 ~]# wipefs --all --force /dev/sdd1; wipefs --all --force /dev/sdd
/dev/sdd1: 4 bytes were erased at offset 0x00001000 (linux_raid_member): fc 4e 2b a9
/dev/sdd: 8 bytes were erased at offset 0x00000200 (gpt): 45 46 49 20 50 41 52 54
/dev/sdd: 8 bytes were erased at offset 0xe8e0db5e00 (gpt): 45 46 49 20 50 41 52 54
/dev/sdd: 2 bytes were erased at offset 0x000001fe (PMBR): 55 aa

# make sure the partitions exist or create them
sgdisk -n 1:0:0 /dev/sda
sgdisk -n 1:0:0 /dev/sdb
sgdisk -n 1:0:0 /dev/sdc
sgdisk -n 1:0:0 /dev/sdd

# reboot

[root@n74 ~]# lsblk
NAME        MAJ:MIN RM   SIZE RO TYPE  MOUNTPOINT
sda           8:0    0 931.5G  0 disk
└─sda1        8:1    0 931.5G  0 part
  └─md127     9:127  0 931.4G  0 raid1
sdb           8:16   0 931.5G  0 disk
└─sdb1        8:17   0 931.5G  0 part
  └─md127     9:127  0 931.4G  0 raid1
sdc           8:32   0 931.5G  0 disk
└─sdc1        8:33   0 931.5G  0 part
  └─md127     9:127  0 931.4G  0 raid1
sdd           8:48   0 931.5G  0 disk
└─sdd1        8:49   0 931.5G  0 part
  └─md127     9:127  0 931.4G  0 raid1
sde           8:64   0  29.5G  0 disk
├─sde1        8:65   0   524M  0 part
├─sde2        8:66   0   525M  0 part
├─sde3        8:67   0   525M  0 part  [SWAP]
└─sde4        8:68   0    28G  0 part  /

# now we stop and remove the md127 device
[root@n74 ~]# mdadm --stop /dev/md127
mdadm: stopped /dev/md127
[root@n74 ~]# mdadm --remove /dev/md127
mdadm: error opening /dev/md127: No such file or directory   <-- that's ok

# md127 gone
[root@n74 ~]# lsblk
NAME     MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda        8:0    0 931.5G  0 disk
└─sda1     8:1    0 931.5G  0 part
sdb        8:16   0 931.5G  0 disk
└─sdb1     8:17   0 931.5G  0 part
sdc        8:32   0 931.5G  0 disk
└─sdc1     8:33   0 931.5G  0 part
sdd        8:48   0 931.5G  0 disk
└─sdd1     8:49   0 931.5G  0 part
sde        8:64   0  29.5G  0 disk
├─sde1     8:65   0   524M  0 part
├─sde2     8:66   0   525M  0 part
├─sde3     8:67   0   525M  0 part [SWAP]
└─sde4     8:68   0    28G  0 part /

# create the array, raid 10, 4 disks
[root@n74 ~]# mdadm --create --verbose /dev/md127 --level=10 --raid-devices=4 --force /dev/sd[a-d]1
mdadm: layout defaults to n2
mdadm: layout defaults to n2
mdadm: chunk size defaults to 512K
mdadm: size set to 976628736K
mdadm: automatically enabling write-intent bitmap on large array
mdadm: Defaulting to version 1.2 metadata
mdadm: array /dev/md127 started.
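A note on the resync: a freshly created array immediately starts an initial resync in the background. It is usable right away, just slower until the resync finishes. A sketch for keeping an eye on it (not from the original session):

# watch the resync progress; the array can be formatted and used meanwhile
cat /proc/mdstat
# or block until the resync completes
mdadm --wait /dev/md127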
# ok, check
[root@n74 ~]# mdadm --detail /dev/md127
/dev/md127:
           Version : 1.2
     Creation Time : Wed Mar 1 13:21:58 2023
        Raid Level : raid10
        Array Size : 1953257472 (1862.77 GiB 2000.14 GB)
     Used Dev Size : 976628736 (931.39 GiB 1000.07 GB)
      Raid Devices : 4
     Total Devices : 4
       Persistence : Superblock is persistent

     Intent Bitmap : Internal

       Update Time : Wed Mar 1 13:22:42 2023
             State : clean, resyncing
    Active Devices : 4
   Working Devices : 4
    Failed Devices : 0
     Spare Devices : 0

            Layout : near=2
        Chunk Size : 512K

Consistency Policy : bitmap

     Resync Status : 0% complete

              Name : n74:127  (local to host n74)
              UUID : 2ebc7191:8b86aeae:e1b3b05e:b7443fe8
            Events : 8

    Number   Major   Minor   RaidDevice State
       0       8        1        0      active sync set-A   /dev/sda1
       1       8       17        1      active sync set-B   /dev/sdb1
       2       8       33        2      active sync set-A   /dev/sdc1
       3       8       49        3      active sync set-B   /dev/sdd1

# make mounting persistent across boots
[root@n74 ~]# rm -f /etc/mdadm/mdadm.conf
[root@n74 ~]# mdadm --detail --scan --verbose | sudo tee -a /etc/mdadm/mdadm.conf
ARRAY /dev/md127 level=raid10 num-devices=4 metadata=1.2 name=n74:127 UUID=2ebc7191:8b86aeae:e1b3b05e:b7443fe8
   devices=/dev/sda1,/dev/sdb1,/dev/sdc1,/dev/sdd1

# format
[root@n74 ~]# mkfs.ext4 /dev/md127
mke2fs 1.45.6 (20-Mar-2020)
/dev/md127 contains a xfs file system
Proceed anyway? (y,N) y
Creating filesystem with 488314368 4k blocks and 122085376 inodes
Filesystem UUID: 3da42aee-c22a-45a3-b026-1ebbe4d17809
Superblock backups stored on blocks:
	32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632, 2654208,
	4096000, 7962624, 11239424, 20480000, 23887872, 71663616, 78675968,
	102400000, 214990848

Allocating group tables: done
Writing inode tables: done
Creating journal (262144 blocks): done
Writing superblocks and filesystem accounting information: done

# edit /etc/fstab and mount
# 2tb
/dev/md127 /localscratch2tb ext4 defaults 0 0

[root@n74 ~]# df -h | egrep -i "used|md127"
Filesystem      Size  Used Avail Use% Mounted on
/dev/md127      1.8T   77M  1.7T   1% /localscratch2tb

# finish with permissions
[root@n74 ~]# chmod go+rwx /localscratch2tb
[root@n74 ~]# chmod o+t /localscratch2tb
[root@n74 ~]# ls -ld /localscratch2tb/
drwxrwxrwt 3 root root 4096 Mar 1 13:26 /localscratch2tb/

# test
[root@n74 ~]# reboot
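After the reboot it is worth verifying that the array auto-assembled and the fstab entry mounted on its own. A minimal post-reboot check (a sketch; the test file name is arbitrary):

# confirm the array and the mount survived the reboot
cat /proc/mdstat
df -h /localscratch2tb
# crude write test for the scratch space; remove the file afterwards
dd if=/dev/zero of=/localscratch2tb/ddtest.bin bs=1M count=1024 oflag=direct
rm -f /localscratch2tb/ddtest.bin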