git.hungrycats.org Git - linux/commitdiff
[PATCH] md: factor out MD superblock handling code
author: Neil Brown <neilb@cse.unsw.edu.au>
Wed, 30 Oct 2002 08:04:30 +0000 (00:04 -0800)
committer: Linus Torvalds <torvalds@penguin.transmeta.com>
Wed, 30 Oct 2002 08:04:30 +0000 (00:04 -0800)
Define an interface for interpreting and updating superblocks
so we can more easily define new formats.

With this patch, (almost) all superblock layout information is
located in a small set of routines dedicated to superblock
handling.  This will allow us to provide a similar set for
a different format.

The two exceptions are:
 1/ autostart_array, which searches for the devices listed
    in the superblock.
 2/ raid5, which 'knows' the maximum number of devices for
     compute_parity.

Both will be addressed in a later patch.

drivers/md/md.c
drivers/md/multipath.c
include/linux/raid/md_k.h

index f9f71514541beca2e0947648e79bdf95eff8e530..40b0d5f5e2d15db94766221387f2e563c658d267 100644 (file)
@@ -307,8 +307,6 @@ static int alloc_disk_sb(mdk_rdev_t * rdev)
                printk(OUT_OF_MEM);
                return -EINVAL;
        }
-       rdev->sb = (mdp_super_t *) page_address(rdev->sb_page);
-       clear_page(rdev->sb);
 
        return 0;
 }
@@ -317,7 +315,7 @@ static void free_disk_sb(mdk_rdev_t * rdev)
 {
        if (rdev->sb_page) {
                page_cache_release(rdev->sb_page);
-               rdev->sb = NULL;
+               rdev->sb_loaded = 0;
                rdev->sb_page = NULL;
                rdev->sb_offset = 0;
                rdev->size = 0;
@@ -365,10 +363,12 @@ static int read_disk_sb(mdk_rdev_t * rdev)
 {
        sector_t sb_offset;
 
-       if (!rdev->sb) {
+       if (!rdev->sb_page) {
                MD_BUG();
                return -EINVAL;
        }
+       if (rdev->sb_loaded)
+               return 0;
 
        /*
         * Calculate the position of the superblock,
@@ -381,8 +381,7 @@ static int read_disk_sb(mdk_rdev_t * rdev)
 
        if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
                goto fail;
-
-       printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
+       rdev->sb_loaded = 1;
        return 0;
 
 fail:
@@ -390,6 +389,56 @@ fail:
        return -EINVAL;
 }
 
+static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+{
+       if (    (sb1->set_uuid0 == sb2->set_uuid0) &&
+               (sb1->set_uuid1 == sb2->set_uuid1) &&
+               (sb1->set_uuid2 == sb2->set_uuid2) &&
+               (sb1->set_uuid3 == sb2->set_uuid3))
+
+               return 1;
+
+       return 0;
+}
+
+
+static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+{
+       int ret;
+       mdp_super_t *tmp1, *tmp2;
+
+       tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
+       tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
+
+       if (!tmp1 || !tmp2) {
+               ret = 0;
+               printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
+               goto abort;
+       }
+
+       *tmp1 = *sb1;
+       *tmp2 = *sb2;
+
+       /*
+        * nr_disks is not constant
+        */
+       tmp1->nr_disks = 0;
+       tmp2->nr_disks = 0;
+
+       if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
+               ret = 0;
+       else
+               ret = 1;
+
+abort:
+       if (tmp1)
+               kfree(tmp1);
+       if (tmp2)
+               kfree(tmp2);
+
+       return ret;
+}
+
 static unsigned int calc_sb_csum(mdp_super_t * sb)
 {
        unsigned int disk_csum, csum;
@@ -402,39 +451,284 @@ static unsigned int calc_sb_csum(mdp_super_t * sb)
 }
 
 /*
- * Check one RAID superblock for generic plausibility
+ * Handle superblock details.
+ * We want to be able to handle multiple superblock formats
+ * so we have a common interface to them all, and an array of
+ * different handlers.
+ * We rely on user-space to write the initial superblock, and support
+ * reading and updating of superblocks.
+ * Interface methods are:
+ *   int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev)
+ *      loads and validates a superblock on dev.
+ *      if refdev != NULL, compare superblocks on both devices
+ *    Return:
+ *      0 - dev has a superblock that is compatible with refdev
+ *      1 - dev has a superblock that is compatible and newer than refdev
+ *          so dev should be used as the refdev in future
+ *     -EINVAL superblock incompatible or invalid
+ *     -othererror e.g. -EIO
+ *
+ *   int validate_super(mddev_t *mddev, mdk_rdev_t *dev)
+ *      Verify that dev is acceptable into mddev.
+ *       The first time, mddev->raid_disks will be 0, and data from
+ *       dev should be merged in.  Subsequent calls check that dev
+ *       is new enough.  Return 0 or -EINVAL
+ *
+ *   void sync_super(mddev_t *mddev, mdk_rdev_t *dev)
+ *     Update the in-memory superblock for dev with data in mddev.
+ *     This does not write to disk.
+ *
  */
 
-static int check_disk_sb(mdk_rdev_t * rdev)
+struct super_type  {
+       char            *name;
+       struct module   *owner;
+       int             (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev);
+       int             (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+       void            (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+};
+
+/*
+ * load_super for 0.90.0 
+ */
+static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
 {
        mdp_super_t *sb;
-       int ret = -EINVAL;
+       int ret;
 
-       sb = rdev->sb;
-       if (!sb) {
-               MD_BUG();
-               goto abort;
-       }
+       ret = read_disk_sb(rdev);
+       if (ret) return ret;
+
+       ret = -EINVAL;
+
+       sb = (mdp_super_t*)page_address(rdev->sb_page);
 
        if (sb->md_magic != MD_SB_MAGIC) {
                printk(BAD_MAGIC, bdev_partition_name(rdev->bdev));
                goto abort;
        }
 
+       if (sb->major_version != 0 ||
+           sb->minor_version != 90) {
+               printk(KERN_WARNING "Bad version number %d.%d on %s\n",
+                      sb->major_version, sb->minor_version,
+                      bdev_partition_name(rdev->bdev));
+               goto abort;
+       }
+
        if (sb->md_minor >= MAX_MD_DEVS) {
                printk(BAD_MINOR, bdev_partition_name(rdev->bdev), sb->md_minor);
                goto abort;
        }
+       if (sb->raid_disks <= 0)
+               goto abort;
 
        if (calc_sb_csum(sb) != sb->sb_csum) {
                printk(BAD_CSUM, bdev_partition_name(rdev->bdev));
                goto abort;
        }
-       ret = 0;
-abort:
+
+       rdev->preferred_minor = sb->md_minor;
+
+       if (refdev == 0)
+               ret = 1;
+       else {
+               __u64 ev1, ev2;
+               mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+               if (!uuid_equal(refsb, sb)) {
+                       printk(KERN_WARNING "md: %s has different UUID to %s\n",
+                              bdev_partition_name(rdev->bdev),
+                              bdev_partition_name(refdev->bdev));
+                       goto abort;
+               }
+               if (!sb_equal(refsb, sb)) {
+                       printk(KERN_WARNING "md: %s has same UUID but different superblock to %s\n",
+                              bdev_partition_name(rdev->bdev),
+                              bdev_partition_name(refdev->bdev));
+                       goto abort;
+               }
+               ev1 = md_event(sb);
+               ev2 = md_event(refsb);
+               if (ev1 > ev2)
+                       ret = 1;
+               else 
+                       ret = 0;
+       }
+
+
+ abort:
        return ret;
 }
 
+/*
+ * validate_super for 0.90.0
+ */
+static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       mdp_disk_t *desc;
+       mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+
+       if (mddev->raid_disks == 0) {
+               mddev->major_version = sb->major_version;
+               mddev->minor_version = sb->minor_version;
+               mddev->patch_version = sb->patch_version;
+               mddev->persistent = ! sb->not_persistent;
+               mddev->chunk_size = sb->chunk_size;
+               mddev->ctime = sb->ctime;
+               mddev->utime = sb->utime;
+               mddev->level = sb->level;
+               mddev->layout = sb->layout;
+               mddev->raid_disks = sb->raid_disks;
+               mddev->state = sb->state;
+               mddev->size = sb->size;
+               mddev->events = md_event(sb);
+       
+               memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
+               memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
+               memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
+               memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
+
+               mddev->max_disks = MD_SB_DISKS;
+       } else {
+               __u64 ev1;
+               ev1 = md_event(sb);
+               ++ev1;
+               if (ev1 < mddev->events) 
+                       return -EINVAL;
+       }
+       if (mddev->level != LEVEL_MULTIPATH) {
+               rdev->desc_nr = sb->this_disk.number;
+               rdev->raid_disk = -1;
+               rdev->in_sync = rdev->faulty = 0;
+               desc = sb->disks + rdev->desc_nr;
+
+               if (desc->state & (1<<MD_DISK_FAULTY))
+                       rdev->faulty = 1;
+               else if (desc->state & (1<<MD_DISK_SYNC) &&
+                        desc->raid_disk < mddev->raid_disks) {
+                       rdev->in_sync = 1;
+                       rdev->raid_disk = desc->raid_disk;
+               }
+       }
+       return 0;
+}
+
+/*
+ * sync_super for 0.90.0
+ */
+static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+       mdp_super_t *sb;
+       struct list_head *tmp;
+       mdk_rdev_t *rdev2;
+       int next_spare = mddev->raid_disks;
+
+       /* make the superblock in rdev->sb_page match mddev data..
+        *
+        * 1/ zero out disks
+        * 2/ Add info for each disk, keeping track of highest desc_nr
+        * 3/ any empty disks < highest become removed
+        *
+        * disks[0] gets initialised to REMOVED because
+        * we cannot be sure from other fields if it has
+        * been initialised or not.
+        */
+       int highest = 0;
+       int i;
+       int active=0, working=0,failed=0,spare=0,nr_disks=0;
+
+       sb = (mdp_super_t*)page_address(rdev->sb_page);
+
+       memset(sb, 0, sizeof(*sb));
+
+       sb->md_magic = MD_SB_MAGIC;
+       sb->major_version = mddev->major_version;
+       sb->minor_version = mddev->minor_version;
+       sb->patch_version = mddev->patch_version;
+       sb->gvalid_words  = 0; /* ignored */
+       memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
+       memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
+       memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
+       memcpy(&sb->set_uuid3, mddev->uuid+12,4);
+
+       sb->ctime = mddev->ctime;
+       sb->level = mddev->level;
+       sb->size  = mddev->size;
+       sb->raid_disks = mddev->raid_disks;
+       sb->md_minor = mddev->__minor;
+       sb->not_persistent = !mddev->persistent;
+       sb->utime = mddev->utime;
+       sb->state = mddev->state;
+       sb->events_hi = (mddev->events>>32);
+       sb->events_lo = (u32)mddev->events;
+
+       sb->layout = mddev->layout;
+       sb->chunk_size = mddev->chunk_size;
+
+       sb->disks[0].state = (1<<MD_DISK_REMOVED);
+       ITERATE_RDEV(mddev,rdev2,tmp) {
+               mdp_disk_t *d;
+               if (rdev2->raid_disk >= 0)
+                       rdev2->desc_nr = rdev2->raid_disk;
+               else
+                       rdev2->desc_nr = next_spare++;
+               d = &sb->disks[rdev2->desc_nr];
+               nr_disks++;
+               d->number = rdev2->desc_nr;
+               d->major = MAJOR(rdev2->bdev->bd_dev);
+               d->minor = MINOR(rdev2->bdev->bd_dev);
+               if (rdev2->raid_disk >= 0)
+                       d->raid_disk = rdev2->raid_disk;
+               else
+                       d->raid_disk = rdev2->desc_nr; /* compatability */
+               if (rdev2->faulty) {
+                       d->state = (1<<MD_DISK_FAULTY);
+                       failed++;
+               } else if (rdev2->in_sync) {
+                       d->state = (1<<MD_DISK_ACTIVE);
+                       d->state |= (1<<MD_DISK_SYNC);
+                       active++;
+                       working++;
+               } else {
+                       d->state = 0;
+                       spare++;
+                       working++;
+               }
+               if (rdev2->desc_nr > highest)
+                       highest = rdev2->desc_nr;
+       }
+       
+       /* now set the "removed" bit on any non-trailing holes */
+       for (i=0; i<highest; i++) {
+               mdp_disk_t *d = &sb->disks[i];
+               if (d->state == 0 && d->number == 0) {
+                       d->number = i;
+                       d->raid_disk = i;
+                       d->state = (1<<MD_DISK_REMOVED);
+               }
+       }
+       sb->nr_disks = nr_disks;
+       sb->active_disks = active;
+       sb->working_disks = working;
+       sb->failed_disks = failed;
+       sb->spare_disks = spare;
+
+       sb->this_disk = sb->disks[rdev->desc_nr];
+       sb->sb_csum = calc_sb_csum(sb);
+}
+
+struct super_type super_types[] = {
+       [0] = {
+               .name   = "0.90.0",
+               .owner  = THIS_MODULE,
+               .load_super     = super_90_load,
+               .validate_super = super_90_validate,
+               .sync_super     = super_90_sync,
+       },
+};
+
+
+       
 static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev)
 {
        struct list_head *tmp;
@@ -618,9 +912,9 @@ static void print_rdev(mdk_rdev_t *rdev)
        printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%d ",
                bdev_partition_name(rdev->bdev),
                (unsigned long long)rdev->size, rdev->faulty, rdev->in_sync, rdev->desc_nr);
-       if (rdev->sb) {
+       if (rdev->sb_loaded) {
                printk(KERN_INFO "md: rdev superblock:\n");
-               print_sb(rdev->sb);
+               print_sb((mdp_super_t*)page_address(rdev->sb_page));
        } else
                printk(KERN_INFO "md: no rdev superblock!\n");
 }
@@ -648,61 +942,13 @@ void md_print_devices(void)
        printk("\n");
 }
 
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
-{
-       int ret;
-       mdp_super_t *tmp1, *tmp2;
-
-       tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
-       tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
-
-       if (!tmp1 || !tmp2) {
-               ret = 0;
-               printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
-               goto abort;
-       }
-
-       *tmp1 = *sb1;
-       *tmp2 = *sb2;
-
-       /*
-        * nr_disks is not constant
-        */
-       tmp1->nr_disks = 0;
-       tmp2->nr_disks = 0;
-
-       if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
-               ret = 0;
-       else
-               ret = 1;
-
-abort:
-       if (tmp1)
-               kfree(tmp1);
-       if (tmp2)
-               kfree(tmp2);
-
-       return ret;
-}
-
-static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2)
-{
-       if (    (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) &&
-               (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) &&
-               (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) &&
-               (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3))
-
-               return 1;
-
-       return 0;
-}
 
 static int write_disk_sb(mdk_rdev_t * rdev)
 {
        sector_t sb_offset;
        sector_t size;
 
-       if (!rdev->sb) {
+       if (!rdev->sb_loaded) {
                MD_BUG();
                return 1;
        }
@@ -710,10 +956,6 @@ static int write_disk_sb(mdk_rdev_t * rdev)
                MD_BUG();
                return 1;
        }
-       if (rdev->sb->md_magic != MD_SB_MAGIC) {
-               MD_BUG();
-               return 1;
-       }
 
        sb_offset = calc_dev_sboffset(rdev->bdev);
        if (rdev->sb_offset != sb_offset) {
@@ -751,116 +993,10 @@ fail:
 static void sync_sbs(mddev_t * mddev)
 {
        mdk_rdev_t *rdev;
-       mdp_super_t *sb;
        struct list_head *tmp;
-       int next_spare = mddev->raid_disks;
-
-       /* make all rdev->sb match mddev data..
-        * we setup the data in the first rdev and copy it
-        * to the others.
-        *
-        * 1/ zero out disks
-        * 2/ Add info for each disk, keeping track of highest desc_nr
-        * 3/ any empty disks < highest become removed
-        *
-        * disks[0] gets initialised to REMOVED because
-        * we cannot be sure from other fields if it has
-        * been initialised or not.
-        */
-       int highest = 0;
-       int i;
-       int active=0, working=0,failed=0,spare=0,nr_disks=0;
-
-       if (list_empty(&mddev->disks)) {
-               MD_BUG();
-               return;
-       }
-       rdev = list_entry(mddev->disks.next, mdk_rdev_t, same_set);
-       sb = rdev->sb;
-
-       memset(sb, 0, sizeof(*sb));
-
-       sb->md_magic = MD_SB_MAGIC;
-       sb->major_version = mddev->major_version;
-       sb->minor_version = mddev->minor_version;
-       sb->patch_version = mddev->patch_version;
-       sb->gvalid_words  = 0; /* ignored */
-       memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
-       memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
-       memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
-       memcpy(&sb->set_uuid3, mddev->uuid+12,4);
-
-       sb->ctime = mddev->ctime;
-       sb->level = mddev->level;
-       sb->size  = mddev->size;
-       sb->raid_disks = mddev->raid_disks;
-       sb->md_minor = mddev->__minor;
-       sb->not_persistent = !mddev->persistent;
-       sb->utime = mddev->utime;
-       sb->state = mddev->state;
-       sb->events_hi = (mddev->events>>32);
-       sb->events_lo = (u32)mddev->events;
 
-       sb->layout = mddev->layout;
-       sb->chunk_size = mddev->chunk_size;
-
-       sb->disks[0].state = (1<<MD_DISK_REMOVED);
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               mdp_disk_t *d;
-               if (rdev->raid_disk >= 0)
-                       rdev->desc_nr = rdev->raid_disk;
-               else
-                       rdev->desc_nr = next_spare++;
-               d = &sb->disks[rdev->desc_nr];
-               nr_disks++;
-               d->number = rdev->desc_nr;
-               d->major = MAJOR(rdev->bdev->bd_dev);
-               d->minor = MINOR(rdev->bdev->bd_dev);
-               if (rdev->raid_disk >= 0)
-                       d->raid_disk = rdev->raid_disk;
-               else
-                       d->raid_disk = rdev->desc_nr; /* compatability */
-               if (rdev->faulty) {
-                       d->state = (1<<MD_DISK_FAULTY);
-                       failed++;
-               } else if (rdev->in_sync) {
-                       d->state = (1<<MD_DISK_ACTIVE);
-                       d->state |= (1<<MD_DISK_SYNC);
-                       active++;
-                       working++;
-               } else {
-                       d->state = 0;
-                       spare++;
-                       working++;
-               }
-               if (rdev->desc_nr > highest)
-                       highest = rdev->desc_nr;
-       }
-       
-       /* now set the "removed" bit on any non-trailing holes */
-       for (i=0; i<highest; i++) {
-               mdp_disk_t *d = &sb->disks[i];
-               if (d->state == 0 && d->number == 0) {
-                       d->number = i;
-                       d->raid_disk = i;
-                       d->state = (1<<MD_DISK_REMOVED);
-               }
-       }
-       sb->nr_disks = nr_disks;
-       sb->active_disks = active;
-       sb->working_disks = working;
-       sb->failed_disks = failed;
-       sb->spare_disks = spare;
-
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               mdp_super_t *this_sb;
-               
-               this_sb = rdev->sb;
-               if (this_sb != sb)
-                       *this_sb = *sb;
-               this_sb->this_disk = this_sb->disks[rdev->desc_nr];
-               this_sb->sb_csum = calc_sb_csum(this_sb);
-       }
+       ITERATE_RDEV(mddev,rdev,tmp)
+               super_90_sync(mddev, rdev);
 }
 
 static void md_update_sb(mddev_t * mddev)
@@ -903,8 +1039,6 @@ repeat:
 
                printk("%s ", bdev_partition_name(rdev->bdev));
                if (!rdev->faulty) {
-                       printk("[events: %08lx]",
-                               (unsigned long)rdev->sb->events_lo);
                        err += write_disk_sb(rdev);
                } else
                        printk(")\n");
@@ -968,13 +1102,14 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk)
        }
 
        if (on_disk) {
-               if ((err = read_disk_sb(rdev))) {
-                       printk(KERN_WARNING "md: could not read %s's sb, not importing!\n",
+               err = super_90_load(rdev, NULL);
+               if (err == -EINVAL) {
+                       printk(KERN_WARNING "md: %s has invalid sb, not importing!\n",
                               bdev_partition_name(rdev->bdev));
                        goto abort_free;
                }
-               if ((err = check_disk_sb(rdev))) {
-                       printk(KERN_WARNING "md: %s has invalid sb, not importing!\n",
+               if (err < 0) {
+                       printk(KERN_WARNING "md: could not read %s's sb, not importing!\n",
                               bdev_partition_name(rdev->bdev));
                        goto abort_free;
                }
@@ -984,7 +1119,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk)
        return rdev;
 
 abort_free:
-       if (rdev->sb) {
+       if (rdev->sb_page) {
                if (rdev->bdev)
                        unlock_rdev(rdev);
                free_disk_sb(rdev);
@@ -1014,155 +1149,39 @@ abort_free:
 
 static int analyze_sbs(mddev_t * mddev)
 {
-       int out_of_date = 0, i;
+       int i;
        struct list_head *tmp;
        mdk_rdev_t *rdev, *freshest;
-       mdp_super_t *sb;
-
-       /*
-        * Verify the RAID superblock on each real device
-        */
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               if (rdev->faulty) {
-                       MD_BUG();
-                       goto abort;
-               }
-               if (!rdev->sb) {
-                       MD_BUG();
-                       goto abort;
-               }
-               if (check_disk_sb(rdev))
-                       goto abort;
-       }
 
-       /*
-        * The superblock constant part has to be the same
-        * for all disks in the array.
-        */
-       sb = NULL;
-
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               if (!sb) {
-                       sb = rdev->sb;
-                       continue;
-               }
-               if (!sb_equal(sb, rdev->sb)) {
+       freshest = NULL;
+       ITERATE_RDEV(mddev,rdev,tmp)
+               switch (super_90_load(rdev, freshest)) {
+               case 1:
+                       freshest = rdev;
+                       break;
+               case 0:
+                       break;
+               default:
                        printk(INCONSISTENT, bdev_partition_name(rdev->bdev));
                        kick_rdev_from_array(rdev);
-                       continue;
                }
-       }
 
-       /*
-        * OK, we have all disks and the array is ready to run. Let's
-        * find the freshest superblock, that one will be the superblock
-        * that represents the whole array.
-        */
-       freshest = NULL;
 
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               __u64 ev1, ev2;
-               /*
-                * if the checksum is invalid, use the superblock
-                * only as a last resort. (decrease it's age by
-                * one event)
-                */
-               if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
-                       if (rdev->sb->events_lo || rdev->sb->events_hi)
-                               if (!(rdev->sb->events_lo--))
-                                       rdev->sb->events_hi--;
-               }
+       super_90_validate(mddev, freshest);
 
-               printk(KERN_INFO "md: %s's event counter: %08lx\n",
-                      bdev_partition_name(rdev->bdev),
-                       (unsigned long)rdev->sb->events_lo);
-               if (!freshest) {
-                       freshest = rdev;
-                       continue;
-               }
-               /*
-                * Find the newest superblock version
-                */
-               ev1 = md_event(rdev->sb);
-               ev2 = md_event(freshest->sb);
-               if (ev1 != ev2) {
-                       out_of_date = 1;
-                       if (ev1 > ev2)
-                               freshest = rdev;
-               }
-       }
-       if (out_of_date) {
-               printk(OUT_OF_DATE);
-               printk(KERN_INFO "md: freshest: %s\n", bdev_partition_name(freshest->bdev));
-       }
-
-       sb = freshest->sb;
-
-       mddev->major_version = sb->major_version;
-       mddev->minor_version = sb->minor_version;
-       mddev->patch_version = sb->patch_version;
-       mddev->persistent = ! sb->not_persistent;
-       mddev->chunk_size = sb->chunk_size;
-       mddev->ctime = sb->ctime;
-       mddev->utime = sb->utime;
-       mddev->level = sb->level;
-       mddev->layout = sb->layout;
-       mddev->raid_disks = sb->raid_disks;
-       mddev->state = sb->state;
-       mddev->size = sb->size;
-       mddev->events = md_event(sb);
-       
-       memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
-       memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
-       memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
-       memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
-
-       /*
-        * at this point we have picked the 'best' superblock
-        * from all available superblocks.
-        * now we validate this superblock and kick out possibly
-        * failed disks.
-        */
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               /*
-                * Kick all non-fresh devices
-                */
-               __u64 ev1;
-               ev1 = md_event(rdev->sb);
-               ++ev1;
-               if (ev1 < mddev->events) {
-                       printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
-                                               bdev_partition_name(rdev->bdev));
-                       kick_rdev_from_array(rdev);
-                       continue;
-               }
-       }
-
-       /* set rdev->desc_nr for each device.
-        * for MULTIPATH, we just us sequential number as
-        * nothing else is meaningful
-        */
        i = 0;
        ITERATE_RDEV(mddev,rdev,tmp) {
+               if (rdev != freshest)
+                       if (super_90_validate(mddev, rdev)) {
+                               printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
+                                      bdev_partition_name(rdev->bdev));
+                               kick_rdev_from_array(rdev);
+                               continue;
+                       }
                if (mddev->level == LEVEL_MULTIPATH) {
                        rdev->desc_nr = i++;
                        rdev->raid_disk = rdev->desc_nr;
                        rdev->in_sync = 1;
-               } else {
-                       mdp_disk_t *desc;
-                       rdev->desc_nr = rdev->sb->this_disk.number;
-                       desc = sb->disks + rdev->desc_nr;
-                       rdev->raid_disk = -1;
-                       rdev->in_sync = rdev->faulty = 0;
-
-                       if (desc->state & (1<<MD_DISK_FAULTY)) {
-                               rdev->faulty = 1;
-                               kick_rdev_from_array(rdev);
-                       } else if (desc->state & (1<<MD_DISK_SYNC) &&
-                                  desc->raid_disk < mddev->raid_disks) {
-                               rdev->in_sync = 1;
-                               rdev->raid_disk = desc->raid_disk;
-                       }
                }
        }
 
@@ -1579,20 +1598,6 @@ out:
        return err;
 }
 
-/*
- * We have to safely support old arrays too.
- */
-int detect_old_array(mdp_super_t *sb)
-{
-       if (sb->major_version > 0)
-               return 0;
-       if (sb->minor_version >= 90)
-               return 0;
-
-       return -EINVAL;
-}
-
-
 static void autorun_array(mddev_t *mddev)
 {
        mdk_rdev_t *rdev;
@@ -1648,25 +1653,18 @@ static void autorun_devices(void)
 
                printk(KERN_INFO "md: considering %s ...\n", bdev_partition_name(rdev0->bdev));
                INIT_LIST_HEAD(&candidates);
-               ITERATE_RDEV_PENDING(rdev,tmp) {
-                       if (uuid_equal(rdev0, rdev)) {
-                               if (!sb_equal(rdev0->sb, rdev->sb)) {
-                                       printk(KERN_WARNING
-                                              "md: %s has same UUID as %s, but superblocks differ ...\n",
-                                              bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
-                                       continue;
-                               }
+               ITERATE_RDEV_PENDING(rdev,tmp)
+                       if (super_90_load(rdev, rdev0) >= 0) {
                                printk(KERN_INFO "md:  adding %s ...\n", bdev_partition_name(rdev->bdev));
                                list_move(&rdev->same_set, &candidates);
                        }
-               }
                /*
                 * now we have a set of devices, with all of them having
                 * mostly sane superblocks. It's time to allocate the
                 * mddev.
                 */
 
-               mddev = mddev_find(rdev0->sb->md_minor);
+               mddev = mddev_find(rdev0->preferred_minor);
                if (!mddev) {
                        printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
                        break;
@@ -1748,15 +1746,6 @@ static int autostart_array(dev_t startdev)
        }
        list_add(&start_rdev->same_set, &pending_raid_disks);
 
-       sb = start_rdev->sb;
-
-       err = detect_old_array(sb);
-       if (err) {
-               printk(KERN_WARNING "md: array version is too old to be autostarted ,"
-                      "use raidtools 0.90 mkraid --upgrade to upgrade the array "
-                      "without data loss!\n");
-               goto abort;
-       }
 
        for (i = 0; i < MD_SB_DISKS; i++) {
                mdp_disk_t *desc;
@@ -1875,8 +1864,6 @@ static int get_disk_info(mddev_t * mddev, void * arg)
                return -EFAULT;
 
        nr = info.number;
-       if (nr >= MD_SB_DISKS)
-               return -EINVAL;
 
        rdev = find_rdev_nr(mddev, nr);
        if (rdev) {
@@ -1918,18 +1905,13 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                if (!list_empty(&mddev->disks)) {
                        mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
                                                        mdk_rdev_t, same_set);
-                       if (!uuid_equal(rdev0, rdev)) {
+                       int err = super_90_load(rdev, NULL);
+                       if (err < 0) {
                                printk(KERN_WARNING "md: %s has different UUID to %s\n",
                                       bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
                                export_rdev(rdev);
                                return -EINVAL;
                        }
-                       if (!sb_equal(rdev0->sb, rdev->sb)) {
-                               printk(KERN_WARNING "md: %s has same UUID but different superblock to %s\n",
-                                      bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
-                               export_rdev(rdev);
-                               return -EINVAL;
-                       }
                }
                bind_rdev_to_array(rdev, mddev);
                return 0;
@@ -2080,11 +2062,11 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
        rdev->size = size;
        rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
 
-       for (i = mddev->raid_disks; i < MD_SB_DISKS; i++)
+       for (i = mddev->raid_disks; i < mddev->max_disks; i++)
                if (find_rdev_nr(mddev,i)==NULL)
                        break;
 
-       if (i == MD_SB_DISKS) {
+       if (i == mddev->max_disks) {
                printk(KERN_WARNING "md%d: can not hot-add to full array!\n",
                       mdidx(mddev));
                err = -EBUSY;
index a14b75456e5f185f89d39d92c2704c687a6207b7..137e23ff22e4b4c85bee18b29e149de3b9bb8a46 100644 (file)
@@ -59,7 +59,7 @@ static void mp_pool_free(void *mpb, void *data)
 static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdevp)
 {
        multipath_conf_t *conf = mddev_to_conf(mddev);
-       int i, disks = MD_SB_DISKS;
+       int i, disks = mddev->max_disks;
 
        /*
         * Later we do read balancing on the read side 
@@ -147,7 +147,7 @@ static int multipath_read_balance (multipath_conf_t *conf)
 {
        int disk;
 
-       for (disk = 0; disk < MD_SB_DISKS; disk++) {
+       for (disk = 0; disk < conf->mddev->max_disks; disk++) {
                mdk_rdev_t *rdev = conf->multipaths[disk].rdev;
                if (rdev && rdev->in_sync)
                        return disk;
@@ -259,7 +259,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
        printk(" --- wd:%d rd:%d\n", conf->working_disks,
                         conf->raid_disks);
 
-       for (i = 0; i < MD_SB_DISKS; i++) {
+       for (i = 0; i < conf->mddev->max_disks; i++) {
                tmp = conf->multipaths + i;
                if (tmp->rdev)
                        printk(" disk%d, o:%d, dev:%s\n",
index 453324c18bddd0f7674d7759704bc2d2991586dc..f658735d28b2a6755bbc2a9946df80a6d39c6867 100644 (file)
@@ -151,8 +151,9 @@ struct mdk_rdev_s
        struct block_device *bdev;      /* block device handle */
 
        struct page     *sb_page;
-       mdp_super_t     *sb;
+       int             sb_loaded;
        sector_t        sb_offset;
+       int             preferred_minor;        /* autorun support */
 
        /* A device can be in one of three states based on two flags:
         * Not working:   faulty==1 in_sync==0
@@ -196,6 +197,7 @@ struct mddev_s
        time_t                          ctime, utime;
        int                             level, layout;
        int                             raid_disks;
+       int                             max_disks;
        unsigned long                   state;
        sector_t                        size; /* used size of component devices */
        __u64                           events;