git.hungrycats.org Git - linux/commitdiff
md/raid1: delay reads that could overtake behind-writes.
author: NeilBrown <neilb@suse.de>
Wed, 31 Mar 2010 00:21:44 +0000 (11:21 +1100)
committer: Greg Kroah-Hartman <gregkh@suse.de>
Fri, 13 Aug 2010 20:27:37 +0000 (13:27 -0700)
commit e555190d82c0f58e825e3cbd9e6ebe2e7ac713bd upstream.

When a raid1 array is configured to support write-behind
on some devices, it normally only reads from other devices.
If all devices are write-behind (because the rest have failed),
it is possible for a read request to be serviced before an earlier
behind-write request has completed, which would appear as data corruption.

So when forced to read from a WriteMostly device, wait for any
write-behind to complete, and don't start any more behind-writes.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
drivers/md/bitmap.c
drivers/md/bitmap.h
drivers/md/raid1.c

index 26ac8aad0b1993dab39cf91fc692f5dca534cdb5..dfbc0eb691860276fcd89acc88452ffc2213cc8f 100644 (file)
@@ -1351,7 +1351,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 {
        if (!bitmap) return;
        if (behind) {
-               atomic_dec(&bitmap->behind_writes);
+               if (atomic_dec_and_test(&bitmap->behind_writes))
+                       wake_up(&bitmap->behind_wait);
                PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
                  atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
        }
@@ -1675,6 +1676,7 @@ int bitmap_create(mddev_t *mddev)
        atomic_set(&bitmap->pending_writes, 0);
        init_waitqueue_head(&bitmap->write_wait);
        init_waitqueue_head(&bitmap->overflow_wait);
+       init_waitqueue_head(&bitmap->behind_wait);
 
        bitmap->mddev = mddev;
 
index cb821d76d1b4ebe7c8ad8aa2e44ab4c10b20f6f2..586688c20ea27a730e7784f0efc0a02fcc412327 100644 (file)
@@ -239,6 +239,7 @@ struct bitmap {
        atomic_t pending_writes; /* pending writes to the bitmap file */
        wait_queue_head_t write_wait;
        wait_queue_head_t overflow_wait;
+       wait_queue_head_t behind_wait;
 
        struct sysfs_dirent *sysfs_can_clear;
 };
index 84d3bf07dca70690ab95961bb7f43e62f27f5501..52c6b5fa4f13de146cffa4b0730fbb08b9de1d28 100644 (file)
@@ -866,6 +866,15 @@ static int make_request(struct request_queue *q, struct bio * bio)
                }
                mirror = conf->mirrors + rdisk;
 
+               if (test_bit(WriteMostly, &mirror->rdev->flags) &&
+                   bitmap) {
+                       /* Reading from a write-mostly device must
+                        * take care not to over-take any writes
+                        * that are 'behind'
+                        */
+                       wait_event(bitmap->behind_wait,
+                                  atomic_read(&bitmap->behind_writes) == 0);
+               }
                r1_bio->read_disk = rdisk;
 
                read_bio = bio_clone(bio, GFP_NOIO);
@@ -943,10 +952,14 @@ static int make_request(struct request_queue *q, struct bio * bio)
                set_bit(R1BIO_Degraded, &r1_bio->state);
        }
 
-       /* do behind I/O ? */
+       /* do behind I/O ?
+        * Not if there are too many, or cannot allocate memory,
+        * or a reader on WriteMostly is waiting for behind writes
+        * to flush */
        if (bitmap &&
            (atomic_read(&bitmap->behind_writes)
             < mddev->bitmap_info.max_write_behind) &&
+           !waitqueue_active(&bitmap->behind_wait) &&
            (behind_pages = alloc_behind_pages(bio)) != NULL)
                set_bit(R1BIO_BehindIO, &r1_bio->state);
 
@@ -2153,15 +2166,13 @@ static int stop(mddev_t *mddev)
 {
        conf_t *conf = mddev->private;
        struct bitmap *bitmap = mddev->bitmap;
-       int behind_wait = 0;
 
        /* wait for behind writes to complete */
-       while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
-               behind_wait++;
-               printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(HZ); /* wait a second */
+       if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
+               printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev));
                /* need to kick something here to make sure I/O goes? */
+               wait_event(bitmap->behind_wait,
+                          atomic_read(&bitmap->behind_writes) == 0);
        }
 
        raise_barrier(conf);