if (bdev) {
int err;
- ret = filemap_fdatawait(bdev->bd_inode->i_mapping);
- err = filemap_fdatawrite(bdev->bd_inode->i_mapping);
- if (!ret)
- ret = err;
+ ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
err = filemap_fdatawait(bdev->bd_inode->i_mapping);
if (!ret)
ret = err;
*/
int fsync_super(struct super_block *sb)
{
- sync_inodes_sb(sb); /* All the inodes */
+ sync_inodes_sb(sb, 0);
DQUOT_SYNC(sb);
lock_super(sb);
if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
unlock_super(sb);
+ sync_blockdev(sb->s_bdev);
+ sync_inodes_sb(sb, 1);
return sync_blockdev(sb->s_bdev);
}
*/
asmlinkage long sys_sync(void)
{
- sync_inodes(); /* All mappings and inodes, including block devices */
+ sync_inodes(0); /* All mappings and inodes, including block devices */
DQUOT_SYNC(NULL);
sync_supers(); /* Write the superblocks */
- sync_inodes(); /* All the mappings and inodes, again. */
+ sync_inodes(1); /* All the mappings and inodes, again. */
return 0;
}
}
EXPORT_SYMBOL(sync_mapping_buffers);
+/**
+ * write_mapping_buffers - Start writeout of a mapping's "associated" buffers.
+ * @mapping: the mapping which wants those buffers written.
+ *
+ * Starts I/O against dirty buffers which are on @mapping->private_list.
+ * Those buffers must be backed by @mapping->assoc_mapping.
+ *
+ * The private_list buffers generally contain filesystem indirect blocks.
+ * The idea is that the filesystem can start I/O against the indirects at
+ * the same time as running generic_writeback_mapping(), so the indirect's
+ * I/O will be merged with the data.
+ *
+ * We sneakily write the buffers in probable tail-to-head order. This is
+ * because generic_writeback_mapping() writes in probable head-to-tail
+ * order. If the file is so huge that the data or the indirects overflow
+ * the request queue we will at least get some merging this way.
+ *
+ * Any clean+unlocked buffers are de-listed. Clean/locked buffers must be
+ * left on the list for an fsync() to wait on.
+ *
+ * Couldn't think of a smart way of avoiding livelock, so chose the dumb
+ * way instead.
+ *
+ * FIXME: duplicates fsync_inode_buffers() functionality a bit.
+ */
+int write_mapping_buffers(struct address_space *mapping)
+{
+ spinlock_t *lock;
+ struct address_space *buffer_mapping;
+ unsigned nr_to_write; /* livelock avoidance */
+ struct list_head *lh;
+ int ret = 0;
+
+ if (list_empty(&mapping->private_list))
+ goto out;
+
+ buffer_mapping = mapping->assoc_mapping;
+ lock = &buffer_mapping->private_lock;
+ spin_lock(lock);
+ nr_to_write = 0;
+ lh = mapping->private_list.next;
+ while (lh != &mapping->private_list) {
+ lh = lh->next;
+ nr_to_write++;
+ }
+ nr_to_write *= 2; /* Allow for some late additions */
+
+ while (nr_to_write-- && !list_empty(&mapping->private_list)) {
+ struct buffer_head *bh;
+
+ bh = BH_ENTRY(mapping->private_list.prev);
+ list_del_init(&bh->b_assoc_buffers);
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+ continue;
+ /* Stick it on the far end of the list. Order is preserved. */
+ list_add(&bh->b_assoc_buffers, &mapping->private_list);
+ if (test_set_buffer_locked(bh))
+ continue;
+ get_bh(bh);
+ spin_unlock(lock);
+ if (test_clear_buffer_dirty(bh)) {
+ bh->b_end_io = end_buffer_io_sync;
+ submit_bh(WRITE, bh);
+ } else {
+ unlock_buffer(bh);
+ put_bh(bh);
+ }
+ spin_lock(lock);
+ }
+ spin_unlock(lock);
+out:
+ return ret;
+}
+
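
A minimal caller-side sketch of how a filesystem might wire this up: its writeback_mapping method starts I/O on the private_list buffers and then on the data pages, so both streams can merge in the request queue. The myfs_ name and the int return type are assumptions; write_mapping_buffers(), generic_writeback_mapping() and the a_ops->writeback_mapping hook appear elsewhere in this patch.

	static int myfs_writeback_mapping(struct address_space *mapping, int *nr_to_write)
	{
		/* Start I/O on the indirect-block buffers hanging off private_list */
		write_mapping_buffers(mapping);
		/* Then write the data pages; requests from both can merge in the queue */
		return generic_writeback_mapping(mapping, nr_to_write);
	}
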
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
struct address_space *mapping = inode->i_mapping;
* superblock list, based upon its state.
*/
if (inode->i_state & I_LOCK)
- goto same_list;
+ goto out;
/*
* Only add valid (hashed) inode to the superblock's
* dirty list. Add blockdev inodes as well.
*/
if (list_empty(&inode->i_hash) && !S_ISBLK(inode->i_mode))
- goto same_list;
+ goto out;
/*
* If the inode was already on s_dirty, don't reposition
list_add(&inode->i_list, &sb->s_dirty);
}
}
-same_list:
+out:
spin_unlock(&inode_lock);
}
-static inline void write_inode(struct inode *inode, int sync)
+static void write_inode(struct inode *inode, int sync)
{
if (inode->i_sb->s_op && inode->i_sb->s_op->write_inode &&
!is_bad_inode(inode))
unsigned dirty;
unsigned long orig_dirtied_when;
struct address_space *mapping = inode->i_mapping;
+ struct super_block *sb = inode->i_sb;
list_del(&inode->i_list);
- list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
+ list_add(&inode->i_list, &sb->s_locked_inodes);
BUG_ON(inode->i_state & I_LOCK);
mapping->dirtied_when = 0; /* assume it's whole-file writeback */
spin_unlock(&inode_lock);
- if (wait)
- filemap_fdatawait(mapping);
-
- if (mapping->a_ops->writeback_mapping)
- mapping->a_ops->writeback_mapping(mapping, nr_to_write);
- else
- generic_writeback_mapping(mapping, NULL);
+ writeback_mapping(mapping, nr_to_write);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
inode->i_state &= ~I_LOCK;
if (!(inode->i_state & I_FREEING)) {
list_del(&inode->i_list);
- if (!list_empty(&mapping->dirty_pages)) {
- /* Not a whole-file writeback */
- mapping->dirtied_when = orig_dirtied_when;
- inode->i_state |= I_DIRTY_PAGES;
- list_add_tail(&inode->i_list, &inode->i_sb->s_dirty);
- } else if (inode->i_state & I_DIRTY) {
- list_add(&inode->i_list, &inode->i_sb->s_dirty);
- } else if (atomic_read(&inode->i_count)) {
- list_add(&inode->i_list, &inode_in_use);
+ if (inode->i_state & I_DIRTY) { /* Redirtied */
+ list_add(&inode->i_list, &sb->s_dirty);
} else {
- list_add(&inode->i_list, &inode_unused);
+ if (!list_empty(&mapping->dirty_pages)) {
+ /* Not a whole-file writeback */
+ mapping->dirtied_when = orig_dirtied_when;
+ inode->i_state |= I_DIRTY_PAGES;
+ list_add_tail(&inode->i_list,
+ &sb->s_dirty);
+ } else if (atomic_read(&inode->i_count)) {
+ list_add(&inode->i_list, &inode_in_use);
+ } else {
+ list_add(&inode->i_list, &inode_unused);
+ }
}
}
if (waitqueue_active(&inode->i_wait))
__sync_single_inode(inode, sync, nr_to_write);
}
-void writeback_single_inode(struct inode *inode, int sync, int *nr_to_write)
-{
- spin_lock(&inode_lock);
- __writeback_single_inode(inode, sync, nr_to_write);
- spin_unlock(&inode_lock);
-}
-
/*
- * Write out a list of dirty inodes.
- *
- * If `sync' is true, wait on writeout of the last mapping which we write.
+ * Write out a superblock's list of dirty inodes. A wait will be performed
+ * upon no inodes, all inodes or the final one, depending upon sync_mode.
*
* If older_than_this is non-NULL, then only write out mappings which
* had their first dirtying at a time earlier than *older_than_this.
*
- * Called under inode_lock.
- *
 * If we're a pdflush thread, then implement pdflush collision avoidance
* against the entire list.
+ *
+ * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
+ * that it can be located for waiting on in __writeback_single_inode().
+ *
+ * Called under inode_lock.
*/
-static void __sync_list(struct list_head *head, int sync_mode,
+static void sync_sb_inodes(struct super_block *sb, int sync_mode,
int *nr_to_write, unsigned long *older_than_this)
{
struct list_head *tmp;
+ struct list_head *head;
const unsigned long start = jiffies; /* livelock avoidance */
+ list_splice(&sb->s_dirty, &sb->s_io);
+ INIT_LIST_HEAD(&sb->s_dirty);
+ head = &sb->s_io;
while ((tmp = head->prev) != head) {
struct inode *inode = list_entry(tmp, struct inode, i_list);
struct address_space *mapping = inode->i_mapping;
struct backing_dev_info *bdi;
-
int really_sync;
- /* Was this inode dirtied after __sync_list was called? */
+ /* Was this inode dirtied after sync_sb_inodes was called? */
if (older_than_this &&
time_after(mapping->dirtied_when, *older_than_this))
- break;
+ goto out;
bdi = mapping->backing_dev_info;
if (current_is_pdflush() && !writeback_acquire(bdi))
really_sync = (sync_mode == WB_SYNC_ALL);
if ((sync_mode == WB_SYNC_LAST) && (head->prev == head))
really_sync = 1;
+
__writeback_single_inode(inode, really_sync, nr_to_write);
+ if (sync_mode == WB_SYNC_HOLD) {
+ mapping->dirtied_when = jiffies;
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &inode->i_sb->s_dirty);
+ }
+
if (current_is_pdflush())
writeback_release(bdi);
if (nr_to_write && *nr_to_write == 0)
break;
}
+out:
+ if (!list_empty(&sb->s_io)) {
+ /*
+ * Put the rest back, in the correct order.
+ */
+ list_splice(&sb->s_io, sb->s_dirty.prev);
+ INIT_LIST_HEAD(&sb->s_io);
+ }
return;
}
void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
unsigned long *older_than_this)
{
- struct super_block * sb;
- static unsigned short writeback_gen;
+ struct super_block *sb;
spin_lock(&inode_lock);
spin_lock(&sb_lock);
-
- /*
- * We could get into livelock here if someone is dirtying
- * inodes fast enough. writeback_gen is used to avoid that.
- */
- writeback_gen++;
-
sb = sb_entry(super_blocks.prev);
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
- if (sb->s_writeback_gen == writeback_gen)
- continue;
- sb->s_writeback_gen = writeback_gen;
if (!list_empty(&sb->s_dirty)) {
spin_unlock(&sb_lock);
- __sync_list(&sb->s_dirty, sync_mode,
- nr_to_write, older_than_this);
+ sync_sb_inodes(sb, sync_mode, nr_to_write,
+ older_than_this);
spin_lock(&sb_lock);
}
if (nr_to_write && *nr_to_write == 0)
spin_unlock(&inode_lock);
}
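
A minimal sketch of the kind of periodic caller that older_than_this is meant for, assuming a kupdate-style thread exists: the function name, the 1024-inode budget and the thirty-second cutoff are made up; writeback_unlocked_inodes() and the WB_SYNC_NONE mode come from the surrounding code.

	static void periodic_writeback_sketch(void)
	{
		int nr_to_write = 1024;                    /* write budget: livelock avoidance */
		unsigned long oldest = jiffies - 30 * HZ;  /* skip inodes dirtied in the last 30s */

		/* Write dirty inodes older than `oldest', without waiting on the I/O */
		writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, &oldest);
	}
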
-/*
- * Called under inode_lock.
- */
-static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes)
-{
- struct list_head *tmp = head;
- struct inode *inode;
-
- while (nr_inodes && (tmp = tmp->prev) != head) {
- inode = list_entry(tmp, struct inode, i_list);
-
- if (!atomic_read(&inode->i_count)) {
- struct backing_dev_info *bdi;
-
- bdi = inode->i_mapping->backing_dev_info;
- if (current_is_pdflush() && !writeback_acquire(bdi))
- goto out;
-
- __sync_single_inode(inode, 0, NULL);
-
- if (current_is_pdflush())
- writeback_release(bdi);
-
- nr_inodes--;
-
- /*
- * __sync_single_inode moved the inode to another list,
- * so we have to start looking from the list head.
- */
- tmp = head;
- }
- }
-out:
- return nr_inodes;
-}
-
static void __wait_on_locked(struct list_head *head)
{
struct list_head * tmp;
}
/*
- * writeback and wait upon the filesystem's dirty inodes.
- * We do it in two passes - one to write, and one to wait.
+ * writeback and wait upon the filesystem's dirty inodes. The caller will
+ * do this in two passes - one to write, and one to wait. WB_SYNC_HOLD is
+ * used to park the written inodes on sb->s_dirty for the wait pass.
*/
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb, int wait)
{
spin_lock(&inode_lock);
- while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) {
- __sync_list(&sb->s_dirty, WB_SYNC_NONE, NULL, NULL);
- __sync_list(&sb->s_dirty, WB_SYNC_ALL, NULL, NULL);
+ sync_sb_inodes(sb, wait ? WB_SYNC_ALL : WB_SYNC_HOLD, NULL, NULL);
+ if (wait)
__wait_on_locked(&sb->s_locked_inodes);
- }
spin_unlock(&inode_lock);
}
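
A minimal sketch of the write-then-wait calling pattern this enables, mirroring the fsync_super() hunk earlier in this patch; the helper name is hypothetical and quota/superblock writeout is omitted.

	static int fsync_sb_sketch(struct super_block *sb)
	{
		sync_inodes_sb(sb, 0);          /* pass 1: write, park inodes via WB_SYNC_HOLD */
		sync_blockdev(sb->s_bdev);      /* flush blockdev buffers dirtied by pass 1 */
		sync_inodes_sb(sb, 1);          /* pass 2: wait on the parked inodes */
		return sync_blockdev(sb->s_bdev);
	}
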
/*
- * writeback the dirty inodes for this filesystem
+ * Rather lame livelock avoidance.
*/
-void writeback_inodes_sb(struct super_block *sb)
+static void set_sb_syncing(int val)
{
- spin_lock(&inode_lock);
- while (!list_empty(&sb->s_dirty))
- __sync_list(&sb->s_dirty, WB_SYNC_NONE, NULL, NULL);
- spin_unlock(&inode_lock);
+ struct super_block *sb;
+ spin_lock(&sb_lock);
+ sb = sb_entry(super_blocks.prev);
+ for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
+ sb->s_syncing = val;
+ }
+ spin_unlock(&sb_lock);
}
/*
* Find a superblock with inodes that need to be synced
*/
-
static struct super_block *get_super_to_sync(void)
{
- struct list_head *p;
+ struct super_block *sb;
restart:
- spin_lock(&inode_lock);
spin_lock(&sb_lock);
- list_for_each(p, &super_blocks) {
- struct super_block *s = list_entry(p,struct super_block,s_list);
- if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes))
+ sb = sb_entry(super_blocks.prev);
+ for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
+ if (sb->s_syncing)
continue;
- s->s_count++;
+ sb->s_syncing = 1;
+ sb->s_count++;
spin_unlock(&sb_lock);
- spin_unlock(&inode_lock);
- down_read(&s->s_umount);
- if (!s->s_root) {
- drop_super(s);
+ down_read(&sb->s_umount);
+ if (!sb->s_root) {
+ drop_super(sb);
goto restart;
}
- return s;
+ return sb;
}
spin_unlock(&sb_lock);
- spin_unlock(&inode_lock);
return NULL;
}
/**
- * sync_inodes
- * @dev: device to sync the inodes from.
+ * sync_inodes
*
- * sync_inodes goes through the super block's dirty list,
- * writes them out, waits on the writeout and puts the inodes
- * back on the normal list.
- */
-
-void sync_inodes(void)
-{
- struct super_block * s;
- /*
- * Search the super_blocks array for the device(s) to sync.
- */
- while ((s = get_super_to_sync()) != NULL) {
- sync_inodes_sb(s);
- drop_super(s);
- }
-}
-
-/*
- * FIXME: the try_to_writeback_unused functions look dreadfully similar to
- * writeback_unlocked_inodes...
+ * sync_inodes() goes through each super block's dirty inode list, writes the
+ * inodes out, waits on the writeout and puts the inodes back on the normal
+ * list.
+ *
+ * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle
+ * part of the sync functions is that the blockdev "superblock" is processed
+ * last. This is because the write_inode() function of a typical fs will
+ * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
+ * What we want to do is to perform all that dirtying first, and then write
+ * back all those inode blocks via the blockdev mapping in one sweep. So the
+ * additional (somewhat redundant) sync_blockdev() calls here are to make
+ * sure that really happens. Because if we call sync_inodes_sb(wait=1) with
+ * outstanding dirty inodes, the writeback goes block-at-a-time within the
+ * filesystem's write_inode(). This is extremely slow.
*/
-void try_to_writeback_unused_inodes(unsigned long unused)
+void sync_inodes(int wait)
{
- struct super_block * sb;
- int nr_inodes = inodes_stat.nr_unused;
+ struct super_block *sb;
- spin_lock(&inode_lock);
- spin_lock(&sb_lock);
- sb = sb_entry(super_blocks.next);
- for (; nr_inodes && sb != sb_entry(&super_blocks);
- sb = sb_entry(sb->s_list.next)) {
- if (list_empty(&sb->s_dirty))
- continue;
- spin_unlock(&sb_lock);
- nr_inodes = __try_to_writeback_unused_list(&sb->s_dirty,
- nr_inodes);
- spin_lock(&sb_lock);
+ set_sb_syncing(0);
+ while ((sb = get_super_to_sync()) != NULL) {
+ sync_inodes_sb(sb, 0);
+ sync_blockdev(sb->s_bdev);
+ drop_super(sb);
+ }
+ if (wait) {
+ set_sb_syncing(0);
+ while ((sb = get_super_to_sync()) != NULL) {
+ sync_inodes_sb(sb, 1);
+ sync_blockdev(sb->s_bdev);
+ drop_super(sb);
+ }
}
- spin_unlock(&sb_lock);
- spin_unlock(&inode_lock);
}
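
A minimal sketch of the "typical fs" write_inode() behaviour the comment above depends on, assuming a hypothetical myfs_update_inode() helper that copies the in-core inode into its on-disk buffer and returns that buffer: the plain case only dirties a blockdev buffer (hence the sync_blockdev() sweeps above), and only the sync case does block-at-a-time I/O.

	static void myfs_write_inode(struct inode *inode, int sync)
	{
		struct buffer_head *bh = myfs_update_inode(inode);   /* hypothetical helper */

		mark_buffer_dirty(bh);          /* no I/O yet: just dirties a blockdev buffer */
		if (sync) {
			ll_rw_block(WRITE, 1, &bh); /* the slow, block-at-a-time case */
			wait_on_buffer(bh);
		}
		brelse(bh);
	}
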
/**