git.hungrycats.org Git - linux/commitdiff
[PATCH] pdflush exclusion
authorAndrew Morton <akpm@zip.com.au>
Sun, 19 May 2002 09:22:12 +0000 (02:22 -0700)
committerArnaldo Carvalho de Melo <acme@conectiva.com.br>
Sun, 19 May 2002 09:22:12 +0000 (02:22 -0700)
Use the pdflush exclusion infrastructure to ensure that only one
pdflush thread is ever performing writeback against a particular
request_queue.

This works rather well.  It requires a lot of activity against a lot of
disks to cause more pdflush threads to start up.  Possibly the
thread-creation logic is a little weak: it starts more threads when a
pdflush thread goes back to sleep.  It may be better to start new
threads within pdflush_operation().

All non-request_queue-backed address_spaces share the global
default_backing_dev_info structure.  So at present only a single
pdflush instance will be available for background writeback of *all*
NFS filesystems (for example).

If there is benefit in concurrent background writeback for multiple NFS
mounts then NFS would need to create per-mount backing_dev_info
structures and install those into new inodes' address_spaces in some
manner.

fs/fs-writeback.c
fs/inode.c
include/linux/fs.h
include/linux/writeback.h
mm/page-writeback.c

index 139283a310a66e8eefac17fab2d7d88ad794e6cd..b2d84f68c3da88b77923ff72bc373760263eefeb 100644 (file)
@@ -187,6 +187,9 @@ static void __sync_single_inode(struct inode *inode, int wait, int *nr_to_write)
 static void
 __writeback_single_inode(struct inode *inode, int sync, int *nr_to_write)
 {
+       if (current_is_pdflush() && (inode->i_state & I_LOCK))
+               return;
+
        while (inode->i_state & I_LOCK) {
                __iget(inode);
                spin_unlock(&inode_lock);
@@ -213,6 +216,9 @@ void writeback_single_inode(struct inode *inode, int sync, int *nr_to_write)
  * had their first dirtying at a time earlier than *older_than_this.
  *
  * Called under inode_lock.
+ *
+ * If we're a pdlfush thread, then implement pdlfush collision avoidance
+ * against the entire list.
  */
 static void __sync_list(struct list_head *head, int sync_mode,
                int *nr_to_write, unsigned long *older_than_this)
@@ -223,6 +229,8 @@ static void __sync_list(struct list_head *head, int sync_mode,
        while ((tmp = head->prev) != head) {
                struct inode *inode = list_entry(tmp, struct inode, i_list);
                struct address_space *mapping = inode->i_mapping;
+               struct backing_dev_info *bdi;
+
                int really_sync;
 
                /* Was this inode dirtied after __sync_list was called? */
@@ -233,10 +241,18 @@ static void __sync_list(struct list_head *head, int sync_mode,
                        time_after(mapping->dirtied_when, *older_than_this))
                        break;
 
+               bdi = mapping->backing_dev_info;
+               if (current_is_pdflush() && !writeback_acquire(bdi))
+                       break;
+
                really_sync = (sync_mode == WB_SYNC_ALL);
                if ((sync_mode == WB_SYNC_LAST) && (head->prev == head))
                        really_sync = 1;
                __writeback_single_inode(inode, really_sync, nr_to_write);
+
+               if (current_is_pdflush())
+                       writeback_release(bdi);
+
                if (nr_to_write && *nr_to_write == 0)
                        break;
        }
@@ -255,6 +271,8 @@ static void __sync_list(struct list_head *head, int sync_mode,
  *
  * If `older_than_this' is non-zero then only flush inodes which have a
  * flushtime older than *older_than_this.
+ *
+ * This is a "memory cleansing" operation, not a "data integrity" operation.
  */
 void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
                                unsigned long *older_than_this)
@@ -276,29 +294,12 @@ void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
                if (sb->s_writeback_gen == writeback_gen)
                        continue;
                sb->s_writeback_gen = writeback_gen;
-
-               if (current->flags & PF_FLUSHER) {
-                       if (sb->s_flags & MS_FLUSHING) {
-                               /*
-                                * There's no point in two pdflush threads
-                                * flushing the same device.  But for other
-                                * callers, we want to perform the flush
-                                * because the fdatasync is how we implement
-                                * writer throttling.
-                                */
-                               continue;
-                       }
-                       sb->s_flags |= MS_FLUSHING;
-               }
-
                if (!list_empty(&sb->s_dirty)) {
                        spin_unlock(&sb_lock);
                        __sync_list(&sb->s_dirty, sync_mode,
                                        nr_to_write, older_than_this);
                        spin_lock(&sb_lock);
                }
-               if (current->flags & PF_FLUSHER)
-                       sb->s_flags &= ~MS_FLUSHING;
                if (nr_to_write && *nr_to_write == 0)
                        break;
        }
@@ -307,7 +308,7 @@ void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
 }
 
 /*
- * Called under inode_lock
+ * Called under inode_lock.
  */
 static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes)
 {
@@ -318,7 +319,17 @@ static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes)
                inode = list_entry(tmp, struct inode, i_list);
 
                if (!atomic_read(&inode->i_count)) {
+                       struct backing_dev_info *bdi;
+
+                       bdi = inode->i_mapping->backing_dev_info;
+                       if (current_is_pdflush() && !writeback_acquire(bdi))
+                               goto out;
+
                        __sync_single_inode(inode, 0, NULL);
+
+                       if (current_is_pdflush())
+                               writeback_release(bdi);
+
                        nr_inodes--;
 
                        /* 
@@ -328,7 +339,7 @@ static int __try_to_writeback_unused_list(struct list_head *head, int nr_inodes)
                        tmp = head;
                }
        }
-
+out:
        return nr_inodes;
 }
 
@@ -421,7 +432,11 @@ void sync_inodes(void)
        }
 }
 
-void try_to_writeback_unused_inodes(unsigned long pexclusive)
+/*
+ * FIXME: the try_to_writeback_unused functions look dreadfully similar to
+ * writeback_unlocked_inodes...
+ */
+void try_to_writeback_unused_inodes(unsigned long unused)
 {
        struct super_block * sb;
        int nr_inodes = inodes_stat.nr_unused;
@@ -440,7 +455,6 @@ void try_to_writeback_unused_inodes(unsigned long pexclusive)
        }
        spin_unlock(&sb_lock);
        spin_unlock(&inode_lock);
-       clear_bit(0, (unsigned long *)pexclusive);
 }
 
 /**
index 1c1256a5f799cd913911d0807feb7b90259beca2..68c1ee16125211c5bd0c44919e59a1d8f897879f 100644 (file)
@@ -404,21 +404,14 @@ void prune_icache(int goal)
        dispose_list(freeable);
 
        /* 
-        * If we didn't freed enough clean inodes schedule
-        * a sync of the dirty inodes, we cannot do it
-        * from here or we're either synchronously dogslow
-        * or we deadlock with oom.
+        * If we didn't free enough clean inodes then schedule writeback of
+        * the dirty inodes.  We cannot do it from here or we're either
+        * synchronously dogslow or we deadlock with oom.
         */
-       if (goal) {
-               static unsigned long exclusive;
-
-               if (!test_and_set_bit(0, &exclusive)) {
-                       if (pdflush_operation(try_to_writeback_unused_inodes,
-                                               (unsigned long)&exclusive))
-                               clear_bit(0, &exclusive);
-               }
-       }
+       if (goal)
+               pdflush_operation(try_to_writeback_unused_inodes, 0);
 }
+
 /*
  * This is called from kswapd when we think we need some
  * more memory, but aren't really sure how much. So we
index 374045884cb87a0045bde066d8632e0b6e93f943..b936413f96f2dfceed5f3e1d02b004cf74db6a05 100644 (file)
@@ -112,7 +112,6 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #define MS_MOVE                8192
 #define MS_REC         16384
 #define MS_VERBOSE     32768
-#define MS_FLUSHING    (1<<16) /* inodes are currently under writeout */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -156,7 +155,6 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 #define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
 #define IS_SYNC(inode)         (__IS_FLG(inode, MS_SYNCHRONOUS) || ((inode)->i_flags & S_SYNC))
 #define IS_MANDLOCK(inode)     __IS_FLG(inode, MS_MANDLOCK)
-#define IS_FLUSHING(inode)     __IS_FLG(inode, MS_FLUSHING)
 
 #define IS_QUOTAINIT(inode)    ((inode)->i_flags & S_QUOTA)
 #define IS_NOQUOTA(inode)      ((inode)->i_flags & S_NOQUOTA)
index 1978e06d11319791d8a5f1e1307a709665761ff2..a089dd009fc15479adfe5af7ef46cb768795e6be 100644 (file)
@@ -12,6 +12,15 @@ extern spinlock_t inode_lock;
 extern struct list_head inode_in_use;
 extern struct list_head inode_unused;
 
+/*
+ * Yes, writeback.h requires sched.h
+ * No, sched.h is not included from here.
+ */
+static inline int current_is_pdflush(void)
+{
+       return current->flags & PF_FLUSHER;
+}
+
 /*
  * fs/fs-writeback.c
  */
index e2c65e1057dfe861728ef5c5c7d929be90e2cf3c..defc6988a30515913ddbf33ae54a34763c133630 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/writeback.h>
 #include <linux/init.h>
 #include <linux/sysrq.h>
+#include <linux/backing-dev.h>
 
 /*
  * Memory thresholds, in percentages
@@ -86,10 +87,7 @@ void balance_dirty_pages(struct address_space *mapping)
                wake_pdflush = 1;
        }
 
-       if (wake_pdflush && !IS_FLUSHING(mapping->host)) {
-               /*
-                * There is no flush thread against this device. Start one now.
-                */
+       if (wake_pdflush && !writeback_in_progress(mapping->backing_dev_info)) {
                if (dirty_and_writeback > async_thresh) {
                        pdflush_flush(dirty_and_writeback - async_thresh);
                        yield();