Collision avoidance for pdflush threads.
Turns the request_queue-based `unsigned long ra_pages' into a structure
(struct backing_dev_info) which contains ra_pages as well as a state longword.
That longword records the fact that a pdflush thread is currently
writing back against this request_queue, which avoids the situation
where several pdflush threads sleep on the same request_queue.
This patch provides only the infrastructure for the pdflush exclusion;
it is put to use in pdflush-single.patch.
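To illustrate the intended use of the exclusion (a minimal sketch only:
the helper below is hypothetical and is not part of this patch; the
real caller arrives in pdflush-single.patch):

	/*
	 * A pdflush worker skips any backing device which another
	 * thread is already servicing, rather than sleeping on it.
	 */
	static void wb_writeback_bdi(struct backing_dev_info *bdi)
	{
		if (!writeback_acquire(bdi))
			return;	/* another pdflush thread owns this device */

		/* ... write back dirty pages against this device ... */

		writeback_release(bdi);
	}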
#include <linux/blkpg.h>
#include <linux/genhd.h>
#include <linux/module.h> /* for EXPORT_SYMBOL */
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
unsigned short usval;
kdev_t dev = to_kdev_t(bdev->bd_dev);
int holder;
- unsigned long *ra_pages;
+ struct backing_dev_info *bdi;
intval = block_ioctl(bdev, cmd, arg);
if (intval != -ENOTTY)
case BLKFRASET:
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- ra_pages = blk_get_ra_pages(bdev);
- if (ra_pages == NULL)
+ bdi = blk_get_backing_dev_info(bdev);
+ if (bdi == NULL)
return -ENOTTY;
- *ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+ bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
return 0;
case BLKRAGET:
case BLKFRAGET:
if (!arg)
return -EINVAL;
- ra_pages = blk_get_ra_pages(bdev);
- if (ra_pages == NULL)
+ bdi = blk_get_backing_dev_info(bdev);
+ if (bdi == NULL)
return -ENOTTY;
- return put_user((*ra_pages * PAGE_CACHE_SIZE) / 512,
+ return put_user((bdi->ra_pages * PAGE_CACHE_SIZE) / 512,
(long *)arg);
case BLKSECTGET:
#include <linux/completion.h>
#include <linux/compiler.h>
#include <scsi/scsi.h>
+#include <linux/backing-dev.h>
#include <asm/system.h>
#include <asm/io.h>
}
/**
- * blk_get_ra_pages - get the address of a queue's readahead tunable
+ * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
* @bdev: device
*
* Locates the passed device's request queue and returns the address of its
- * readahead setting.
+ * backing_dev_info.
*
* Will return NULL if the request queue cannot be located.
*/
-unsigned long *blk_get_ra_pages(struct block_device *bdev)
+struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
{
- unsigned long *ret = NULL;
+ struct backing_dev_info *ret = NULL;
request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev));
if (q)
- ret = &q->ra_pages;
+ ret = &q->backing_dev_info;
return ret;
}
q->max_phys_segments = MAX_PHYS_SEGMENTS;
q->max_hw_segments = MAX_HW_SEGMENTS;
q->make_request_fn = mfn;
- q->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ q->backing_dev_info.state = 0;
blk_queue_max_sectors(q, MAX_SECTORS);
blk_queue_hardsect_size(q, 512);
inode->i_bdev = new_bdev;
inode->i_data.a_ops = &def_blk_aops;
inode->i_data.gfp_mask = GFP_USER;
- inode->i_data.ra_pages = &default_ra_pages;
+ inode->i_data.backing_dev_info = &default_backing_dev_info;
spin_lock(&bdev_lock);
bdev = bdfind(dev, head);
if (!bdev) {
}
}
}
- if (bdev->bd_inode->i_data.ra_pages == &default_ra_pages) {
- unsigned long *ra_pages = blk_get_ra_pages(bdev);
- if (ra_pages == NULL)
- ra_pages = &default_ra_pages;
- inode->i_data.ra_pages = ra_pages;
+ if (bdev->bd_inode->i_data.backing_dev_info ==
+ &default_backing_dev_info) {
+ struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);
+ if (bdi == NULL)
+ bdi = &default_backing_dev_info;
+ inode->i_data.backing_dev_info = bdi;
}
if (bdev->bd_op->open) {
ret = bdev->bd_op->open(inode, file);
out2:
if (!bdev->bd_openers) {
bdev->bd_op = NULL;
- bdev->bd_inode->i_data.ra_pages = &default_ra_pages;
+ bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains) {
blkdev_put(bdev->bd_contains, BDEV_RAW);
bdev->bd_contains = NULL;
__MOD_DEC_USE_COUNT(bdev->bd_op->owner);
if (!bdev->bd_openers) {
bdev->bd_op = NULL;
- bdev->bd_inode->i_data.ra_pages = &default_ra_pages;
+ bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains) {
blkdev_put(bdev->bd_contains, BDEV_RAW);
bdev->bd_contains = NULL;
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
/**
* __mark_inode_dirty - internal function
return err;
}
+
+/**
+ * writeback_acquire - attempt to get exclusive writeback access to a device
+ * @bdi: the device's backing_dev_info structure
+ *
+ * It is a waste of resources to have more than one pdflush thread blocked on
+ * a single request queue. Exclusion at the request_queue level is obtained
+ * via a flag in the request_queue's backing_dev_info.state.
+ *
+ * Non-request_queue-backed address_spaces will share the global
+ * default_backing_dev_info unless they implement their own, which is
+ * somewhat inefficient, as it may prevent concurrent writeback against
+ * multiple devices.
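+ *
+ * Sketch of the intended calling pattern (illustrative only; the real
+ * caller is added by pdflush-single.patch):
+ *
+ *	if (writeback_acquire(bdi)) {
+ *		<write back some dirty pages against bdi>
+ *		writeback_release(bdi);
+ *	}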
+ */
+int writeback_acquire(struct backing_dev_info *bdi)
+{
+ return !test_and_set_bit(BDI_pdflush, &bdi->state);
+}
+
+/**
+ * writeback_in_progress - determine whether there is writeback in progress
+ * against a backing device.
+ * @bdi: the device's backing_dev_info structure.
+ */
+int writeback_in_progress(struct backing_dev_info *bdi)
+{
+ return test_bit(BDI_pdflush, &bdi->state);
+}
+
+/**
+ * writeback_release - relinquish exclusive writeback access against a device.
+ * @bdi: the device's backing_dev_info structure
+ */
+void writeback_release(struct backing_dev_info *bdi)
+{
+ BUG_ON(!writeback_in_progress(bdi));
+ clear_bit(BDI_pdflush, &bdi->state);
+}
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/writeback.h>
+#include <linux/backing-dev.h>
/*
* New inode.c implementation.
inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
if (inode) {
+ struct address_space * const mapping = &inode->i_data;
+
inode->i_sb = sb;
inode->i_dev = sb->s_dev;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_pipe = NULL;
inode->i_bdev = NULL;
inode->i_cdev = NULL;
- inode->i_data.a_ops = &empty_aops;
- inode->i_data.host = inode;
- inode->i_data.gfp_mask = GFP_HIGHUSER;
- inode->i_data.dirtied_when = 0;
- inode->i_mapping = &inode->i_data;
- inode->i_data.ra_pages = &default_ra_pages;
- inode->i_data.assoc_mapping = NULL;
+
+ mapping->a_ops = &empty_aops;
+ mapping->host = inode;
+ mapping->gfp_mask = GFP_HIGHUSER;
+ mapping->dirtied_when = 0;
+ mapping->assoc_mapping = NULL;
+ mapping->backing_dev_info = &default_backing_dev_info;
if (sb->s_bdev)
- inode->i_data.ra_pages = sb->s_bdev->bd_inode->i_mapping->ra_pages;
+ mapping->backing_dev_info =
+ sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
memset(&inode->u, 0, sizeof(inode->u));
+ inode->i_mapping = mapping;
}
return inode;
}
#include <linux/locks.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h> /* For bdev_hardsect_size(). */
+#include <linux/backing-dev.h>
#include "ntfs.h"
#include "sysctl.h"
vol->mftbmp_mapping.assoc_mapping = NULL;
vol->mftbmp_mapping.dirtied_when = 0;
vol->mftbmp_mapping.gfp_mask = GFP_HIGHUSER;
- vol->mftbmp_mapping.ra_pages =
- sb->s_bdev->bd_inode->i_mapping->ra_pages;
+ vol->mftbmp_mapping.backing_dev_info =
+ sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
/*
* Default is group and other don't have any access to files or
#include <linux/slab.h>
#include <linux/tty.h>
#include <linux/iobuf.h>
+#include <linux/backing-dev.h>
#include <asm/uaccess.h>
goto cleanup_file;
}
- f->f_ra.ra_pages = *inode->i_mapping->ra_pages;
+ f->f_ra.ra_pages = inode->i_mapping->backing_dev_info->ra_pages;
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
--- /dev/null
+/*
+ * include/linux/backing-dev.h
+ *
+ * low-level device information and state which is propagated up through
+ * to high-level code.
+ */
+
+#ifndef _LINUX_BACKING_DEV_H
+#define _LINUX_BACKING_DEV_H
+
+/*
+ * Bits in backing_dev_info.state
+ */
+enum bdi_state {
+ BDI_pdflush, /* A pdflush thread is working this device */
+ BDI_unused, /* Available bits start here */
+};
+
+struct backing_dev_info {
+ unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
+ unsigned long state; /* Always use atomic bitops on this */
+};
+
+extern struct backing_dev_info default_backing_dev_info;
+
+int writeback_acquire(struct backing_dev_info *bdi);
+int writeback_in_progress(struct backing_dev_info *bdi);
+void writeback_release(struct backing_dev_info *bdi);
+
+#endif /* _LINUX_BACKING_DEV_H */
#include <linux/tqueue.h>
#include <linux/list.h>
#include <linux/pagemap.h>
+#include <linux/backing-dev.h>
#include <asm/scatterlist.h>
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
- /*
- * The VM-level readahead tunable for this device. In
- * units of PAGE_CACHE_SIZE pages.
- */
- unsigned long ra_pages;
+ struct backing_dev_info backing_dev_info;
/*
* The queue owner gets to use this for whatever they like.
extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn);
-extern unsigned long *blk_get_ra_pages(struct block_device *bdev);
+extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int);
};
+struct backing_dev_info;
struct address_space {
struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t i_shared_lock; /* and spinlock protecting it */
unsigned long dirtied_when; /* jiffies of first page dirtying */
int gfp_mask; /* how to allocate the pages */
- unsigned long *ra_pages; /* device readahead */
+ struct backing_dev_info *backing_dev_info; /* device readahead, etc */
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
void page_cache_readahead(struct file *file, unsigned long offset);
void page_cache_readaround(struct file *file, unsigned long offset);
void handle_ra_thrashing(struct file *file);
-extern unsigned long default_ra_pages;
/* vma is the first one with address < vma->vm_end,
* and even address < vma->vm_start. Have to extend vma. */
* to perform their I/O against a large file.
*/
static int wb_writeback_jifs = 5 * HZ;
+static struct timer_list wb_timer;
/*
* Periodic writeback of "old" data.
yield();
}
run_task_queue(&tq_disk);
+ mod_timer(&wb_timer, jiffies + wb_writeback_jifs);
}
-/*
- * The writeback timer, for kupdate-style functionality
- */
-static struct timer_list wb_timer;
-
static void wb_timer_fn(unsigned long unused)
{
- mod_timer(&wb_timer, jiffies + wb_writeback_jifs);
pdflush_operation(wb_kupdate, 0);
}
preempt_disable();
spin_lock_irq(&pdflush_lock);
nr_pdflush_threads++;
+// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid);
for ( ; ; ) {
struct pdflush_work *pdf;
if (jiffies - last_empty_jifs > 1 * HZ) {
/* unlocked list_empty() test is OK here */
if (list_empty(&pdflush_list)) {
- /* unlocked nr_pdflush_threads test is OK here */
+ /* unlocked test is OK here */
if (nr_pdflush_threads < MAX_PDFLUSH_THREADS)
start_one_pdflush_thread();
}
}
}
nr_pdflush_threads--;
+// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid);
spin_unlock_irq(&pdflush_lock);
preempt_enable();
return 0;
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
-unsigned long default_ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+struct backing_dev_info default_backing_dev_info = {
+ ra_pages: (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
+ state: 0,
+};
/*
* Return max readahead size for this inode in number-of-pages.