v2.4.10 -> v2.4.10.0.1
author	Linus Torvalds <torvalds@athlon.transmeta.com>
	Tue, 5 Feb 2002 04:19:52 +0000 (20:19 -0800)
committer	Linus Torvalds <torvalds@athlon.transmeta.com>
	Tue, 5 Feb 2002 04:19:52 +0000 (20:19 -0800)
  - me/al/andrea: buffers-in-pagecache coherency, buffer.c cleanups

fs/block_dev.c
fs/buffer.c
fs/ext2/inode.c
include/linux/blkdev.h
include/linux/fs.h
include/linux/pagemap.h
include/linux/swap.h
kernel/ksyms.c
mm/filemap.c
mm/memory.c

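In short, block-device buffers now live in the page cache of the device's own inode (bdev->bd_inode->i_mapping), so buffered block I/O and file I/O on the device node resolve to the same pages. A rough sketch of that relationship, written for illustration only (show_shared_mapping is not part of this patch):

static void show_shared_mapping(kdev_t dev)
{
	struct block_device *bdev = bdget(kdev_t_to_nr(dev));
	if (!bdev)
		return;
	/* grow_dev_page(), truncate_buffers() and def_blk_aops below all
	 * operate on this one address_space */
	printk("%s cached via mapping %p\n", kdevname(dev),
	       bdev->bd_inode->i_mapping);
	atomic_dec(&bdev->bd_count);	/* drop the bdget() reference, as the patch does */
}
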
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 871666e8d236fb4492c454d92810c74675989685..b731a11a09a76c03c27e22963e74e5a115d594c9 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
 
 #include <asm/uaccess.h>
 
-static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
-{
-       int err;
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
 
-       err = -EIO;
-       if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
-               goto out;
+static inline unsigned int blksize_bits(unsigned int size)
+{
+       unsigned int bits = 8;
+       do {
+               bits++;
+               size >>= 1;
+       } while (size > 256);
+       return bits;
+}
 
-       bh_result->b_blocknr = iblock;
-       bh_result->b_state |= 1UL << BH_Mapped;
-       err = 0;
+static inline unsigned int block_size(kdev_t dev)
+{
+       int retval = BLOCK_SIZE;
+       int major = MAJOR(dev);
 
- out:
-       return err;
+       if (blksize_size[major]) {
+               int minor = MINOR(dev);
+               if (blksize_size[major][minor])
+                       retval = blksize_size[major][minor];
+       }
+       return retval;
 }
 
-static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
+static unsigned int max_block(kdev_t dev)
 {
-       int i, nr_blocks, retval, dev = inode->i_rdev;
-       unsigned long * blocks = iobuf->blocks;
+       unsigned int retval = ~0U;
+       int major = MAJOR(dev);
 
-       if (blocksize != BUFFERED_BLOCKSIZE)
-               BUG();
+       if (blk_size[major]) {
+               int minor = MINOR(dev);
+               unsigned int blocks = blk_size[major][minor];
+               if (blocks) {
+                       unsigned int size = block_size(dev);
+                       unsigned int sizebits = blksize_bits(size);
+                       blocks += (size-1) >> BLOCK_SIZE_BITS;
+                       retval = blocks << (BLOCK_SIZE_BITS - sizebits);
+                       if (sizebits > BLOCK_SIZE_BITS)
+                               retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
+               }
+       }
+       return retval;
+}
 
-       nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
-       /* build the blocklist */
-       for (i = 0; i < nr_blocks; i++, blocknr++) {
-               struct buffer_head bh;
 
-               retval = blkdev_get_block(inode, blocknr, &bh);
-               if (retval)
-                       goto out;
+static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
+{
+       int err;
 
-               blocks[i] = bh.b_blocknr;
-       }
+       err = -EIO;
+       if (iblock >= max_block(inode->i_rdev))
+               goto out;
 
-       retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
+       bh_result->b_blocknr = iblock;
+       bh_result->b_state |= 1UL << BH_Mapped;
+       err = 0;
 
  out:
-       return retval;
+       return err;
 }
 
 static int blkdev_writepage(struct page * page)
 {
        int err, i;
+       unsigned int blocksize;
        unsigned long block;
        struct buffer_head *bh, *head;
        struct inode *inode = page->mapping->host;
 
        if (!PageLocked(page))
                BUG();
+       blocksize = block_size(inode->i_rdev);  
 
        if (!page->buffers)
-               create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
+               create_empty_buffers(page, inode->i_rdev, blocksize);
        head = page->buffers;
 
-       block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+       block = page->index << (PAGE_CACHE_SHIFT - blksize_bits(blocksize));
 
        bh = head;
        i = 0;
@@ -132,19 +154,21 @@ static int blkdev_readpage(struct file * file, struct page * page)
        struct inode *inode = page->mapping->host;
        kdev_t dev = inode->i_rdev;
        unsigned long iblock, lblock;
-       struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
-       unsigned int blocks;
+       struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+       unsigned int blocks, blocksize, blocksize_bits;
        int nr, i;
 
        if (!PageLocked(page))
                PAGE_BUG(page);
+       blocksize = block_size(dev);
+       blocksize_bits = blksize_bits(blocksize);
        if (!page->buffers)
-               create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+               create_empty_buffers(page, dev, blocksize);
        head = page->buffers;
 
-       blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
-       iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
-       lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
+       blocks = PAGE_CACHE_SIZE >> blocksize_bits;
+       iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+       lblock = max_block(dev);
        bh = head;
        nr = 0;
        i = 0;
@@ -159,7 +183,7 @@ static int blkdev_readpage(struct file * file, struct page * page)
                                        continue;
                        }
                        if (!buffer_mapped(bh)) {
-                               memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
+                               memset(kmap(page) + i * blocksize, 0, blocksize);
                                flush_dcache_page(page);
                                kunmap(page);
                                set_bit(BH_Uptodate, &bh->b_state);
@@ -206,19 +230,21 @@ static int __blkdev_prepare_write(struct inode *inode, struct page *page,
        unsigned long block;
        int err = 0;
        struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
-       kmap(page);
+       unsigned int blocksize, blocksize_bits;
 
+       blocksize = block_size(dev);
+       blocksize_bits = blksize_bits(blocksize);
        if (!page->buffers)
-               create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+               create_empty_buffers(page, dev, blocksize);
        head = page->buffers;
 
-       block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+       block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 
        for(bh = head, block_start = 0; bh != head || !block_start;
            block++, block_start=block_end, bh = bh->b_this_page) {
                if (!bh)
                        BUG();
-               block_end = block_start + BUFFERED_BLOCKSIZE;
+               block_end = block_start + blocksize;
                if (block_end <= from)
                        continue;
                if (block_start >= to)
@@ -258,7 +284,6 @@ static int blkdev_prepare_write(struct file *file, struct page *page, unsigned f
        int err = __blkdev_prepare_write(inode, page, from, to);
        if (err) {
                ClearPageUptodate(page);
-               kunmap(page);
        }
        return err;
 }
@@ -269,11 +294,13 @@ static int __blkdev_commit_write(struct inode *inode, struct page *page,
        unsigned block_start, block_end;
        int partial = 0, need_balance_dirty = 0;
        struct buffer_head *bh, *head;
+       unsigned int blocksize;
 
+       blocksize = block_size(inode->i_rdev);
        for(bh = head = page->buffers, block_start = 0;
            bh != head || !block_start;
            block_start=block_end, bh = bh->b_this_page) {
-               block_end = block_start + BUFFERED_BLOCKSIZE;
+               block_end = block_start + blocksize;
                if (block_end <= from || block_start >= to) {
                        if (!buffer_uptodate(bh))
                                partial = 1;
@@ -305,7 +332,6 @@ static int blkdev_commit_write(struct file *file, struct page *page,
 {
        struct inode *inode = page->mapping->host;
        __blkdev_commit_write(inode,page,from,to);
-       kunmap(page);
        return 0;
 }
 
@@ -797,8 +823,6 @@ int blkdev_put(struct block_device *bdev, int kind)
                                invalidate_buffers(bd_inode->i_rdev);
                        }
                        lock_super(sb);
-                       if (sb->s_flags & MS_RDONLY)
-                               update_buffers(bd_inode->i_rdev);
                        unlock_super(sb);
                        drop_super(sb);
                }
@@ -837,7 +861,6 @@ struct address_space_operations def_blk_aops = {
        sync_page: block_sync_page,
        prepare_write: blkdev_prepare_write,
        commit_write: blkdev_commit_write,
-       direct_IO: blkdev_direct_IO,
 };
 
 struct file_operations def_blk_fops = {
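
The blksize_bits()/max_block() helpers above replace the fixed BUFFERED_BLOCKSIZE arithmetic. A standalone user-space sketch of the same calculations, using illustrative numbers (a 10000 KB device with 4 KB soft blocks):

#include <stdio.h>

/* same loop as the blksize_bits() helper added above */
static unsigned int blksize_bits(unsigned int size)
{
	unsigned int bits = 8;
	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

int main(void)
{
	unsigned int kb = 10000, size = 4096;		/* blk_size[][] counts 1K units */
	unsigned int sizebits = blksize_bits(size);	/* 12 */
	unsigned int blocks = kb + ((size - 1) >> 10);	/* round up, as max_block() does */
	unsigned int nblocks = blocks >> (sizebits - 10);

	printf("blksize_bits: 512->%u 1024->%u 4096->%u\n",
	       blksize_bits(512), blksize_bits(1024), sizebits);
	printf("%u KB device, %u-byte blocks -> %u addressable blocks\n",
	       kb, size, nblocks);
	return 0;
}
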
diff --git a/fs/buffer.c b/fs/buffer.c
index 96a500c8bc2b7a5a568324b90ab5161deb5eb409..6a2433203dee24181a901dfe62d1ee080f929b16 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -96,7 +96,8 @@ struct bh_free_head {
 };
 static struct bh_free_head free_list[NR_SIZES];
 
-static int grow_buffers(int size);
+static void truncate_buffers(kdev_t dev);
+static int grow_buffers(kdev_t dev, int block, int size);
 static void __refile_buffer(struct buffer_head *);
 
 /* This is used by some architectures to estimate available memory. */
@@ -559,59 +560,28 @@ static void __insert_into_queues(struct buffer_head *bh)
        __insert_into_lru_list(bh, bh->b_list);
 }
 
-/* This function must only run if there are no other
- * references _anywhere_ to this buffer head.
- */
-static void put_last_free(struct buffer_head * bh)
+struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
 {
-       struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
-       struct buffer_head **bhp = &head->list;
-
-       bh->b_state = 0;
-
-       spin_lock(&head->lock);
-       bh->b_dev = B_FREE;
-       if(!*bhp) {
-               *bhp = bh;
-               bh->b_prev_free = bh;
-       }
-       bh->b_next_free = *bhp;
-       bh->b_prev_free = (*bhp)->b_prev_free;
-       (*bhp)->b_prev_free->b_next_free = bh;
-       (*bhp)->b_prev_free = bh;
-       spin_unlock(&head->lock);
-}
+       struct buffer_head *bh, **p = &hash(dev, block);
 
-/*
- * Why like this, I hear you say... The reason is race-conditions.
- * As we don't lock buffers (unless we are reading them, that is),
- * something might happen to it while we sleep (ie a read-error
- * will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
-static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size)
-{
-       struct buffer_head *bh = hash(dev, block);
+       read_lock(&hash_table_lock);
 
-       for (; bh; bh = bh->b_next)
-               if (bh->b_blocknr == block      &&
-                   bh->b_size    == size       &&
-                   bh->b_dev     == dev)
+       for (;;) {
+               bh = *p;
+               if (!bh)
                        break;
-       if (bh)
+               p = &bh->b_next;
+               if (bh->b_blocknr != block)
+                       continue;
+               if (bh->b_size != size)
+                       continue;
+               if (bh->b_dev != dev)
+                       continue;
                get_bh(bh);
+               break;
+       }
 
-       return bh;
-}
-
-struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
-{
-       struct buffer_head *bh;
-
-       read_lock(&hash_table_lock);
-       bh = __get_hash_table(dev, block, size);
        read_unlock(&hash_table_lock);
-
        return bh;
 }
 
@@ -688,7 +658,7 @@ int inode_has_buffers(struct inode *inode)
    we think the disk contains more recent information than the buffercache.
    The update == 1 pass marks the buffers we need to update, the update == 2
    pass does the actual I/O. */
-void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
 {
        int i, nlist, slept;
        struct buffer_head * bh, * bh_next;
@@ -722,33 +692,18 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
                        /* All buffers in the lru lists are mapped */
                        if (!buffer_mapped(bh))
                                BUG();
+                       if (buffer_dirty(bh))
+                               printk("invalidate: dirty buffer\n");
                        if (!atomic_read(&bh->b_count)) {
                                if (destroy_dirty_buffers || !buffer_dirty(bh)) {
                                        remove_inode_queue(bh);
+#if 0
                                        __remove_from_queues(bh);
                                        put_last_free(bh);
+#endif
                                }
-                       } else if (update) {
-                               if ((update == 2) ^ buffer_uptodate(bh)  &&
-                                   (update == 2) ^ buffer_req(bh)) {
-                                       write_unlock(&hash_table_lock);
-                                       atomic_inc(&bh->b_count);
-                                       spin_unlock(&lru_list_lock);
-
-                                       if (update == 2) {
-                                               ll_rw_block(READ, 1, &bh);
-                                               wait_on_buffer(bh);
-                                       } else {
-                                               lock_buffer(bh);
-                                               clear_bit(BH_Uptodate, &bh->b_state);
-                                               clear_bit(BH_Req, &bh->b_state);
-                                               unlock_buffer(bh);
-                                       }                                               
-
-                                       atomic_dec(&bh->b_count);
-                                       goto retry;
-                               }
-                       }
+                       } else
+                               printk("invalidate: busy buffer\n");
 
                        write_unlock(&hash_table_lock);
                        if (slept)
@@ -759,13 +714,14 @@ out:
        spin_unlock(&lru_list_lock);
        if (slept)
                goto retry;
+
+       /* Get rid of the page cache */
+       truncate_buffers(dev);
 }
 
 void set_blocksize(kdev_t dev, int size)
 {
        extern int *blksize_size[];
-       int i, nlist, slept;
-       struct buffer_head * bh, * bh_next;
 
        if (!blksize_size[MAJOR(dev)])
                return;
@@ -780,60 +736,10 @@ void set_blocksize(kdev_t dev, int size)
        }
        if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
                return;
+
        sync_buffers(dev, 2);
        blksize_size[MAJOR(dev)][MINOR(dev)] = size;
-
- retry:
-       slept = 0;
-       spin_lock(&lru_list_lock);
-       for(nlist = 0; nlist < NR_LIST; nlist++) {
-               bh = lru_list[nlist];
-               if (!bh)
-                       continue;
-               for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
-                       bh_next = bh->b_next_free;
-                       if (bh->b_dev != dev || bh->b_size == size)
-                               continue;
-                       /* Unhashed? */
-                       if (!bh->b_pprev)
-                               continue;
-                       if (buffer_locked(bh)) {
-                               get_bh(bh);
-                               spin_unlock(&lru_list_lock);
-                               wait_on_buffer(bh);
-                               slept = 1;
-                               spin_lock(&lru_list_lock);
-                               put_bh(bh);
-                       }
-
-                       write_lock(&hash_table_lock);
-                       if (!atomic_read(&bh->b_count)) {
-                               if (buffer_dirty(bh))
-                                       printk(KERN_WARNING
-                                              "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
-                                              kdevname(dev), bh->b_blocknr, bh->b_size);
-                               remove_inode_queue(bh);
-                               __remove_from_queues(bh);
-                               put_last_free(bh);
-                       } else {
-                               if (atomic_set_buffer_clean(bh))
-                                       __refile_buffer(bh);
-                               clear_bit(BH_Uptodate, &bh->b_state);
-                               printk(KERN_WARNING
-                                      "set_blocksize: "
-                                      "b_count %d, dev %s, block %lu, from %p\n",
-                                      atomic_read(&bh->b_count), bdevname(bh->b_dev),
-                                      bh->b_blocknr, __builtin_return_address(0));
-                       }
-                       write_unlock(&hash_table_lock);
-                       if (slept)
-                               goto out;
-               }
-       }
- out:
-       spin_unlock(&lru_list_lock);
-       if (slept)
-               goto retry;
+       invalidate_buffers(dev);
 }
 
 static void free_more_memory(void)
@@ -1137,57 +1043,16 @@ void invalidate_inode_buffers(struct inode *inode)
  */
 struct buffer_head * getblk(kdev_t dev, int block, int size)
 {
-       struct buffer_head * bh;
-       int isize;
-
-repeat:
-       spin_lock(&lru_list_lock);
-       write_lock(&hash_table_lock);
-       bh = __get_hash_table(dev, block, size);
-       if (bh)
-               goto out;
-
-       isize = BUFSIZE_INDEX(size);
-       spin_lock(&free_list[isize].lock);
-       bh = free_list[isize].list;
-       if (bh) {
-               __remove_from_free_list(bh, isize);
-               atomic_set(&bh->b_count, 1);
-       }
-       spin_unlock(&free_list[isize].lock);
+       for (;;) {
+               struct buffer_head * bh;
 
-       /*
-        * OK, FINALLY we know that this buffer is the only one of
-        * its kind, we hold a reference (b_count>0), it is unlocked,
-        * and it is clean.
-        */
-       if (bh) {
-               init_buffer(bh, NULL, NULL);
-               bh->b_dev = dev;
-               bh->b_blocknr = block;
-               bh->b_state = 1 << BH_Mapped;
+               bh = get_hash_table(dev, block, size);
+               if (bh)
+                       return bh;
 
-               /* Insert the buffer into the regular lists */
-               __insert_into_queues(bh);
-       out:
-               write_unlock(&hash_table_lock);
-               spin_unlock(&lru_list_lock);
-               touch_buffer(bh);
-               return bh;
+               if (!grow_buffers(dev, block, size))
+                       free_more_memory();
        }
-
-       /*
-        * If we block while refilling the free list, somebody may
-        * create the buffer first ... search the hashes again.
-        */
-       write_unlock(&hash_table_lock);
-       spin_unlock(&lru_list_lock);
-
-       if (!grow_buffers(size))
-               free_more_memory();
-
-       /* FIXME: getblk should fail if there's no enough memory */
-       goto repeat;
 }
 
 /* -1 -> no need to flush
@@ -1313,22 +1178,7 @@ void __brelse(struct buffer_head * buf)
  */
 void __bforget(struct buffer_head * buf)
 {
-       /* grab the lru lock here to block bdflush. */
-       spin_lock(&lru_list_lock);
-       write_lock(&hash_table_lock);
-       if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
-               goto in_use;
-       __hash_unlink(buf);
-       write_unlock(&hash_table_lock);
-       remove_inode_queue(buf);
-       __remove_from_lru_list(buf, buf->b_list);
-       spin_unlock(&lru_list_lock);
-       put_last_free(buf);
-       return;
-
- in_use:
-       write_unlock(&hash_table_lock);
-       spin_unlock(&lru_list_lock);
+       __brelse(buf);
 }
 
 /**
@@ -1524,17 +1374,17 @@ no_grow:
        goto try_again;
 }
 
-static void unmap_buffer(struct buffer_head * bh)
+/*
+ * Called when truncating a buffer on a page completely.
+ *
+ * We can avoid IO by marking it clean.
+ * FIXME!! FIXME!! FIXME!! We need to unmap it too,
+ * so that the filesystem won't write to it. There's
+ * some bug somewhere..
+ */
+static void discard_buffer(struct buffer_head * bh)
 {
-       if (buffer_mapped(bh)) {
-               mark_buffer_clean(bh);
-               lock_buffer(bh);
-               clear_bit(BH_Uptodate, &bh->b_state);
-               clear_bit(BH_Mapped, &bh->b_state);
-               clear_bit(BH_Req, &bh->b_state);
-               clear_bit(BH_New, &bh->b_state);
-               unlock_buffer(bh);
-       }
+       mark_buffer_clean(bh);
 }
 
 /*
@@ -1564,7 +1414,7 @@ int discard_bh_page(struct page *page, unsigned long offset, int drop_pagecache)
                 * is this block fully flushed?
                 */
                if (offset <= curr_off)
-                       unmap_buffer(bh);
+                       discard_buffer(bh);
                curr_off = next_off;
                bh = next;
        } while (bh != head);
@@ -2141,47 +1991,6 @@ int generic_block_bmap(struct address_space *mapping, long block, get_block_t *g
        return tmp.b_blocknr;
 }
 
-int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
-{
-       int i, nr_blocks, retval;
-       unsigned long * blocks = iobuf->blocks;
-
-       nr_blocks = iobuf->length / blocksize;
-       /* build the blocklist */
-       for (i = 0; i < nr_blocks; i++, blocknr++) {
-               struct buffer_head bh;
-
-               bh.b_state = 0;
-               bh.b_dev = inode->i_dev;
-               bh.b_size = blocksize;
-
-               retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
-               if (retval)
-                       goto out;
-
-               if (rw == READ) {
-                       if (buffer_new(&bh))
-                               BUG();
-                       if (!buffer_mapped(&bh)) {
-                               /* there was an hole in the filesystem */
-                               blocks[i] = -1UL;
-                               continue;
-                       }
-               } else {
-                       if (buffer_new(&bh))
-                               unmap_underlying_metadata(&bh);
-                       if (!buffer_mapped(&bh))
-                               BUG();
-               }
-               blocks[i] = bh.b_blocknr;
-       }
-
-       retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
-
- out:
-       return retval;
-}
-
 /*
  * IO completion routine for a buffer_head being used for kiobuf IO: we
  * can't dispatch the kiobuf callback until io_count reaches 0.  
@@ -2447,67 +2256,125 @@ fail:
        return err;
 }
 
+/*
+ * Create the page-cache page that contains the requested block
+ */
+static struct page * grow_dev_page(struct block_device *bdev, unsigned long index, int size)
+{
+       struct page * page;
+
+       page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS);
+       if (IS_ERR(page))
+               return NULL;
+
+       if (!PageLocked(page))
+               BUG();
+
+       if (!page->buffers) {
+               struct buffer_head *bh, *tail;
+               struct buffer_head *head = create_buffers(page, size, 0);
+               if (!head)
+                       goto failed;
+
+               bh = head;
+               do {
+                       tail = bh;
+                       bh = bh->b_this_page;
+               } while (bh);
+               tail->b_this_page = head;
+               page->buffers = head;
+               page_cache_get(page);
+               atomic_inc(&buffermem_pages);
+       }
+       return page;
+
+failed:
+       UnlockPage(page);
+       page_cache_release(page);
+       return NULL;
+}
+
+static void hash_page_buffers(struct page *page, kdev_t dev, int block, int size)
+{
+       struct buffer_head *head = page->buffers;
+       struct buffer_head *bh = head;
+       unsigned int uptodate;
+
+       uptodate = 1 << BH_Mapped;
+       if (Page_Uptodate(page))
+               uptodate |= 1 << BH_Uptodate;
+
+       spin_lock(&lru_list_lock);
+       write_lock(&hash_table_lock);
+       do {
+               if (!(bh->b_state & (1 << BH_Mapped))) {
+                       init_buffer(bh, NULL, NULL);
+                       bh->b_dev = dev;
+                       bh->b_blocknr = block;
+                       bh->b_state = uptodate;
+               }
+
+               /* Insert the buffer into the regular lists */
+               if (!bh->b_pprev) {
+                       __insert_into_queues(bh);
+               }
+
+               block++;
+               bh = bh->b_this_page;
+       } while (bh != head);
+       write_unlock(&hash_table_lock);
+       spin_unlock(&lru_list_lock);
+}
+
 /*
  * Try to increase the number of buffers available: the size argument
  * is used to determine what kind of buffers we want.
  */
-static int grow_buffers(int size)
+static int grow_buffers(kdev_t dev, int block, int size)
 {
        struct page * page;
-       struct buffer_head *bh, *tmp;
-       struct buffer_head * insert_point;
-       int isize;
+       struct block_device *bdev;
+       unsigned long index;
+       int sizebits;
 
        if ((size & 511) || (size > PAGE_SIZE)) {
                printk(KERN_ERR "VFS: grow_buffers: size = %d\n",size);
                return 0;
        }
+       sizebits = -1;
+       do {
+               sizebits++;
+       } while ((size << sizebits) < PAGE_SIZE);
 
-       page = alloc_page(GFP_NOFS);
-       if (!page)
-               goto out;
-       LockPage(page);
-       bh = create_buffers(page, size, 0);
-       if (!bh)
-               goto no_buffer_head;
-
-       isize = BUFSIZE_INDEX(size);
+       index = block >> sizebits;
+       block = index << sizebits;
 
-       spin_lock(&free_list[isize].lock);
-       insert_point = free_list[isize].list;
-       tmp = bh;
-       while (1) {
-               if (insert_point) {
-                       tmp->b_next_free = insert_point->b_next_free;
-                       tmp->b_prev_free = insert_point;
-                       insert_point->b_next_free->b_prev_free = tmp;
-                       insert_point->b_next_free = tmp;
-               } else {
-                       tmp->b_prev_free = tmp;
-                       tmp->b_next_free = tmp;
-               }
-               insert_point = tmp;
-               if (tmp->b_this_page)
-                       tmp = tmp->b_this_page;
-               else
-                       break;
+       bdev = bdget(kdev_t_to_nr(dev));
+       if (!bdev) {
+               printk("No block device for %s\n", kdevname(dev));
+               BUG();
        }
-       tmp->b_this_page = bh;
-       free_list[isize].list = bh;
-       spin_unlock(&free_list[isize].lock);
 
-       page->buffers = bh;
-       page->flags &= ~(1 << PG_referenced);
-       lru_cache_add(page);
-       UnlockPage(page);
-       atomic_inc(&buffermem_pages);
-       return 1;
+       /* Create a page with the proper size buffers.. */
+       page = grow_dev_page(bdev, index, size);
+
+       /* This is "wrong" - talk to Al Viro */
+       atomic_dec(&bdev->bd_count);
+       if (!page)
+               return 0;
 
-no_buffer_head:
+       /* Hash in the buffers on the hash list */
+       hash_page_buffers(page, dev, block, size);
        UnlockPage(page);
        page_cache_release(page);
-out:
-       return 0;
+       return 1;
+}
+
+static void truncate_buffers(kdev_t dev)
+{
+       struct block_device *bdev = bdget(kdev_t_to_nr(dev));
+       truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+       atomic_dec(&bdev->bd_count);
 }
 
 static int sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
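
grow_buffers() now rounds the requested block to the page that covers it before asking grow_dev_page() for that page. A small standalone sketch of the rounding, with illustrative numbers (1 KB buffers on 4 KB pages):

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	int block = 13, size = 1024;
	int sizebits = -1;

	/* same loop as grow_buffers(): 1 << sizebits buffers fit on a page */
	do {
		sizebits++;
	} while ((size << sizebits) < PAGE_SIZE);

	{
		unsigned long index = block >> sizebits;	/* page-cache index */
		int first = index << sizebits;			/* first block on that page */

		printf("block %d -> page %lu, buffers %d..%d\n",
		       block, index, first, first + (1 << sizebits) - 1);
	}
	return 0;
}
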
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0d06dededeb8898af794c6b5475049f5acace06d..1ff30e5085c96aa50f282dde40912a7a1377061f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -586,10 +586,6 @@ static int ext2_bmap(struct address_space *mapping, long block)
 {
        return generic_block_bmap(mapping,block,ext2_get_block);
 }
-static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
-{
-       return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
-}
 struct address_space_operations ext2_aops = {
        readpage: ext2_readpage,
        writepage: ext2_writepage,
@@ -597,7 +593,6 @@ struct address_space_operations ext2_aops = {
        prepare_write: ext2_prepare_write,
        commit_write: generic_commit_write,
        bmap: ext2_bmap,
-       direct_IO: ext2_direct_IO,
 };
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f266229c340ce0ece20a9132c2ef59e0a4fe86ec..86ea92ae5a94436edcbb908ef552f83b93e7a4d1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -203,15 +203,4 @@ static inline int get_hardsect_size(kdev_t dev)
 #define blk_finished_io(nsects)        do { } while (0)
 #define blk_started_io(nsects) do { } while (0)
 
-static inline int buffered_blk_size(kdev_t dev)
-{
-       int ret = INT_MAX;
-       int major = MAJOR(dev);
-
-       if (blk_size[major])
-               ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS);
-
-       return ret;
-}
-
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1600704891a622f45de6fef5daefe21513a790ff..686fb7160c2531d5809b79760191cb7540d1fd5e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -46,10 +46,6 @@ struct poll_table_struct;
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
 
-/* buffer header fixed size for the blkdev I/O through pagecache */
-#define BUFFERED_BLOCKSIZE_BITS 10
-#define BUFFERED_BLOCKSIZE (1 << BUFFERED_BLOCKSIZE_BITS)
-
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
        int nr_files;           /* read only */
@@ -1174,14 +1170,9 @@ extern int invalidate_device(kdev_t, int);
 extern void invalidate_inode_pages(struct inode *);
 extern void invalidate_inode_pages2(struct address_space *);
 extern void invalidate_inode_buffers(struct inode *);
-#define invalidate_buffers(dev)        __invalidate_buffers((dev), 0, 0)
-#define destroy_buffers(dev)   __invalidate_buffers((dev), 1, 0)
-#define update_buffers(dev)                    \
-do {                                           \
-       __invalidate_buffers((dev), 0, 1);      \
-       __invalidate_buffers((dev), 0, 2);      \
-} while (0)
-extern void __invalidate_buffers(kdev_t dev, int, int);
+#define invalidate_buffers(dev)        __invalidate_buffers((dev), 0)
+#define destroy_buffers(dev)   __invalidate_buffers((dev), 1)
+extern void __invalidate_buffers(kdev_t dev, int);
 extern void sync_inodes(kdev_t);
 extern void sync_unlocked_inodes(void);
 extern void write_inode_now(struct inode *, int);
@@ -1367,7 +1358,6 @@ extern int block_sync_page(struct page *);
 int generic_block_bmap(struct address_space *, long, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
-extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
 extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
 
 extern int waitfor_one_page(struct page*);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 88366342a2c9c14be96f86328ad3ec08fbd321b6..05e2f7c1a055f18baf16f6a25809e3671a8f139b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -76,6 +76,9 @@ extern struct page * __find_get_page(struct address_space *mapping,
        __find_get_page(mapping, index, page_hash(mapping, index))
 extern struct page * __find_lock_page (struct address_space * mapping,
                                unsigned long index, struct page **hash);
+extern struct page * find_or_create_page(struct address_space *mapping,
+                               unsigned long index, unsigned int gfp_mask);
+
 extern void lock_page(struct page *page);
 #define find_lock_page(mapping, index) \
        __find_lock_page(mapping, index, page_hash(mapping, index))
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0282b6bac60c5037141e6720b7d3436d5ea67a66..22f533a9956440b288ef2de67f7268037a1f4279 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -131,6 +131,7 @@ extern struct page * read_swap_cache_async(swp_entry_t);
 extern void oom_kill(void);
 
 /* linux/mm/swapfile.c */
+extern int total_swap_pages;
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern int is_swap_partition(kdev_t);
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index 928d2239d3ba398de5eb62f17f841105cae479ab..766a454f44a3c09fa73923aa19798d01fb8e43bb 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -210,7 +210,6 @@ EXPORT_SYMBOL(waitfor_one_page);
 EXPORT_SYMBOL(generic_file_read);
 EXPORT_SYMBOL(do_generic_file_read);
 EXPORT_SYMBOL(generic_file_write);
-EXPORT_SYMBOL(generic_direct_IO);
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_ro_fops);
 EXPORT_SYMBOL(generic_buffer_fdatasync);
diff --git a/mm/filemap.c b/mm/filemap.c
index 609e3bb04d945e5048ae3829da69fc9f73ce801d..42cc4bfd73ed0d4097ed014dd5e3b446fa7f660a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/iobuf.h>
+#include <linux/compiler.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -56,6 +57,7 @@ spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 #define CLUSTER_PAGES          (1 << page_cluster)
 #define CLUSTER_OFFSET(x)      (((x) >> page_cluster) << page_cluster)
 
+static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
 static void add_page_to_hash_queue(struct page * page, struct page **p)
 {
        struct page *next = *p;
@@ -792,11 +794,13 @@ struct page * __find_get_page(struct address_space *mapping,
 }
 
 /*
- * Same as the above, but lock the page too, verifying that
- * it's still valid once we own it.
+ * Must be called with the pagecache lock held,
+ * will return with it held (but it may be dropped
+ * during blocking operations..
  */
-struct page * __find_lock_page (struct address_space *mapping,
-                               unsigned long offset, struct page **hash)
+static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
+static struct page * __find_lock_page_helper(struct address_space *mapping,
+                                       unsigned long offset, struct page *hash)
 {
        struct page *page;
 
@@ -805,27 +809,72 @@ struct page * __find_lock_page (struct address_space *mapping,
         * the hash-list needs a held write-lock.
         */
 repeat:
-       spin_lock(&pagecache_lock);
-       page = __find_page_nolock(mapping, offset, *hash);
+       page = __find_page_nolock(mapping, offset, hash);
        if (page) {
                page_cache_get(page);
-               spin_unlock(&pagecache_lock);
+               if (TryLockPage(page)) {
+                       spin_unlock(&pagecache_lock);
+                       lock_page(page);
+                       spin_lock(&pagecache_lock);
 
-               lock_page(page);
+                       /* Has the page been re-allocated while we slept? */
+                       if (page->mapping != mapping || page->index != offset) {
+                               UnlockPage(page);
+                               page_cache_release(page);
+                               goto repeat;
+                       }
+               }
+       }
+       return page;
+}
 
-               /* Is the page still hashed? Ok, good.. */
-               if (page->mapping == mapping && page->index == offset)
-                       return page;
+/*
+ * Same as the above, but lock the page too, verifying that
+ * it's still valid once we own it.
+ */
+struct page * __find_lock_page (struct address_space *mapping,
+                               unsigned long offset, struct page **hash)
+{
+       struct page *page;
 
-               /* Nope: we raced. Release and try again.. */
-               UnlockPage(page);
-               page_cache_release(page);
-               goto repeat;
-       }
+       spin_lock(&pagecache_lock);
+       page = __find_lock_page_helper(mapping, offset, *hash);
        spin_unlock(&pagecache_lock);
-       return NULL;
+       return page;
 }
 
+/*
+ * Same as above, but create the page if required..
+ */
+struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
+{
+       struct page *page;
+       struct page **hash = page_hash(mapping, index);
+
+       spin_lock(&pagecache_lock);
+       page = __find_lock_page_helper(mapping, index, *hash);
+       spin_unlock(&pagecache_lock);
+       if (!page) {
+               struct page *newpage = alloc_page(gfp_mask);
+               page = ERR_PTR(-ENOMEM);
+               if (newpage) {
+                       spin_lock(&pagecache_lock);
+                       page = __find_lock_page_helper(mapping, index, *hash);
+                       if (likely(!page)) {
+                               page = newpage;
+                               __add_to_page_cache(page, mapping, index, hash);
+                               newpage = NULL;
+                       }
+                       spin_unlock(&pagecache_lock);
+                       if (unlikely(newpage != NULL))
+                               page_cache_release(newpage);
+               }
+       }
+       return page;    
+}
+
+
+
 #if 0
 #define PROFILE_READAHEAD
 #define DEBUG_READAHEAD
@@ -960,10 +1009,7 @@ static inline unsigned long calc_end_index(struct inode * inode)
 {
        unsigned long end_index;
 
-       if (!S_ISBLK(inode->i_mode))
-               end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-       else
-               end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
+       end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 
        return end_index;
 }
@@ -972,10 +1018,7 @@ static inline loff_t calc_rsize(struct inode * inode)
 {
        loff_t rsize;
 
-       if (!S_ISBLK(inode->i_mode))
-               rsize = inode->i_size;
-       else
-               rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
+       rsize = inode->i_size;
 
        return rsize;
 }
@@ -1316,92 +1359,6 @@ no_cached_page:
        UPDATE_ATIME(inode);
 }
 
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
-       ssize_t retval;
-       int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
-       struct kiobuf * iobuf;
-       struct inode * inode = filp->f_dentry->d_inode;
-       struct address_space * mapping = inode->i_mapping;
-
-       new_iobuf = 0;
-       iobuf = filp->f_iobuf;
-       if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
-               /*
-                * A parallel read/write is using the preallocated iobuf
-                * so just run slow and allocate a new one.
-                */
-               retval = alloc_kiovec(1, &iobuf);
-               if (retval)
-                       goto out;
-               new_iobuf = 1;
-       }
-
-       if (!S_ISBLK(inode->i_mode)) {
-               blocksize = inode->i_sb->s_blocksize;
-               blocksize_bits = inode->i_sb->s_blocksize_bits;
-       } else {
-               blocksize = BUFFERED_BLOCKSIZE;
-               blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
-       }
-       blocksize_mask = blocksize - 1;
-       chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
-       retval = -EINVAL;
-       if ((offset & blocksize_mask) || (count & blocksize_mask))
-               goto out_free;
-       if (!mapping->a_ops->direct_IO)
-               goto out_free;
-
-       /*
-        * Flush to disk exlusively the _data_, metadata must remains
-        * completly asynchronous or performance will go to /dev/null.
-        */
-       filemap_fdatasync(mapping);
-       retval = fsync_inode_data_buffers(inode);
-       filemap_fdatawait(mapping);
-       if (retval < 0)
-               goto out_free;
-
-       progress = retval = 0;
-       while (count > 0) {
-               iosize = count;
-               if (iosize > chunk_size)
-                       iosize = chunk_size;
-
-               retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
-               if (retval)
-                       break;
-
-               retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
-               if (rw == READ && retval > 0)
-                       mark_dirty_kiobuf(iobuf, retval);
-               
-               if (retval >= 0) {
-                       count -= retval;
-                       buf += retval;
-                       progress += retval;
-               }
-
-               unmap_kiobuf(iobuf);
-
-               if (retval != iosize)
-                       break;
-       }
-
-       if (progress)
-               retval = progress;
-
- out_free:
-       if (!new_iobuf)
-               clear_bit(0, &filp->f_iobuf_lock);
-       else
-               free_kiovec(1, &iobuf);
- out:  
-       return retval;
-}
-
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
        char *kaddr;
@@ -1435,9 +1392,6 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
        if ((ssize_t) count < 0)
                return -EINVAL;
 
-       if (filp->f_flags & O_DIRECT)
-               goto o_direct;
-
        retval = -EFAULT;
        if (access_ok(VERIFY_WRITE, buf, count)) {
                retval = 0;
@@ -1456,28 +1410,7 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
                                retval = desc.error;
                }
        }
- out:
        return retval;
-
- o_direct:
-       {
-               loff_t pos = *ppos, size;
-               struct inode * inode = filp->f_dentry->d_inode;
-
-               retval = 0;
-               if (!count)
-                       goto out; /* skip atime */
-               size = calc_rsize(inode);
-               if (pos < size) {
-                       if (pos + count > size)
-                               count = size - pos;
-                       retval = generic_file_direct_IO(READ, filp, buf, count, pos);
-                       if (retval > 0)
-                               *ppos = pos + retval;
-               }
-               UPDATE_ATIME(filp->f_dentry->d_inode);
-               goto out;
-       }
 }
 
 static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
@@ -2778,9 +2711,6 @@ generic_file_write(struct file *file,const char *buf,size_t count, loff_t *ppos)
        inode->i_ctime = inode->i_mtime = CURRENT_TIME;
        mark_inode_dirty_sync(inode);
 
-       if (file->f_flags & O_DIRECT)
-               goto o_direct;
-
        do {
                unsigned long index, offset;
                long page_fault;
@@ -2855,7 +2785,6 @@ unlock:
        if ((status >= 0) && (file->f_flags & O_SYNC))
                status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
        
-out_status:    
        err = written ? written : status;
 out:
 
@@ -2864,25 +2793,6 @@ out:
 fail_write:
        status = -EFAULT;
        goto unlock;
-
-o_direct:
-       written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
-       if (written > 0) {
-               loff_t end = pos + written;
-               if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
-                       inode->i_size = end;
-                       mark_inode_dirty(inode);
-               }
-               *ppos = end;
-               invalidate_inode_pages2(mapping);
-       }
-       /*
-        * Sync the fs metadata but not the minor inode changes and
-        * of course not the data as we did direct DMA for the IO.
-        */
-       if (written >= 0 && file->f_flags & O_SYNC)
-               status = generic_osync_inode(inode, OSYNC_METADATA);
-       goto out_status;
 }
 
 void __init page_cache_init(unsigned long mempages)
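
find_or_create_page() above is what grow_dev_page() builds on: it returns the page locked and with an extra reference, or ERR_PTR(-ENOMEM) if a fresh page could not be allocated. A minimal caller sketch under those assumptions (the function name and the 'mapping' argument are illustrative):

static struct page *get_locked_page(struct address_space *mapping,
				    unsigned long index)
{
	struct page *page = find_or_create_page(mapping, index, GFP_NOFS);

	if (IS_ERR(page))
		return NULL;		/* allocation failed */

	/* page is locked and referenced here; the caller must
	 * UnlockPage(page) and page_cache_release(page) when done,
	 * just as grow_buffers() does via grow_dev_page(). */
	return page;
}
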
diff --git a/mm/memory.c b/mm/memory.c
index efd520264e75b9168b3d23cd06522502b6af1da3..440ed1127de1fd2fb23ee4f18228b36d4ccf77c9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1101,6 +1101,10 @@ void swapin_readahead(swp_entry_t entry)
        return;
 }
 
+/* Swap 80% full? Release the pages as they are paged in.. */
+#define vm_swap_full() \
+       (swapper_space.nrpages*5 > total_swap_pages*4)
+
 /*
  * We hold the mm semaphore and the page_table_lock on entry and exit.
  */
@@ -1158,10 +1162,12 @@ static int do_swap_page(struct mm_struct * mm,
        swap_free(entry);
        mark_page_accessed(page);
        if (exclusive_swap_page(page)) {
-               if (vma->vm_flags & VM_WRITE)
-                       pte = pte_mkwrite(pte);
-               pte = pte_mkdirty(pte);
-               delete_from_swap_cache(page);
+               if (write_access || vm_swap_full()) {
+                       pte = pte_mkdirty(pte);
+                       if (vma->vm_flags & VM_WRITE)
+                               pte = pte_mkwrite(pte);
+                       delete_from_swap_cache(page);
+               }
        }
        UnlockPage(page);
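
The new vm_swap_full() macro in memory.c treats swap as effectively full once the swap cache (swapper_space.nrpages) exceeds 80% of total_swap_pages; in that case, or on a write fault, the page is removed from the swap cache as it is paged in. The threshold arithmetic, checked with illustrative numbers in a standalone sketch:

#include <stdio.h>

/* same comparison as the vm_swap_full() macro: nrpages*5 > total*4, i.e. >80% */
static int swap_full(unsigned long nrpages, unsigned long total_swap_pages)
{
	return nrpages * 5 > total_swap_pages * 4;
}

int main(void)
{
	unsigned long total = 100000;	/* illustrative swap size, in pages */

	printf("79%% used -> %d\n", swap_full(79000, total));	/* 0: keep swap cache */
	printf("81%% used -> %d\n", swap_full(81000, total));	/* 1: delete_from_swap_cache */
	return 0;
}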