git.hungrycats.org Git - linux/commitdiff
[PATCH] direct_io mopup
authorAndrew Morton <akpm@zip.com.au>
Fri, 19 Jul 2002 04:10:30 +0000 (21:10 -0700)
committerLinus Torvalds <torvalds@home.transmeta.com>
Fri, 19 Jul 2002 04:10:30 +0000 (21:10 -0700)
Some cleanup from the surprise direct-to-bio for O_DIRECT merge.

- Remove bits and pieces from the kiobuf implementation

- Replace the waitqueue in struct dio with just a task_struct pointer
  and use wake_up_process.  (Ben).

- Only take mmap_sem around the individual calls to get_user_pages().
  (It pins the vmas, yes?)

- Remove some debug code.

- Fix JFS.

fs/buffer.c
fs/direct-io.c
fs/fcntl.c
fs/file_table.c
fs/jfs/inode.c
fs/open.c
include/linux/fs.h
mm/filemap.c

index 27300f9a6ee0ec2cfa439527d3098e236bf0e1d9..dfb38f4a6f4497ec6f7a7d1b7bf909f1461024e6 100644 (file)
@@ -2309,55 +2309,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
        return tmp.b_blocknr;
 }
 
-#if 0
-int generic_direct_IO(int rw, struct inode *inode,
-                       struct kiobuf *iobuf, unsigned long blocknr,
-                       int blocksize, get_block_t *get_block)
-{
-       int i, nr_blocks, retval = 0;
-       sector_t *blocks = iobuf->blocks;
-       struct block_device *bdev = NULL;
-
-       nr_blocks = iobuf->length / blocksize;
-       /* build the blocklist */
-       for (i = 0; i < nr_blocks; i++, blocknr++) {
-               struct buffer_head bh;
-
-               bh.b_state = 0;
-               bh.b_size = blocksize;
-
-               retval = get_block(inode, blocknr, &bh, rw & 1);
-               if (retval)
-                       goto out;
-
-               if (rw == READ) {
-                       if (buffer_new(&bh))
-                               BUG();
-                       if (!buffer_mapped(&bh)) {
-                               /* there was an hole in the filesystem */
-                               blocks[i] = -1UL;
-                               continue;
-                       }
-               } else {
-                       if (buffer_new(&bh))
-                               unmap_underlying_metadata(bh.b_bdev,
-                                                       bh.b_blocknr);
-                       if (!buffer_mapped(&bh))
-                               BUG();
-               }
-               blocks[i] = bh.b_blocknr;
-               bdev = bh.b_bdev;
-       }
-
-       /* This does not understand multi-device filesystems currently */
-       if (bdev)
-               retval = brw_kiovec(rw, 1, &iobuf, bdev, blocks, blocksize);
-
- out:
-       return retval;
-}
-#endif
-
 /*
  * Start I/O on a physical range of kernel memory, defined by a vector
  * of kiobuf structs (much like a user-space iovec list).
index 32c2b10975405847a144a0470634a9dde0b8e16d..9a40e00ec45262c87aff3390b03dad1e0a1943c6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * mm/direct-io.c
+ * fs/direct-io.c
  *
  * Copyright (C) 2002, Linus Torvalds.
  *
@@ -61,7 +61,7 @@ struct dio {
        atomic_t bio_count;
        spinlock_t bio_list_lock;
        struct bio *bio_list;           /* singly linked via bi_private */
-       wait_queue_head_t wait_q;
+       struct task_struct *waiter;
 };
 
 /*
@@ -81,6 +81,7 @@ static int dio_refill_pages(struct dio *dio)
        int nr_pages;
 
        nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
+       down_read(&current->mm->mmap_sem);
        ret = get_user_pages(
                current,                        /* Task for fault acounting */
                current->mm,                    /* whose pages? */
@@ -90,6 +91,7 @@ static int dio_refill_pages(struct dio *dio)
                0,                              /* force (?) */
                &dio->pages[0],
                NULL);                          /* vmas */
+       up_read(&current->mm->mmap_sem);
 
        if (ret >= 0) {
                dio->curr_user_address += ret * PAGE_SIZE;
@@ -139,7 +141,7 @@ static void dio_bio_end_io(struct bio *bio)
        bio->bi_private = dio->bio_list;
        dio->bio_list = bio;
        spin_unlock_irqrestore(&dio->bio_list_lock, flags);
-       wake_up(&dio->wait_q);
+       wake_up_process(dio->waiter);
 }
 
 static int
@@ -193,13 +195,11 @@ static void dio_cleanup(struct dio *dio)
  */
 static struct bio *dio_await_one(struct dio *dio)
 {
-       DECLARE_WAITQUEUE(wait, current);
        unsigned long flags;
        struct bio *bio;
 
        spin_lock_irqsave(&dio->bio_list_lock, flags);
        while (dio->bio_list == NULL) {
-               add_wait_queue(&dio->wait_q, &wait);
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (dio->bio_list == NULL) {
                        spin_unlock_irqrestore(&dio->bio_list_lock, flags);
@@ -208,7 +208,6 @@ static struct bio *dio_await_one(struct dio *dio)
                        spin_lock_irqsave(&dio->bio_list_lock, flags);
                }
                set_current_state(TASK_RUNNING);
-               remove_wait_queue(&dio->wait_q, &wait);
        }
        bio = dio->bio_list;
        dio->bio_list = bio->bi_private;
@@ -224,23 +223,17 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
        const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec = bio->bi_io_vec;
        int page_no;
-       int ret = 0;
 
        for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
                struct page *page = bvec[page_no].bv_page;
 
-               if (!uptodate) {
-                       if (ret == 0)
-                               ret = -EIO;
-               }
-
                if (dio->rw == READ)
                        set_page_dirty(page);
                page_cache_release(page);
        }
        atomic_dec(&dio->bio_count);
        bio_put(bio);
-       return ret;
+       return uptodate ? 0 : -EIO;
 }
 
 /*
@@ -265,7 +258,7 @@ static int dio_await_completion(struct dio *dio)
  * to keep the memory consumption sane we periodically reap any completed BIOs
  * during the BIO generation phase.
  *
- * This also helps to limis the peak amount of pinned userspace memory.
+ * This also helps to limit the peak amount of pinned userspace memory.
  */
 static int dio_bio_reap(struct dio *dio)
 {
@@ -388,15 +381,13 @@ out:
        return ret;
 }
 
-struct dio *g_dio;
-
 int
 generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset,
                        size_t count, get_block_t get_block)
 {
        const unsigned blocksize_mask = (1 << inode->i_blkbits) - 1;
        const unsigned long user_addr = (unsigned long)buf;
-       int ret = 0;
+       int ret;
        int ret2;
        struct dio dio;
        size_t bytes;
@@ -407,8 +398,6 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset,
                goto out;
        }
 
-       g_dio = &dio;
-
        /* BIO submission state */
        dio.bio = NULL;
        dio.bvec = NULL;
@@ -444,11 +433,9 @@ generic_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset,
        atomic_set(&dio.bio_count, 0);
        spin_lock_init(&dio.bio_list_lock);
        dio.bio_list = NULL;
-       init_waitqueue_head(&dio.wait_q);
+       dio.waiter = current;
 
-       down_read(&current->mm->mmap_sem);
        ret = do_direct_IO(&dio);
-       up_read(&current->mm->mmap_sem);
 
        if (dio.bio)
                dio_bio_submit(&dio);
index 9d9df23dceb91280625401e3f35e808f26ba632b..98392f716b2a2ab16e9826d3aeec9d98e4de1bfb 100644 (file)
@@ -248,23 +248,6 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
                if (!inode->i_mapping || !inode->i_mapping->a_ops ||
                        !inode->i_mapping->a_ops->direct_IO)
                                return -EINVAL;
-
-               /*
-                * alloc_kiovec() can sleep and we are only serialized by
-                * the big kernel lock here, so abuse the i_sem to serialize
-                * this case too. We of course wouldn't need to go deep down
-                * to the inode layer, we could stay at the file layer, but
-                * we don't want to pay for the memory of a semaphore in each
-                * file structure too and we use the inode semaphore that we just
-                * pay for anyways.
-                */
-               error = 0;
-               down(&inode->i_sem);
-               if (!filp->f_iobuf)
-                       error = alloc_kiovec(1, &filp->f_iobuf);
-               up(&inode->i_sem);
-               if (error < 0)
-                       return error;
        }
 
        /* required for strict SunOS emulation */
index 5734437cd75093726c9426d9bd07a15ee4cb1abe..cdaf93dd6716ee0b1a7bfa9e921e018b3514fd0d 100644 (file)
@@ -115,9 +115,6 @@ void __fput(struct file * file)
 
        locks_remove_flock(file);
 
-       if (file->f_iobuf)
-               free_kiovec(1, &file->f_iobuf);
-
        if (file->f_op && file->f_op->release)
                file->f_op->release(inode, file);
        fops_put(file->f_op);
index 7e8fbd68824d5756b00bb5d3ef71554ac79cc680..b3b2217261558085117e48d126fa19cd390380eb 100644 (file)
@@ -293,11 +293,10 @@ static int jfs_bmap(struct address_space *mapping, long block)
        return generic_block_bmap(mapping, block, jfs_get_block);
 }
 
-static int jfs_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
-                        unsigned long blocknr, int blocksize)
+static int jfs_direct_IO(int rw, struct inode *inode, char *buf,
+                       loff_t offset, size_t count)
 {
-       return generic_direct_IO(rw, inode, iobuf, blocknr,
-                                blocksize, jfs_get_block);
+       return generic_direct_IO(rw, inode, buf, offset, count, jfs_get_block);
 }
 
 struct address_space_operations jfs_aops = {
index 5ad6630562cf976cc821796b6344fe4ac0197f70..c3e5935440896cc397f7f4b2bb89a797767819f4 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -647,15 +647,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
        f->f_op = fops_get(inode->i_fop);
        file_move(f, &inode->i_sb->s_files);
 
-       /* preallocate kiobuf for O_DIRECT */
-       f->f_iobuf = NULL;
-       f->f_iobuf_lock = 0;
-       if (f->f_flags & O_DIRECT) {
-               error = alloc_kiovec(1, &f->f_iobuf);
-               if (error)
-                       goto cleanup_all;
-       }
-
        if (f->f_op && f->f_op->open) {
                error = f->f_op->open(inode,f);
                if (error)
@@ -675,8 +666,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
        return f;
 
 cleanup_all:
-       if (f->f_iobuf)
-               free_kiovec(1, &f->f_iobuf);
        fops_put(f->f_op);
        if (f->f_mode & FMODE_WRITE)
                put_write_access(inode);
index 2ac85b8e28a3c25d1328fb02506281b600e059b1..84413138923e8f5749b0bef99b6d52eb315c2d85 100644 (file)
@@ -274,7 +274,6 @@ struct iattr {
  */
 struct page;
 struct address_space;
-struct kiobuf;
 
 struct address_space_operations {
        int (*writepage)(struct page *);
@@ -493,10 +492,6 @@ struct file {
 
        /* needed for tty driver, and maybe others */
        void                    *private_data;
-
-       /* preallocated helper kiobuf to speedup O_DIRECT */
-       struct kiobuf           *f_iobuf;
-       long                    f_iobuf_lock;
 };
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
index b11dcb824da50c311fac79da9258cf90b5a764fc..a3691b828cc09040141ec5b532aaea33bad9f925 100644 (file)
@@ -1102,89 +1102,6 @@ no_cached_page:
        UPDATE_ATIME(inode);
 }
 
-#if 0
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
-       ssize_t retval;
-       int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
-       struct kiobuf * iobuf;
-       struct address_space * mapping = filp->f_dentry->d_inode->i_mapping;
-       struct inode * inode = mapping->host;
-
-       new_iobuf = 0;
-       iobuf = filp->f_iobuf;
-       if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
-               /*
-                * A parallel read/write is using the preallocated iobuf
-                * so just run slow and allocate a new one.
-                */
-               retval = alloc_kiovec(1, &iobuf);
-               if (retval)
-                       goto out;
-               new_iobuf = 1;
-       }
-
-       blocksize = 1 << inode->i_blkbits;
-       blocksize_bits = inode->i_blkbits;
-       blocksize_mask = blocksize - 1;
-       chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
-       retval = -EINVAL;
-       if ((offset & blocksize_mask) || (count & blocksize_mask))
-               goto out_free;
-
-       /*
-        * Flush to disk exclusively the _data_, metadata must remain
-        * completly asynchronous or performance will go to /dev/null.
-        */
-       retval = filemap_fdatawait(mapping);
-       if (retval == 0)
-               retval = filemap_fdatawrite(mapping);
-       if (retval == 0)
-               retval = filemap_fdatawait(mapping);
-       if (retval < 0)
-               goto out_free;
-
-       progress = retval = 0;
-       while (count > 0) {
-               iosize = count;
-               if (iosize > chunk_size)
-                       iosize = chunk_size;
-
-               retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
-               if (retval)
-                       break;
-
-               retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
-               if (rw == READ && retval > 0)
-                       mark_dirty_kiobuf(iobuf, retval);
-               
-               if (retval >= 0) {
-                       count -= retval;
-                       buf += retval;
-                       progress += retval;
-               }
-
-               unmap_kiobuf(iobuf);
-
-               if (retval != iosize)
-                       break;
-       }
-
-       if (progress)
-               retval = progress;
-
- out_free:
-       if (!new_iobuf)
-               clear_bit(0, &filp->f_iobuf_lock);
-       else
-               free_kiovec(1, &iobuf);
- out:  
-       return retval;
-}
-#endif
-
 int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
        char *kaddr;