#include <asm/uaccess.h>
-static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
-{
- int err;
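+/* The most buffer_heads one page can hold: one per 512-byte block */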
+#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
- err = -EIO;
- if (iblock >= buffered_blk_size(inode->i_rdev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS))
- goto out;
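+/* log2 of the blocksize: 512 -> 9, 1024 -> 10, ..., 4096 -> 12 */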
+static inline unsigned int blksize_bits(unsigned int size)
+{
+ unsigned int bits = 8;
+ do {
+ bits++;
+ size >>= 1;
+ } while (size > 256);
+ return bits;
+}
- bh_result->b_blocknr = iblock;
- bh_result->b_state |= 1UL << BH_Mapped;
- err = 0;
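+/* Soft blocksize currently set for the device, 1k (BLOCK_SIZE) by default */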
+static inline unsigned int block_size(kdev_t dev)
+{
+ int retval = BLOCK_SIZE;
+ int major = MAJOR(dev);
- out:
- return err;
+ if (blksize_size[major]) {
+ int minor = MINOR(dev);
+ if (blksize_size[major][minor])
+ retval = blksize_size[major][minor];
+ }
+ return retval;
}
-static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
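+/* Device size in units of its current blocksize, rounded up; ~0U if unknown */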
+static unsigned int max_block(kdev_t dev)
{
- int i, nr_blocks, retval, dev = inode->i_rdev;
- unsigned long * blocks = iobuf->blocks;
+ unsigned int retval = ~0U;
+ int major = MAJOR(dev);
- if (blocksize != BUFFERED_BLOCKSIZE)
- BUG();
+ if (blk_size[major]) {
+ int minor = MINOR(dev);
+ unsigned int blocks = blk_size[major][minor];
+ if (blocks) {
+ unsigned int size = block_size(dev);
+ unsigned int sizebits = blksize_bits(size);
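+ /* blk_size[] counts 1k blocks: round up, then rescale to the device blocksize */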
+ blocks += (size-1) >> BLOCK_SIZE_BITS;
+ retval = blocks << (BLOCK_SIZE_BITS - sizebits);
+ if (sizebits > BLOCK_SIZE_BITS)
+ retval = blocks >> (sizebits - BLOCK_SIZE_BITS);
+ }
+ }
+ return retval;
+}
- nr_blocks = iobuf->length >> BUFFERED_BLOCKSIZE_BITS;
- /* build the blocklist */
- for (i = 0; i < nr_blocks; i++, blocknr++) {
- struct buffer_head bh;
- retval = blkdev_get_block(inode, blocknr, &bh);
- if (retval)
- goto out;
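+/* Trivial identity mapping: block N of the device inode is block N on the device */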
+static inline int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh_result)
+{
+ int err;
- blocks[i] = bh.b_blocknr;
- }
+ err = -EIO;
+ if (iblock >= max_block(inode->i_rdev))
+ goto out;
- retval = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, blocksize);
+ bh_result->b_blocknr = iblock;
+ bh_result->b_state |= 1UL << BH_Mapped;
+ err = 0;
out:
- return retval;
+ return err;
}
static int blkdev_writepage(struct page * page)
{
int err, i;
+ unsigned int blocksize;
unsigned long block;
struct buffer_head *bh, *head;
struct inode *inode = page->mapping->host;
if (!PageLocked(page))
BUG();
+ blocksize = block_size(inode->i_rdev);
if (!page->buffers)
- create_empty_buffers(page, inode->i_rdev, BUFFERED_BLOCKSIZE);
+ create_empty_buffers(page, inode->i_rdev, blocksize);
head = page->buffers;
- block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+ block = page->index << (PAGE_CACHE_SHIFT - blksize_bits(blocksize));
bh = head;
i = 0;
struct inode *inode = page->mapping->host;
kdev_t dev = inode->i_rdev;
unsigned long iblock, lblock;
- struct buffer_head *bh, *head, *arr[1 << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS)];
- unsigned int blocks;
+ struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
+ unsigned int blocks, blocksize, blocksize_bits;
int nr, i;
if (!PageLocked(page))
PAGE_BUG(page);
+ blocksize = block_size(dev);
+ blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
- create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+ create_empty_buffers(page, dev, blocksize);
head = page->buffers;
- blocks = PAGE_CACHE_SIZE >> BUFFERED_BLOCKSIZE_BITS;
- iblock = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
- lblock = buffered_blk_size(dev) >> (BUFFERED_BLOCKSIZE_BITS - BLOCK_SIZE_BITS);
+ blocks = PAGE_CACHE_SIZE >> blocksize_bits;
+ iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+ lblock = max_block(dev);
bh = head;
nr = 0;
i = 0;
continue;
}
if (!buffer_mapped(bh)) {
- memset(kmap(page) + i * BUFFERED_BLOCKSIZE, 0, BUFFERED_BLOCKSIZE);
+ memset(kmap(page) + i * blocksize, 0, blocksize);
flush_dcache_page(page);
kunmap(page);
set_bit(BH_Uptodate, &bh->b_state);
unsigned long block;
int err = 0;
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
- kmap(page);
+ unsigned int blocksize, blocksize_bits;
+ blocksize = block_size(dev);
+ blocksize_bits = blksize_bits(blocksize);
if (!page->buffers)
- create_empty_buffers(page, dev, BUFFERED_BLOCKSIZE);
+ create_empty_buffers(page, dev, blocksize);
head = page->buffers;
- block = page->index << (PAGE_CACHE_SHIFT - BUFFERED_BLOCKSIZE_BITS);
+ block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
if (!bh)
BUG();
- block_end = block_start + BUFFERED_BLOCKSIZE;
+ block_end = block_start + blocksize;
if (block_end <= from)
continue;
if (block_start >= to)
int err = __blkdev_prepare_write(inode, page, from, to);
if (err) {
ClearPageUptodate(page);
- kunmap(page);
}
return err;
}
unsigned block_start, block_end;
int partial = 0, need_balance_dirty = 0;
struct buffer_head *bh, *head;
+ unsigned int blocksize;
+ blocksize = block_size(inode->i_rdev);
for(bh = head = page->buffers, block_start = 0;
bh != head || !block_start;
block_start=block_end, bh = bh->b_this_page) {
- block_end = block_start + BUFFERED_BLOCKSIZE;
+ block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
{
struct inode *inode = page->mapping->host;
__blkdev_commit_write(inode,page,from,to);
- kunmap(page);
return 0;
}
invalidate_buffers(bd_inode->i_rdev);
}
lock_super(sb);
- if (sb->s_flags & MS_RDONLY)
- update_buffers(bd_inode->i_rdev);
unlock_super(sb);
drop_super(sb);
}
sync_page: block_sync_page,
prepare_write: blkdev_prepare_write,
commit_write: blkdev_commit_write,
- direct_IO: blkdev_direct_IO,
};
struct file_operations def_blk_fops = {
};
static struct bh_free_head free_list[NR_SIZES];
-static int grow_buffers(int size);
+static void truncate_buffers(kdev_t dev);
+static int grow_buffers(kdev_t dev, int block, int size);
static void __refile_buffer(struct buffer_head *);
/* This is used by some architectures to estimate available memory. */
__insert_into_lru_list(bh, bh->b_list);
}
-/* This function must only run if there are no other
- * references _anywhere_ to this buffer head.
- */
-static void put_last_free(struct buffer_head * bh)
+struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
- struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
- struct buffer_head **bhp = &head->list;
-
- bh->b_state = 0;
-
- spin_lock(&head->lock);
- bh->b_dev = B_FREE;
- if(!*bhp) {
- *bhp = bh;
- bh->b_prev_free = bh;
- }
- bh->b_next_free = *bhp;
- bh->b_prev_free = (*bhp)->b_prev_free;
- (*bhp)->b_prev_free->b_next_free = bh;
- (*bhp)->b_prev_free = bh;
- spin_unlock(&head->lock);
-}
+ struct buffer_head *bh, **p = &hash(dev, block);
-/*
- * Why like this, I hear you say... The reason is race-conditions.
- * As we don't lock buffers (unless we are reading them, that is),
- * something might happen to it while we sleep (ie a read-error
- * will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
-static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size)
-{
- struct buffer_head *bh = hash(dev, block);
+ read_lock(&hash_table_lock);
- for (; bh; bh = bh->b_next)
- if (bh->b_blocknr == block &&
- bh->b_size == size &&
- bh->b_dev == dev)
+ for (;;) {
+ bh = *p;
+ if (!bh)
break;
- if (bh)
+ p = &bh->b_next;
+ if (bh->b_blocknr != block)
+ continue;
+ if (bh->b_size != size)
+ continue;
+ if (bh->b_dev != dev)
+ continue;
get_bh(bh);
+ break;
+ }
- return bh;
-}
-
-struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
-{
- struct buffer_head *bh;
-
- read_lock(&hash_table_lock);
- bh = __get_hash_table(dev, block, size);
read_unlock(&hash_table_lock);
-
return bh;
}
we think the disk contains more recent information than the buffercache.
The update == 1 pass marks the buffers we need to update, the update == 2
pass does the actual I/O. */
-void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers, int update)
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
{
int i, nlist, slept;
struct buffer_head * bh, * bh_next;
/* All buffers in the lru lists are mapped */
if (!buffer_mapped(bh))
BUG();
+ if (buffer_dirty(bh))
+ printk("invalidate: dirty buffer\n");
if (!atomic_read(&bh->b_count)) {
if (destroy_dirty_buffers || !buffer_dirty(bh)) {
remove_inode_queue(bh);
+#if 0
__remove_from_queues(bh);
put_last_free(bh);
+#endif
}
- } else if (update) {
- if ((update == 2) ^ buffer_uptodate(bh) &&
- (update == 2) ^ buffer_req(bh)) {
- write_unlock(&hash_table_lock);
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
-
- if (update == 2) {
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- } else {
- lock_buffer(bh);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- unlock_buffer(bh);
- }
-
- atomic_dec(&bh->b_count);
- goto retry;
- }
- }
+ } else
+ printk("invalidate: busy buffer\n");
write_unlock(&hash_table_lock);
if (slept)
spin_unlock(&lru_list_lock);
if (slept)
goto retry;
+
+ /* Get rid of the page cache */
+ truncate_buffers(dev);
}
void set_blocksize(kdev_t dev, int size)
{
extern int *blksize_size[];
- int i, nlist, slept;
- struct buffer_head * bh, * bh_next;
if (!blksize_size[MAJOR(dev)])
return;
}
if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
return;
+
sync_buffers(dev, 2);
blksize_size[MAJOR(dev)][MINOR(dev)] = size;
-
- retry:
- slept = 0;
- spin_lock(&lru_list_lock);
- for(nlist = 0; nlist < NR_LIST; nlist++) {
- bh = lru_list[nlist];
- if (!bh)
- continue;
- for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
- bh_next = bh->b_next_free;
- if (bh->b_dev != dev || bh->b_size == size)
- continue;
- /* Unhashed? */
- if (!bh->b_pprev)
- continue;
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&lru_list_lock);
- wait_on_buffer(bh);
- slept = 1;
- spin_lock(&lru_list_lock);
- put_bh(bh);
- }
-
- write_lock(&hash_table_lock);
- if (!atomic_read(&bh->b_count)) {
- if (buffer_dirty(bh))
- printk(KERN_WARNING
- "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
- kdevname(dev), bh->b_blocknr, bh->b_size);
- remove_inode_queue(bh);
- __remove_from_queues(bh);
- put_last_free(bh);
- } else {
- if (atomic_set_buffer_clean(bh))
- __refile_buffer(bh);
- clear_bit(BH_Uptodate, &bh->b_state);
- printk(KERN_WARNING
- "set_blocksize: "
- "b_count %d, dev %s, block %lu, from %p\n",
- atomic_read(&bh->b_count), bdevname(bh->b_dev),
- bh->b_blocknr, __builtin_return_address(0));
- }
- write_unlock(&hash_table_lock);
- if (slept)
- goto out;
- }
- }
- out:
- spin_unlock(&lru_list_lock);
- if (slept)
- goto retry;
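+ /* Drop everything cached with the old blocksize; it will be re-read at the new size */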
+ invalidate_buffers(dev);
}
static void free_more_memory(void)
*/
struct buffer_head * getblk(kdev_t dev, int block, int size)
{
- struct buffer_head * bh;
- int isize;
-
-repeat:
- spin_lock(&lru_list_lock);
- write_lock(&hash_table_lock);
- bh = __get_hash_table(dev, block, size);
- if (bh)
- goto out;
-
- isize = BUFSIZE_INDEX(size);
- spin_lock(&free_list[isize].lock);
- bh = free_list[isize].list;
- if (bh) {
- __remove_from_free_list(bh, isize);
- atomic_set(&bh->b_count, 1);
- }
- spin_unlock(&free_list[isize].lock);
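+ /* Find the buffer in the hash, or create its page-cache page and retry */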
+ for (;;) {
+ struct buffer_head * bh;
- /*
- * OK, FINALLY we know that this buffer is the only one of
- * its kind, we hold a reference (b_count>0), it is unlocked,
- * and it is clean.
- */
- if (bh) {
- init_buffer(bh, NULL, NULL);
- bh->b_dev = dev;
- bh->b_blocknr = block;
- bh->b_state = 1 << BH_Mapped;
+ bh = get_hash_table(dev, block, size);
+ if (bh)
+ return bh;
- /* Insert the buffer into the regular lists */
- __insert_into_queues(bh);
- out:
- write_unlock(&hash_table_lock);
- spin_unlock(&lru_list_lock);
- touch_buffer(bh);
- return bh;
+ if (!grow_buffers(dev, block, size))
+ free_more_memory();
}
-
- /*
- * If we block while refilling the free list, somebody may
- * create the buffer first ... search the hashes again.
- */
- write_unlock(&hash_table_lock);
- spin_unlock(&lru_list_lock);
-
- if (!grow_buffers(size))
- free_more_memory();
-
- /* FIXME: getblk should fail if there's no enough memory */
- goto repeat;
}
/* -1 -> no need to flush
*/
void __bforget(struct buffer_head * buf)
{
- /* grab the lru lock here to block bdflush. */
- spin_lock(&lru_list_lock);
- write_lock(&hash_table_lock);
- if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
- goto in_use;
- __hash_unlink(buf);
- write_unlock(&hash_table_lock);
- remove_inode_queue(buf);
- __remove_from_lru_list(buf, buf->b_list);
- spin_unlock(&lru_list_lock);
- put_last_free(buf);
- return;
-
- in_use:
- write_unlock(&hash_table_lock);
- spin_unlock(&lru_list_lock);
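+ /* The buffer free list is gone: forgetting a buffer is now just a plain release */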
+ __brelse(buf);
}
/**
goto try_again;
}
-static void unmap_buffer(struct buffer_head * bh)
+/*
+ * Called when truncating a buffer on a page completely.
+ *
+ * We can avoid IO by marking it clean.
+ * FIXME!! FIXME!! FIXME!! We need to unmap it too,
+ * so that the filesystem won't write to it. There's
+ * some bug somewhere..
+ */
+static void discard_buffer(struct buffer_head * bh)
{
- if (buffer_mapped(bh)) {
- mark_buffer_clean(bh);
- lock_buffer(bh);
- clear_bit(BH_Uptodate, &bh->b_state);
- clear_bit(BH_Mapped, &bh->b_state);
- clear_bit(BH_Req, &bh->b_state);
- clear_bit(BH_New, &bh->b_state);
- unlock_buffer(bh);
- }
+ mark_buffer_clean(bh);
}
/*
* is this block fully flushed?
*/
if (offset <= curr_off)
- unmap_buffer(bh);
+ discard_buffer(bh);
curr_off = next_off;
bh = next;
} while (bh != head);
return tmp.b_blocknr;
}
-int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
-{
- int i, nr_blocks, retval;
- unsigned long * blocks = iobuf->blocks;
-
- nr_blocks = iobuf->length / blocksize;
- /* build the blocklist */
- for (i = 0; i < nr_blocks; i++, blocknr++) {
- struct buffer_head bh;
-
- bh.b_state = 0;
- bh.b_dev = inode->i_dev;
- bh.b_size = blocksize;
-
- retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
- if (retval)
- goto out;
-
- if (rw == READ) {
- if (buffer_new(&bh))
- BUG();
- if (!buffer_mapped(&bh)) {
- /* there was an hole in the filesystem */
- blocks[i] = -1UL;
- continue;
- }
- } else {
- if (buffer_new(&bh))
- unmap_underlying_metadata(&bh);
- if (!buffer_mapped(&bh))
- BUG();
- }
- blocks[i] = bh.b_blocknr;
- }
-
- retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
-
- out:
- return retval;
-}
-
/*
* IO completion routine for a buffer_head being used for kiobuf IO: we
* can't dispatch the kiobuf callback until io_count reaches 0.
return err;
}
+/*
+ * Create the page-cache page that contains the requested block
+ */
+static struct page * grow_dev_page(struct block_device *bdev, unsigned long index, int size)
+{
+ struct page * page;
+
+ page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS);
+ if (IS_ERR(page))
+ return NULL;
+
+ if (!PageLocked(page))
+ BUG();
+
+ if (!page->buffers) {
+ struct buffer_head *bh, *tail;
+ struct buffer_head *head = create_buffers(page, size, 0);
+ if (!head)
+ goto failed;
+
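+ /* Link the buffers into a circular ring and attach them to the page */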
+ bh = head;
+ do {
+ tail = bh;
+ bh = bh->b_this_page;
+ } while (bh);
+ tail->b_this_page = head;
+ page->buffers = head;
+ page_cache_get(page);
+ atomic_inc(&buffermem_pages);
+ }
+ return page;
+
+failed:
+ UnlockPage(page);
+ page_cache_release(page);
+ return NULL;
+}
+
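+/*
+ * Map this page's buffers to the given run of device blocks and
+ * insert them into the buffer hash so getblk() can find them.
+ */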
+static void hash_page_buffers(struct page *page, kdev_t dev, int block, int size)
+{
+ struct buffer_head *head = page->buffers;
+ struct buffer_head *bh = head;
+ unsigned int uptodate;
+
+ uptodate = 1 << BH_Mapped;
+ if (Page_Uptodate(page))
+ uptodate |= 1 << BH_Uptodate;
+
+ spin_lock(&lru_list_lock);
+ write_lock(&hash_table_lock);
+ do {
+ if (!(bh->b_state & (1 << BH_Mapped))) {
+ init_buffer(bh, NULL, NULL);
+ bh->b_dev = dev;
+ bh->b_blocknr = block;
+ bh->b_state = uptodate;
+ }
+
+ /* Insert the buffer into the regular lists */
+ if (!bh->b_pprev) {
+ __insert_into_queues(bh);
+ }
+
+ block++;
+ bh = bh->b_this_page;
+ } while (bh != head);
+ write_unlock(&hash_table_lock);
+ spin_unlock(&lru_list_lock);
+}
+
/*
- * Try to increase the number of buffers available: the size argument
- * is used to determine what kind of buffers we want.
+ * Create buffers of the requested size for the page-cache page that
+ * backs the given device block, and hash them in.
*/
-static int grow_buffers(int size)
+static int grow_buffers(kdev_t dev, int block, int size)
{
struct page * page;
- struct buffer_head *bh, *tmp;
- struct buffer_head * insert_point;
- int isize;
+ struct block_device *bdev;
+ unsigned long index;
+ int sizebits;
if ((size & 511) || (size > PAGE_SIZE)) {
printk(KERN_ERR "VFS: grow_buffers: size = %d\n",size);
return 0;
}
+ sizebits = -1;
+ do {
+ sizebits++;
+ } while ((size << sizebits) < PAGE_SIZE);
- page = alloc_page(GFP_NOFS);
- if (!page)
- goto out;
- LockPage(page);
- bh = create_buffers(page, size, 0);
- if (!bh)
- goto no_buffer_head;
-
- isize = BUFSIZE_INDEX(size);
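+ /* Page-cache index holding this block, and the first block on that page */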
+ index = block >> sizebits;
+ block = index << sizebits;
- spin_lock(&free_list[isize].lock);
- insert_point = free_list[isize].list;
- tmp = bh;
- while (1) {
- if (insert_point) {
- tmp->b_next_free = insert_point->b_next_free;
- tmp->b_prev_free = insert_point;
- insert_point->b_next_free->b_prev_free = tmp;
- insert_point->b_next_free = tmp;
- } else {
- tmp->b_prev_free = tmp;
- tmp->b_next_free = tmp;
- }
- insert_point = tmp;
- if (tmp->b_this_page)
- tmp = tmp->b_this_page;
- else
- break;
+ bdev = bdget(kdev_t_to_nr(dev));
+ if (!bdev) {
+ printk("No block device for %s\n", kdevname(dev));
+ BUG();
}
- tmp->b_this_page = bh;
- free_list[isize].list = bh;
- spin_unlock(&free_list[isize].lock);
- page->buffers = bh;
- page->flags &= ~(1 << PG_referenced);
- lru_cache_add(page);
- UnlockPage(page);
- atomic_inc(&buffermem_pages);
- return 1;
+ /* Create a page with the proper size buffers.. */
+ page = grow_dev_page(bdev, index, size);
+
+ /* This is "wrong" - talk to Al Viro */
+ atomic_dec(&bdev->bd_count);
+ if (!page)
+ return 0;
-no_buffer_head:
+ /* Hash in the buffers on the hash list */
+ hash_page_buffers(page, dev, block, size);
UnlockPage(page);
page_cache_release(page);
-out:
- return 0;
+ return 1;
+}
+
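+/* Drop every page-cache page (and its buffers) for this device */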
+static void truncate_buffers(kdev_t dev)
+{
+ struct block_device *bdev = bdget(kdev_t_to_nr(dev));
+ truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
+ atomic_dec(&bdev->bd_count);
}
static int sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
{
return generic_block_bmap(mapping,block,ext2_get_block);
}
-static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
-{
- return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
-}
struct address_space_operations ext2_aops = {
readpage: ext2_readpage,
writepage: ext2_writepage,
prepare_write: ext2_prepare_write,
commit_write: generic_commit_write,
bmap: ext2_bmap,
- direct_IO: ext2_direct_IO,
};
/*
#define blk_finished_io(nsects) do { } while (0)
#define blk_started_io(nsects) do { } while (0)
-static inline int buffered_blk_size(kdev_t dev)
-{
- int ret = INT_MAX;
- int major = MAJOR(dev);
-
- if (blk_size[major])
- ret = blk_size[major][MINOR(dev)] + ((BUFFERED_BLOCKSIZE-1) >> BLOCK_SIZE_BITS);
-
- return ret;
-}
-
#endif
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
-/* buffer header fixed size for the blkdev I/O through pagecache */
-#define BUFFERED_BLOCKSIZE_BITS 10
-#define BUFFERED_BLOCKSIZE (1 << BUFFERED_BLOCKSIZE_BITS)
-
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
int nr_files; /* read only */
extern void invalidate_inode_pages(struct inode *);
extern void invalidate_inode_pages2(struct address_space *);
extern void invalidate_inode_buffers(struct inode *);
-#define invalidate_buffers(dev) __invalidate_buffers((dev), 0, 0)
-#define destroy_buffers(dev) __invalidate_buffers((dev), 1, 0)
-#define update_buffers(dev) \
-do { \
- __invalidate_buffers((dev), 0, 1); \
- __invalidate_buffers((dev), 0, 2); \
-} while (0)
-extern void __invalidate_buffers(kdev_t dev, int, int);
+#define invalidate_buffers(dev) __invalidate_buffers((dev), 0)
+#define destroy_buffers(dev) __invalidate_buffers((dev), 1)
+extern void __invalidate_buffers(kdev_t dev, int);
extern void sync_inodes(kdev_t);
extern void sync_unlocked_inodes(void);
extern void write_inode_now(struct inode *, int);
int generic_block_bmap(struct address_space *, long, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
int block_truncate_page(struct address_space *, loff_t, get_block_t *);
-extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
extern int waitfor_one_page(struct page*);
__find_get_page(mapping, index, page_hash(mapping, index))
extern struct page * __find_lock_page (struct address_space * mapping,
unsigned long index, struct page **hash);
+extern struct page * find_or_create_page(struct address_space *mapping,
+ unsigned long index, unsigned int gfp_mask);
+
extern void lock_page(struct page *page);
#define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index))
extern void oom_kill(void);
/* linux/mm/swapfile.c */
+extern int total_swap_pages;
extern unsigned int nr_swapfiles;
extern struct swap_info_struct swap_info[];
extern int is_swap_partition(kdev_t);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(do_generic_file_read);
EXPORT_SYMBOL(generic_file_write);
-EXPORT_SYMBOL(generic_direct_IO);
EXPORT_SYMBOL(generic_file_mmap);
EXPORT_SYMBOL(generic_ro_fops);
EXPORT_SYMBOL(generic_buffer_fdatasync);
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/iobuf.h>
+#include <linux/compiler.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#define CLUSTER_PAGES (1 << page_cluster)
#define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
+static void FASTCALL(add_page_to_hash_queue(struct page * page, struct page **p));
static void add_page_to_hash_queue(struct page * page, struct page **p)
{
struct page *next = *p;
}
/*
- * Same as the above, but lock the page too, verifying that
- * it's still valid once we own it.
+ * Must be called with the pagecache lock held,
+ * will return with it held (but it may be dropped
+ * during blocking operations).
*/
-struct page * __find_lock_page (struct address_space *mapping,
- unsigned long offset, struct page **hash)
+static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *));
+static struct page * __find_lock_page_helper(struct address_space *mapping,
+ unsigned long offset, struct page *hash)
{
struct page *page;
* the hash-list needs a held write-lock.
*/
repeat:
- spin_lock(&pagecache_lock);
- page = __find_page_nolock(mapping, offset, *hash);
+ page = __find_page_nolock(mapping, offset, hash);
if (page) {
page_cache_get(page);
- spin_unlock(&pagecache_lock);
+ if (TryLockPage(page)) {
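+ /* Someone else holds the page lock: drop the pagecache lock, wait for the page, then revalidate it */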
+ spin_unlock(&pagecache_lock);
+ lock_page(page);
+ spin_lock(&pagecache_lock);
- lock_page(page);
+ /* Has the page been re-allocated while we slept? */
+ if (page->mapping != mapping || page->index != offset) {
+ UnlockPage(page);
+ page_cache_release(page);
+ goto repeat;
+ }
+ }
+ }
+ return page;
+}
- /* Is the page still hashed? Ok, good.. */
- if (page->mapping == mapping && page->index == offset)
- return page;
+/*
+ * Same as the above, but lock the page too, verifying that
+ * it's still valid once we own it.
+ */
+struct page * __find_lock_page (struct address_space *mapping,
+ unsigned long offset, struct page **hash)
+{
+ struct page *page;
- /* Nope: we raced. Release and try again.. */
- UnlockPage(page);
- page_cache_release(page);
- goto repeat;
- }
+ spin_lock(&pagecache_lock);
+ page = __find_lock_page_helper(mapping, offset, *hash);
spin_unlock(&pagecache_lock);
- return NULL;
+ return page;
}
+/*
+ * Same as above, but create the page if required..
+ */
+struct page * find_or_create_page(struct address_space *mapping, unsigned long index, unsigned int gfp_mask)
+{
+ struct page *page;
+ struct page **hash = page_hash(mapping, index);
+
+ spin_lock(&pagecache_lock);
+ page = __find_lock_page_helper(mapping, index, *hash);
+ spin_unlock(&pagecache_lock);
+ if (!page) {
+ struct page *newpage = alloc_page(gfp_mask);
+ page = ERR_PTR(-ENOMEM);
+ if (newpage) {
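+ /* Re-check under the lock: someone may have added the page while we were allocating */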
+ spin_lock(&pagecache_lock);
+ page = __find_lock_page_helper(mapping, index, *hash);
+ if (likely(!page)) {
+ page = newpage;
+ __add_to_page_cache(page, mapping, index, hash);
+ newpage = NULL;
+ }
+ spin_unlock(&pagecache_lock);
+ if (unlikely(newpage != NULL))
+ page_cache_release(newpage);
+ }
+ }
+ return page;
+}
+
#if 0
#define PROFILE_READAHEAD
#define DEBUG_READAHEAD
{
unsigned long end_index;
- if (!S_ISBLK(inode->i_mode))
- end_index = inode->i_size >> PAGE_CACHE_SHIFT;
- else
- end_index = buffered_blk_size(inode->i_rdev) >> (PAGE_CACHE_SHIFT - BLOCK_SIZE_BITS);
+ end_index = inode->i_size >> PAGE_CACHE_SHIFT;
return end_index;
}
{
loff_t rsize;
- if (!S_ISBLK(inode->i_mode))
- rsize = inode->i_size;
- else
- rsize = (loff_t) buffered_blk_size(inode->i_rdev) << BLOCK_SIZE_BITS;
+ rsize = inode->i_size;
return rsize;
}
UPDATE_ATIME(inode);
}
-static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
-{
- ssize_t retval;
- int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
- struct kiobuf * iobuf;
- struct inode * inode = filp->f_dentry->d_inode;
- struct address_space * mapping = inode->i_mapping;
-
- new_iobuf = 0;
- iobuf = filp->f_iobuf;
- if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
- /*
- * A parallel read/write is using the preallocated iobuf
- * so just run slow and allocate a new one.
- */
- retval = alloc_kiovec(1, &iobuf);
- if (retval)
- goto out;
- new_iobuf = 1;
- }
-
- if (!S_ISBLK(inode->i_mode)) {
- blocksize = inode->i_sb->s_blocksize;
- blocksize_bits = inode->i_sb->s_blocksize_bits;
- } else {
- blocksize = BUFFERED_BLOCKSIZE;
- blocksize_bits = BUFFERED_BLOCKSIZE_BITS;
- }
- blocksize_mask = blocksize - 1;
- chunk_size = KIO_MAX_ATOMIC_IO << 10;
-
- retval = -EINVAL;
- if ((offset & blocksize_mask) || (count & blocksize_mask))
- goto out_free;
- if (!mapping->a_ops->direct_IO)
- goto out_free;
-
- /*
- * Flush to disk exlusively the _data_, metadata must remains
- * completly asynchronous or performance will go to /dev/null.
- */
- filemap_fdatasync(mapping);
- retval = fsync_inode_data_buffers(inode);
- filemap_fdatawait(mapping);
- if (retval < 0)
- goto out_free;
-
- progress = retval = 0;
- while (count > 0) {
- iosize = count;
- if (iosize > chunk_size)
- iosize = chunk_size;
-
- retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
- if (retval)
- break;
-
- retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
-
- if (rw == READ && retval > 0)
- mark_dirty_kiobuf(iobuf, retval);
-
- if (retval >= 0) {
- count -= retval;
- buf += retval;
- progress += retval;
- }
-
- unmap_kiobuf(iobuf);
-
- if (retval != iosize)
- break;
- }
-
- if (progress)
- retval = progress;
-
- out_free:
- if (!new_iobuf)
- clear_bit(0, &filp->f_iobuf_lock);
- else
- free_kiovec(1, &iobuf);
- out:
- return retval;
-}
-
int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
char *kaddr;
if ((ssize_t) count < 0)
return -EINVAL;
- if (filp->f_flags & O_DIRECT)
- goto o_direct;
-
retval = -EFAULT;
if (access_ok(VERIFY_WRITE, buf, count)) {
retval = 0;
retval = desc.error;
}
}
- out:
return retval;
-
- o_direct:
- {
- loff_t pos = *ppos, size;
- struct inode * inode = filp->f_dentry->d_inode;
-
- retval = 0;
- if (!count)
- goto out; /* skip atime */
- size = calc_rsize(inode);
- if (pos < size) {
- if (pos + count > size)
- count = size - pos;
- retval = generic_file_direct_IO(READ, filp, buf, count, pos);
- if (retval > 0)
- *ppos = pos + retval;
- }
- UPDATE_ATIME(filp->f_dentry->d_inode);
- goto out;
- }
}
static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
- if (file->f_flags & O_DIRECT)
- goto o_direct;
-
do {
unsigned long index, offset;
long page_fault;
if ((status >= 0) && (file->f_flags & O_SYNC))
status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
-out_status:
err = written ? written : status;
out:
fail_write:
status = -EFAULT;
goto unlock;
-
-o_direct:
- written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
- if (written > 0) {
- loff_t end = pos + written;
- if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
- inode->i_size = end;
- mark_inode_dirty(inode);
- }
- *ppos = end;
- invalidate_inode_pages2(mapping);
- }
- /*
- * Sync the fs metadata but not the minor inode changes and
- * of course not the data as we did direct DMA for the IO.
- */
- if (written >= 0 && file->f_flags & O_SYNC)
- status = generic_osync_inode(inode, OSYNC_METADATA);
- goto out_status;
}
void __init page_cache_init(unsigned long mempages)
return;
}
+/* Swap 80% full? Release the pages as they are paged in.. */
+#define vm_swap_full() \
+ (swapper_space.nrpages*5 > total_swap_pages*4)
+
/*
* We hold the mm semaphore and the page_table_lock on entry and exit.
*/
swap_free(entry);
mark_page_accessed(page);
if (exclusive_swap_page(page)) {
- if (vma->vm_flags & VM_WRITE)
- pte = pte_mkwrite(pte);
- pte = pte_mkdirty(pte);
- delete_from_swap_cache(page);
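+ /* Free the swap slot (and make the pte writable) only on a write fault or when swap is nearly full */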
+ if (write_access || vm_swap_full()) {
+ pte = pte_mkdirty(pte);
+ if (vma->vm_flags & VM_WRITE)
+ pte = pte_mkwrite(pte);
+ delete_from_swap_cache(page);
+ }
}
UnlockPage(page);