page_cache_release(page);
}
+static void buffer_io_error(struct buffer_head *bh)
+{
+	printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+			bdevname(bh->b_bdev), (unsigned long long)bh->b_blocknr);
+}
+
/*
* Default synchronous end-of-IO handler.. Just mark it up-to-date and
* unlock the buffer. This is what ll_rw_block uses too.
void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
if (!uptodate)
- printk("%s: I/O error\n", __FUNCTION__);
+ buffer_io_error(bh);
if (uptodate)
set_buffer_uptodate(bh);
else
yield();
}
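
For reference, the synchronous pairing looks roughly like this; read_bh_sync()
is an illustrative helper (not something this patch adds), relying on
end_buffer_io_sync() being installed by ll_rw_block():

	#include <linux/errno.h>
	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	/* Illustrative only: synchronously read one mapped buffer_head. */
	static int read_bh_sync(struct buffer_head *bh)
	{
		if (buffer_uptodate(bh))
			return 0;
		ll_rw_block(READ, 1, &bh);	/* locks bh, installs end_buffer_io_sync() */
		wait_on_buffer(bh);		/* sleeps until the handler unlocks bh */
		return buffer_uptodate(bh) ? 0 : -EIO;
	}
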
-static void end_buffer_io_async(struct buffer_head *bh, int uptodate)
+/*
+ * I/O completion handler for block_read_full_page() and brw_page() - pages
+ * which come unlocked at the end of I/O.
+ */
+static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
unsigned long flags;
	struct buffer_head *tmp;
struct page *page;
int page_uptodate = 1;
+ BUG_ON(!buffer_async_read(bh));
+
if (!uptodate)
- printk("%s: I/O error\n", __FUNCTION__);
+ buffer_io_error(bh);
- if (uptodate)
+ page = bh->b_page;
+ if (uptodate) {
set_buffer_uptodate(bh);
- else
+ } else {
clear_buffer_uptodate(bh);
- page = bh->b_page;
- if (!uptodate)
SetPageError(page);
+ }
/*
* Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
* decide that the page is now completely done.
*/
spin_lock_irqsave(&page_uptodate_lock, flags);
- clear_buffer_async(bh);
+ clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
do {
if (!buffer_uptodate(tmp))
page_uptodate = 0;
- if (buffer_async(tmp)) {
+ if (buffer_async_read(tmp)) {
if (buffer_locked(tmp))
goto still_busy;
if (!buffer_mapped(bh))
*/
if (page_uptodate && !PageError(page))
SetPageUptodate(page);
- if (PageWriteback(page)) {
- /* It was a write */
- end_page_writeback(page);
+ unlock_page(page);
+ return;
+
+still_busy:
+ spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ return;
+}
+
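
To make the read-side contract concrete, the submission path that pairs with
end_buffer_async_read() looks roughly like the sketch below (compare the
block_read_full_page() hunks further down).  read_page_buffers() is a
hypothetical helper, not part of the patch:

	#include <linux/buffer_head.h>

	/*
	 * Sketch: submit the not-uptodate, mapped buffers of a locked page.
	 * The last buffer to complete runs end_buffer_async_read(), which
	 * unlocks the page (and sets PageUptodate if every buffer made it).
	 */
	static void read_page_buffers(struct buffer_head *arr[], int nr)
	{
		int i;

		/* Stage one: mark the buffers while the page is locked */
		for (i = 0; i < nr; i++) {
			lock_buffer(arr[i]);
			mark_buffer_async_read(arr[i]);
		}

		/*
		 * Stage two: start the I/O.  Nothing may be submitted before
		 * every buffer is marked, or an early completion could ignore
		 * the still-unmarked buffers and unlock the page too soon.
		 */
		for (i = 0; i < nr; i++)
			submit_bh(READ, arr[i]);
	}
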
+/*
+ * Completion handler for block_write_full_page() - pages which are unlocked
+ * during I/O, and which have PageWriteback cleared upon I/O completion.
+ */
+static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+{
+ static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
+ unsigned long flags;
+ struct buffer_head *tmp;
+ struct page *page;
+
+ BUG_ON(!buffer_async_write(bh));
+
+ if (!uptodate)
+ buffer_io_error(bh);
+
+ page = bh->b_page;
+ if (uptodate) {
+ set_buffer_uptodate(bh);
} else {
- /* read */
- unlock_page(page);
+ clear_buffer_uptodate(bh);
+ SetPageError(page);
+ }
+
+ spin_lock_irqsave(&page_uptodate_lock, flags);
+ clear_buffer_async_write(bh);
+ unlock_buffer(bh);
+ tmp = bh->b_this_page;
+ while (tmp != bh) {
+ if (buffer_async_write(tmp)) {
+ if (buffer_locked(tmp))
+ goto still_busy;
+ if (!buffer_mapped(bh))
+ BUG();
+ }
+ tmp = tmp->b_this_page;
}
+ spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ end_page_writeback(page);
return;
still_busy:
}
/*
- * If a page's buffers are under async writeout (end_buffer_io_async
+ * If a page's buffers are under async readin (end_buffer_async_read
* completion) then there is a possibility that another thread of
* control could lock one of the buffers after it has completed
* but while some of the other buffers have not completed. This
- * locked buffer would confuse end_buffer_io_async() into not unlocking
- * the page. So the absence of BH_Async tells end_buffer_io_async()
+ * locked buffer would confuse end_buffer_async_read() into not unlocking
+ * the page. So the absence of BH_Async_Read tells end_buffer_async_read()
* that this buffer is not under async I/O.
*
* The page comes unlocked when it has no locked buffer_async buffers
* left.
*
- * The page lock prevents anyone starting new async I/O against any of
+ * PageLocked prevents anyone from starting new async read I/O against any of
* the buffers.
+ *
+ * PageWriteback is used to prevent simultaneous writeout of the same
+ * page.
+ *
+ * PageLocked prevents anyone from starting writeback of a page which is
+ * under read I/O (PageWriteback is only ever set against a locked page).
*/
-inline void set_buffer_async_io(struct buffer_head *bh)
+inline void mark_buffer_async_read(struct buffer_head *bh)
{
- bh->b_end_io = end_buffer_io_async;
- set_buffer_async(bh);
+ bh->b_end_io = end_buffer_async_read;
+ set_buffer_async_read(bh);
}
-EXPORT_SYMBOL(set_buffer_async_io);
+EXPORT_SYMBOL(mark_buffer_async_read);
+
+inline void mark_buffer_async_write(struct buffer_head *bh)
+{
+ bh->b_end_io = end_buffer_async_write;
+ set_buffer_async_write(bh);
+}
+EXPORT_SYMBOL(mark_buffer_async_write);
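
Correspondingly, a writepage-style caller is expected to use
mark_buffer_async_write() along these lines (compare __block_write_full_page()
below).  write_page_buffers() is a hypothetical sketch; it assumes a
SetPageWriteback() page-flag helper exists in this tree (only PageWriteback()
and end_page_writeback() appear in the patch itself):

	#include <linux/buffer_head.h>

	/*
	 * Sketch: write out the dirty buffers of a locked page.  The last
	 * buffer to complete runs end_buffer_async_write(), which calls
	 * end_page_writeback().  If this returns 0 nothing was submitted and
	 * the caller must end writeback on the page itself.
	 */
	static int write_page_buffers(struct page *page, struct buffer_head *head)
	{
		struct buffer_head *bh = head;
		int nr_underway = 0;

		/* Mark the dirty buffers while the page is still locked */
		do {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh))
				mark_buffer_async_write(bh);
			else
				unlock_buffer(bh);
			bh = bh->b_this_page;
		} while (bh != head);

		SetPageWriteback(page);	/* assumed helper; paired with end_page_writeback() */
		unlock_page(page);

		/* Start the I/O */
		do {
			struct buffer_head *next = bh->b_this_page;

			if (buffer_async_write(bh)) {
				submit_bh(WRITE, bh);
				nr_underway++;
			}
			bh = next;
		} while (bh != head);

		return nr_underway;
	}
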
/*
* osync is designed to support O_SYNC io. It waits synchronously for
tail->b_this_page = head;
spin_lock(&page->mapping->host->i_bufferlist_lock);
- if (PageDirty(page)) {
+ if (PageUptodate(page) || PageDirty(page)) {
bh = head;
do {
- set_buffer_dirty(bh);
+ if (PageDirty(page))
+ set_buffer_dirty(bh);
+ if (PageUptodate(page))
+ set_buffer_uptodate(bh);
bh = bh->b_this_page;
} while (bh != head);
}
*/
do {
if (block > last_block) {
- clear_buffer_dirty(bh);
- if (buffer_mapped(bh))
- buffer_error();
+ /*
+ * mapped buffers outside i_size will occur, because
+ * this page can be outside i_size when there is a
+ * truncate in progress.
+ *
+ * if (buffer_mapped(bh))
+ * buffer_error();
+ */
/*
* The buffer was zeroed by block_write_full_page()
*/
+ clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
if (buffer_new(bh))
if (test_clear_buffer_dirty(bh)) {
if (!buffer_uptodate(bh))
buffer_error();
- set_buffer_async_io(bh);
+ mark_buffer_async_write(bh);
} else {
unlock_buffer(bh);
}
*/
do {
struct buffer_head *next = bh->b_this_page;
- if (buffer_async(bh)) {
+ if (buffer_async_write(bh)) {
submit_bh(WRITE, bh);
nr_underway++;
}
do {
if (buffer_mapped(bh)) {
lock_buffer(bh);
- set_buffer_async_io(bh);
+ mark_buffer_async_write(bh);
} else {
/*
* The buffer may have been set dirty during
} while (bh != head);
do {
struct buffer_head *next = bh->b_this_page;
- if (buffer_mapped(bh)) {
+ if (buffer_async_write(bh)) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
submit_bh(WRITE, bh);
/* Stage two: lock the buffers */
for (i = 0; i < nr; i++) {
- struct buffer_head * bh = arr[i];
+ bh = arr[i];
lock_buffer(bh);
- if (buffer_uptodate(bh))
- buffer_error();
- if (buffer_dirty(bh))
- buffer_error();
- set_buffer_async_io(bh);
+ mark_buffer_async_read(bh);
}
/*
* the underlying blockdev brought it uptodate (the sct fix).
*/
for (i = 0; i < nr; i++) {
- struct buffer_head * bh = arr[i];
+ bh = arr[i];
if (buffer_uptodate(bh))
- end_buffer_io_async(bh, 1);
+ end_buffer_async_read(bh, 1);
else
submit_bh(READ, bh);
}
bh->b_blocknr = *(b++);
bh->b_bdev = bdev;
set_buffer_mapped(bh);
- if (rw == WRITE)
+ if (rw == WRITE) {
set_buffer_uptodate(bh);
- set_buffer_async_io(bh);
+ clear_buffer_dirty(bh);
+ }
+		/*
+		 * Swap pages stay locked during writeout, so the async-read
+		 * completion handler (which unlocks the page) is used here
+		 * for writes as well as reads.
+		 */
+ mark_buffer_async_read(bh);
bh = bh->b_this_page;
} while (bh != head);
/*
* include/linux/buffer_head.h
*
- * Everything to do with buffer_head.b_state.
+ * Everything to do with buffer_heads.
*/
#ifndef BUFFER_FLAGS_H
#define BUFFER_FLAGS_H
-/* bh state bits */
enum bh_state_bits {
- BH_Uptodate, /* 1 if the buffer contains valid data */
- BH_Dirty, /* 1 if the buffer is dirty */
- BH_Lock, /* 1 if the buffer is locked */
- BH_Req, /* 0 if the buffer has been invalidated */
+ BH_Uptodate, /* Contains valid data */
+ BH_Dirty, /* Is dirty */
+ BH_Lock, /* Is locked */
+ BH_Req, /* Has been submitted for I/O */
- BH_Mapped, /* 1 if the buffer has a disk mapping */
- BH_New, /* 1 if the buffer is new and not yet written out */
- BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */
- BH_JBD, /* 1 if it has an attached journal_head */
+ BH_Mapped, /* Has a disk mapping */
+ BH_New, /* Disk mapping was newly created by get_block */
+ BH_Async_Read, /* Is under end_buffer_async_read I/O */
+ BH_Async_Write, /* Is under end_buffer_async_write I/O */
+ BH_JBD, /* Has an attached ext3 journal_head */
BH_PrivateStart,/* not a state bit, but the first bit available
* for private allocation by other entities
*/
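
As an aside, BH_PrivateStart is how other subsystems claim b_state bits of
their own on top of the generic set, usually together with the BUFFER_FNS()
macro defined further down.  A minimal, made-up example (the MyFS names are
hypothetical):

	#include <linux/buffer_head.h>

	/* Hypothetical filesystem-private buffer state bits */
	enum myfs_bh_state_bits {
		BH_MyFS_Allocated = BH_PrivateStart,	/* on-disk block committed */
		BH_MyFS_Revoked,			/* block revoked in the journal */
	};

	/* Emits set_buffer_myfs_allocated(), clear_..., buffer_... helpers */
	BUFFER_FNS(MyFS_Allocated, myfs_allocated)
	BUFFER_FNS(MyFS_Revoked, myfs_revoked)
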
typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate);
/*
- * Try to keep the most commonly used fields in single cache lines (16
- * bytes) to improve performance. This ordering should be
- * particularly beneficial on 32-bit processors.
- *
- * We use the first 16 bytes for the data which is used in searches
- * over the block hash lists (ie. getblk() and friends).
- *
- * The second 16 bytes we use for lru buffer scans, as used by
- * sync_buffers() and refill_freelist(). -- sct
+ * Keep related fields in common cachelines. The most commonly accessed
+ * field (b_state) goes at the start so the compiler does not generate
+ * indexed addressing for it.
*/
struct buffer_head {
/* First cache line: */
- sector_t b_blocknr; /* block number */
- unsigned short b_size; /* block size */
- struct block_device *b_bdev;
-
- atomic_t b_count; /* users using this block */
unsigned long b_state; /* buffer state bitmap (see above) */
+ atomic_t b_count; /* users using this block */
struct buffer_head *b_this_page;/* circular list of page's buffers */
struct page *b_page; /* the page this bh is mapped to */
- char * b_data; /* pointer to data block */
+ sector_t b_blocknr; /* block number */
+ unsigned short b_size; /* block size */
+ char *b_data; /* pointer to data block */
+
+ struct block_device *b_bdev;
bh_end_io_t *b_end_io; /* I/O completion */
void *b_private; /* reserved for b_end_io */
struct list_head b_inode_buffers; /* list of inode dirty buffers */
return test_and_clear_bit(BH_##bit, &(bh)->b_state); \
} \
+/*
+ * Emit the buffer bitops functions. Note that there are also functions
+ * of the form "mark_buffer_foo()". These are higher-level functions which
+ * do something in addition to setting a b_state bit.
+ */
BUFFER_FNS(Uptodate, uptodate)
BUFFER_FNS(Dirty, dirty)
TAS_BUFFER_FNS(Dirty, dirty)
BUFFER_FNS(Req, req)
BUFFER_FNS(Mapped, mapped)
BUFFER_FNS(New, new)
-BUFFER_FNS(Async, async)
-
-/*
- * Utility macros
- */
+BUFFER_FNS(Async_Read, async_read)
+BUFFER_FNS(Async_Write, async_write)
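
For readers new to this header: each BUFFER_FNS(Bit, name) line above emits a
trio of trivial inlines.  BUFFER_FNS(Async_Read, async_read), for example,
expands to roughly:

	static inline void set_buffer_async_read(struct buffer_head *bh)
	{
		set_bit(BH_Async_Read, &bh->b_state);
	}

	static inline void clear_buffer_async_read(struct buffer_head *bh)
	{
		clear_bit(BH_Async_Read, &bh->b_state);
	}

	static inline int buffer_async_read(struct buffer_head *bh)
	{
		return test_bit(BH_Async_Read, &bh->b_state);
	}
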
/*
* FIXME: this is used only by bh_kmap, which is used only by RAID5.
- * Clean this up with blockdev-in-highmem infrastructure.
+ * Move all that stuff into raid5.c
*/
#define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
void buffer_insert_list(spinlock_t *lock,
struct buffer_head *, struct list_head *);
-/* reiserfs_writepage needs this */
-void set_buffer_async_io(struct buffer_head *bh);
+void mark_buffer_async_read(struct buffer_head *bh);
+void mark_buffer_async_write(struct buffer_head *bh);
void invalidate_inode_buffers(struct inode *);
void invalidate_bdev(struct block_device *, int);
void __invalidate_buffers(kdev_t dev, int);