(filler_t *)mapping->a_ops->readpage, NULL);
if (IS_ERR(page))
goto out;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto fail;
if (PageError(page))
if (error)
goto unlock;
unlock_page(page);
- wait_on_page(page);
+ wait_on_page_locked(page);
page_cache_release(page);
fsync_bdev(bdev);
skip:
if(IS_ERR(page)) {
return PTR_ERR(page);
}
- wait_on_page(page);
+ wait_on_page_locked(page);
if(!PageUptodate(page)) {
/* error reading page */
printk("blkmtd: read: page not uptodate\n");
return retval;
}
-/*
- * AKPM: fixme. unneeded stuff here.
- */
static int __block_fsync(struct inode * inode)
{
- int ret, err;
-
- ret = filemap_fdatasync(inode->i_mapping);
- err = sync_buffers(inode->i_bdev, 1);
- if (err && !ret)
- ret = err;
- err = filemap_fdatawait(inode->i_mapping);
- if (err && !ret)
- ret = err;
-
- return ret;
+ return sync_buffers(inode->i_bdev, 1);
}
/*
* waitqueue, which is used here. (Well. Other locked buffers
* against the page will pin it. But complain anyway).
*/
- if (atomic_read(&bh->b_count) == 0 && !PageLocked(bh->b_page))
+ if (atomic_read(&bh->b_count) == 0 &&
+ !PageLocked(bh->b_page) &&
+ !PageWriteback(bh->b_page))
buffer_error();
clear_buffer_locked(bh);
* via its mapping. Does not take the superblock lock.
*
* If `wait' is true, wait on the writeout.
+ *
+ * FIXME: rename this function.
*/
int sync_buffers(struct block_device *bdev, int wait)
{
int ret;
- ret = filemap_fdatasync(bdev->bd_inode->i_mapping);
+ ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
if (wait) {
int err;
ret = -EINVAL;
if (!file->f_op || !file->f_op->fsync) {
- /* Why? We can still call filemap_fdatasync */
+ /* Why? We can still call filemap_fdatawrite */
goto out_putf;
}
/* We need to protect against concurrent writers.. */
down(&inode->i_sem);
- ret = filemap_fdatasync(inode->i_mapping);
+ ret = filemap_fdatawait(inode->i_mapping);
+ err = filemap_fdatawrite(inode->i_mapping);
+ if (!ret)
+ ret = err;
err = file->f_op->fsync(file, dentry, 0);
- if (err && !ret)
+ if (!ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
- if (err && !ret)
+ if (!ret)
ret = err;
up(&inode->i_sem);
goto out_putf;
down(&inode->i_sem);
- ret = filemap_fdatasync(inode->i_mapping);
+ ret = filemap_fdatawait(inode->i_mapping);
+ err = filemap_fdatawrite(inode->i_mapping);
+ if (!ret)
+ ret = err;
err = file->f_op->fsync(file, dentry, 1);
- if (err && !ret)
+ if (!ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
- if (err && !ret)
+ if (!ret)
ret = err;
up(&inode->i_sem);
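For reference, the wait-write-wait ordering used in the fsync paths above can be expressed as a small helper. This is an illustrative sketch only, not part of this patch, and the helper name is hypothetical: waiting first picks up pages that are already under writeback (which a fresh writeback pass would skip), so a data-integrity caller waits, starts new writeback, then waits again.

/*
 * Illustrative sketch, not part of this patch.  The name
 * flush_mapping_for_integrity() is hypothetical.
 */
static int flush_mapping_for_integrity(struct address_space *mapping)
{
	int ret, err;

	ret = filemap_fdatawait(mapping);	/* wait for I/O already in flight */
	err = filemap_fdatawrite(mapping);	/* start writeback of dirty pages */
	if (!ret)
		ret = err;
	err = filemap_fdatawait(mapping);	/* wait for the new I/O */
	if (!ret)
		ret = err;
	return ret;
}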
*/
if (page_uptodate && !PageError(page))
SetPageUptodate(page);
- unlock_page(page);
+ if (PageWriteback(page)) {
+ /* It was a write */
+ end_page_writeback(page);
+ } else {
+ /* read */
+ unlock_page(page);
+ }
return;
still_busy:
bh->b_end_io = end_buffer_io_async;
set_buffer_async(bh);
}
+EXPORT_SYMBOL(set_buffer_async_io);
/*
* osync is designed to support O_SYNC io. It waits synchronously for
if (!PageLocked(page))
BUG();
+ if (PageWriteback(page))
+ return 0;
if (mapping && mapping->a_ops->releasepage)
return mapping->a_ops->releasepage(page, gfp_mask);
struct buffer_head *bh, *head;
int nr_underway = 0;
- if (!PageLocked(page))
- BUG();
+ BUG_ON(!PageLocked(page));
last_block = (inode->i_size - 1) >> inode->i_blkbits;
bh = bh->b_this_page;
} while (bh != head);
+ BUG_ON(PageWriteback(page));
+ SetPageWriteback(page); /* Keeps try_to_free_buffers() away */
+ unlock_page(page);
+
/*
* The page may come unlocked any time after the *first* submit_bh()
* call. Be careful with its buffers.
} while (bh != head);
if (uptodate)
SetPageUptodate(page);
- unlock_page(page);
+ end_page_writeback(page);
}
return err;
recover:
* ENOSPC, or some other error. We may already have added some
* blocks to the file, so we need to write these out to avoid
* exposing stale data.
+ * The page is currently locked and not marked for writeback.
*/
ClearPageUptodate(page);
bh = head;
}
bh = next;
} while (bh != head);
+ BUG_ON(PageWriteback(page));
+ SetPageWriteback(page);
+ unlock_page(page);
goto done;
}
*
* FIXME: we need a swapper_inode->get_block function to remove
* some of the bmap kludges and interface ugliness here.
+ *
+ * NOTE: unlike file pages, swap pages are locked while under writeout.
+ * This is to avoid a deadlock which occurs when free_swap_and_cache()
+ * calls block_flushpage() under spinlock and hits a locked buffer, and
+ * schedules under spinlock. Another approach would be to teach
+ * find_trylock_page() to also trylock the page's writeback flags.
*/
int brw_page(int rw, struct page *page,
struct block_device *bdev, sector_t b[], int size)
bh->b_blocknr = *(b++);
bh->b_bdev = bdev;
set_buffer_mapped(bh);
- if (rw == WRITE) /* To support submit_bh debug tests */
+ if (rw == WRITE)
set_buffer_uptodate(bh);
set_buffer_async_io(bh);
bh = bh->b_this_page;
* OTOH it's obviously correct and should make the page up-to-date.
*/
err = mapping->a_ops->readpage(NULL, page);
- wait_on_page(page);
+ wait_on_page_locked(page);
page_cache_release(page);
if (err < 0)
goto fail;
int ret = 0;
BUG_ON(!PageLocked(page));
+ if (PageWriteback(page))
+ return 0;
if (page->mapping == NULL) /* swapped-in anon page */
return drop_buffers(page);
struct page *page = read_cache_page(mapping, n,
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
kmap(page);
if (!PageUptodate(page))
goto fail;
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(pp)) {
- wait_on_page(pp);
+ wait_on_page_locked(pp);
kmap(pp);
if (!PageUptodate(pp))
goto fail;
inode->i_state &= ~I_DIRTY;
spin_unlock(&inode_lock);
+ if (wait)
+ filemap_fdatawait(mapping);
+
if (mapping->a_ops->writeback_mapping)
mapping->a_ops->writeback_mapping(mapping, nr_to_write);
else
- filemap_fdatasync(mapping);
+ filemap_fdatawrite(mapping);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
if (what & (OSYNC_METADATA|OSYNC_DATA))
err = fsync_inode_buffers(inode);
if (what & OSYNC_DATA) {
- err2 = filemap_fdatasync(inode->i_mapping);
+ err2 = filemap_fdatawrite(inode->i_mapping);
if (!err)
err = err2;
}
kunmap(pg);
/* XXX: Does the page get freed automatically? */
- /* AAA: Judging by the unmount getting stuck in __wait_on_page, nope. */
+ /* AAA: Judging by the unmount getting stuck in __wait_on_page_locked, nope. */
page_cache_release(pg);
return ret;
}
/*
* write out dirty pages of bmap
*/
- filemap_fdatasync(ipbmap->i_mapping);
+ filemap_fdatawait(ipbmap->i_mapping);
+ filemap_fdatawrite(ipbmap->i_mapping);
filemap_fdatawait(ipbmap->i_mapping);
ipbmap->i_state |= I_DIRTY;
/*
* write out dirty pages of imap
*/
- filemap_fdatasync(ipimap->i_mapping);
+ filemap_fdatawait(ipimap->i_mapping);
+ filemap_fdatawrite(ipimap->i_mapping);
filemap_fdatawait(ipimap->i_mapping);
diWriteSpecial(ipimap);
jERROR(1, ("diFreeSpecial called with NULL ip!\n"));
return;
}
- filemap_fdatasync(ip->i_mapping);
+ filemap_fdatawait(ip->i_mapping);
+ filemap_fdatawrite(ip->i_mapping);
filemap_fdatawait(ip->i_mapping);
truncate_inode_pages(ip->i_mapping, 0);
iput(ip);
* We need to make sure all of the "written" metapages
* actually make it to disk
*/
- filemap_fdatasync(sbi->ipbmap->i_mapping);
- filemap_fdatasync(sbi->ipimap->i_mapping);
- filemap_fdatasync(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->ipbmap->i_mapping);
+ filemap_fdatawait(sbi->ipimap->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
+ filemap_fdatawrite(sbi->ipbmap->i_mapping);
+ filemap_fdatawrite(sbi->ipimap->i_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->ipbmap->i_mapping);
filemap_fdatawait(sbi->ipimap->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping);
jfs_ip = JFS_IP(ip);
/*
- * BUGBUG - Should we call filemap_fdatasync here instead
+ * BUGBUG - Should we call filemap_fdatawrite here instead
* of fsync_inode_data?
* If we do, we have a deadlock condition since we may end
* up recursively calling jfs_get_block with the IWRITELOCK
*/
if ((!S_ISDIR(ip->i_mode))
&& (tblk->flag & COMMIT_DELETE) == 0) {
- filemap_fdatasync(ip->i_mapping);
+ filemap_fdatawait(ip->i_mapping);
+ filemap_fdatawrite(ip->i_mapping);
filemap_fdatawait(ip->i_mapping);
}
* We need to clean out the direct_inode pages since this inode
* is not in the inode hash.
*/
- filemap_fdatasync(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_mapping, 0);
iput(sbi->direct_inode);
jERROR(1, ("jfs_umount failed with return code %d\n", rc));
}
out_mount_failed:
- filemap_fdatasync(sbi->direct_inode->i_mapping);
+ filemap_fdatawait(sbi->direct_inode->i_mapping);
+ filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_mapping, 0);
make_bad_inode(sbi->direct_inode);
struct page *page = read_cache_page(mapping, n,
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
kmap(page);
if (!PageUptodate(page))
goto fail;
NULL);
if (IS_ERR(page))
goto sync_fail;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto async_fail;
*ppage = page;
}
/*
- * The following is used by wait_on_page(), generic_file_readahead()
+ * The following is used by wait_on_page_locked(), generic_file_readahead()
* to initiate the completion of any page readahead operations.
*/
static int nfs_sync_page(struct page *page)
* Flush all pending writes before doing anything
* with locks..
*/
- status = filemap_fdatasync(inode->i_mapping);
+ status = filemap_fdatawait(inode->i_mapping);
+ status2 = filemap_fdatawrite(inode->i_mapping);
+ if (!status)
+ status = status2;
down(&inode->i_sem);
status2 = nfs_wb_all(inode);
- if (status2 && !status)
+ if (!status)
status = status2;
up(&inode->i_sem);
status2 = filemap_fdatawait(inode->i_mapping);
- if (status2 && !status)
+ if (!status)
status = status2;
if (status < 0)
return status;
*/
out_ok:
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
- filemap_fdatasync(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
down(&inode->i_sem);
nfs_wb_all(inode); /* we may have slept */
up(&inode->i_sem);
if (!S_ISREG(inode->i_mode))
attr->ia_valid &= ~ATTR_SIZE;
- filemap_fdatasync(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
error = nfs_wb_all(inode);
filemap_fdatawait(inode->i_mapping);
if (error)
struct inode *inode = dp->d_inode;
int (*fsync) (struct file *, struct dentry *, int);
- filemap_fdatasync(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
if (fop && (fsync = fop->fsync))
fsync(filp, dp, 0);
filemap_fdatawait(inode->i_mapping);
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
kmap(page);
if (PageUptodate(page) && !PageError(page))
return page;
"page (index 0x%lx).", index - 1);
continue;
}
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page)) {
ntfs_debug("Async read_cache_page() error. Skipping "
"page (index 0x%lx).", index - 1);
"page (index 0x%lx).", index - 1);
continue;
}
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page)) {
ntfs_debug("Async read_cache_page() error. Skipping "
"page (index 0x%lx).", index - 1);
page = read_cache_page(mapping, n/sect,
(filler_t *)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto fail;
if (PageError(page))
block++ ;
} while(bh != head) ;
+ if (!partial)
+ SetPageUptodate(page) ;
+ BUG_ON(PageWriteback(page));
+ SetPageWriteback(page);
+ unlock_page(page);
+
/* if this page only had a direct item, it is very possible for
** nr == 0 without there being any kind of error.
*/
if (nr) {
submit_bh_for_writepage(arr, nr) ;
} else {
- unlock_page(page) ;
+ end_page_writeback(page) ;
}
- if (!partial)
- SetPageUptodate(page) ;
return 0 ;
fail:
if (nr) {
+ SetPageWriteback(page);
+ unlock_page(page);
submit_bh_for_writepage(arr, nr) ;
} else {
unlock_page(page) ;
/* We must flush any dirty pages now as we won't be able to
write anything after close. mmap can trigger this.
"openers" should perhaps include mmap'ers ... */
- filemap_fdatasync(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
filemap_fdatawait(inode->i_mapping);
smb_close(inode);
}
DENTRY_PATH(dentry),
(long) inode->i_size, (long) attr->ia_size);
- filemap_fdatasync(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ filemap_fdatawrite(inode->i_mapping);
filemap_fdatawait(inode->i_mapping);
error = smb_open(dentry, O_WRONLY);
struct page *page = read_cache_page(mapping, n,
(filler_t*)mapping->a_ops->readpage, NULL);
if (!IS_ERR(page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
kmap(page);
if (!PageUptodate(page))
goto fail;
dentry_dst=(struct dentry *)page;
if (IS_ERR(page))
goto out;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto async_fail;
(filler_t*)mapping->a_ops->readpage, NULL);
if (IS_ERR(page))
goto sync_fail;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto async_fail;
p = (struct umsdos_dirent*)(kmap(page)+offs);
page = page2;
goto sync_fail;
}
- wait_on_page(page2);
+ wait_on_page_locked(page2);
if (!PageUptodate(page2)) {
kunmap(page);
page_cache_release(page2);
page = read_cache_page(mapping,index,readpage,NULL);
if (IS_ERR(page))
goto sync_fail;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page))
goto async_fail;
p = kmap(page);
page = next_page;
goto sync_fail;
}
- wait_on_page(next_page);
+ wait_on_page_locked(next_page);
if (!PageUptodate(next_page)) {
page_cache_release(page);
page = next_page;
/* reiserfs_writepage needs this */
-void set_buffer_async_io(struct buffer_head *bh) ;
+void set_buffer_async_io(struct buffer_head *bh);
void invalidate_inode_buffers(struct inode *);
void invalidate_bdev(struct block_device *, int);
void __invalidate_buffers(kdev_t dev, int);
extern void invalidate_inode_pages2(struct address_space *);
extern void write_inode_now(struct inode *, int);
extern void sync_inodes_sb(struct super_block *);
-extern int filemap_fdatasync(struct address_space *);
+extern int filemap_fdatawrite(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(void);
extern int bmap(struct inode *, int);
* table, they should be so rare as to be outweighed by the
* benefits from the saved space.
*
- * __wait_on_page() and unlock_page() in mm/filemap.c, are the
+ * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
* primary users of these fields, and in mm/page_alloc.c
* free_area_init_core() performs the initialization of them.
*/
#define PG_launder 12 /* written out by VM pressure.. */
#define PG_private 13 /* Has something at ->private */
+#define PG_writeback 14 /* Page is under writeback */
/*
* Global page accounting. One instance per CPU.
#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags)
#define PagePrivate(page) test_bit(PG_private, &(page)->flags)
+#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags)
+#define SetPageWriteback(page) set_bit(PG_writeback, &(page)->flags)
+#define ClearPageWriteback(page) clear_bit(PG_writeback, &(page)->flags)
+#define TestSetPageWriteback(page) \
+ test_and_set_bit(PG_writeback, &(page)->flags)
+#define TestClearPageWriteback(page) \
+ test_and_clear_bit(PG_writeback, &(page)->flags)
+
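Taken together, these macros are driven roughly as follows: the writepage path marks the page, drops the page lock before I/O completes, and the completion handler ends writeback. The sketch below is illustrative only; example_writepage() is a hypothetical name, and real callers submit buffers or a BIO between marking the page and ending writeback.

/*
 * Illustrative sketch, not part of this patch: the expected
 * PG_writeback protocol for a writepage-style path.
 */
static int example_writepage(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));

	SetPageWriteback(page);		/* keeps try_to_free_buffers() away */
	unlock_page(page);		/* page is unlocked while under writeout */

	/* ...submit the I/O here; when it completes, the handler calls... */
	end_page_writeback(page);	/* clears PG_writeback and wakes waiters */
	return 0;
}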
/*
* The PageSwapCache predicate doesn't use a PG_flag at this time,
* but it may again do so one day.
extern void FASTCALL(lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
+extern void end_page_writeback(struct page *page);
-extern void ___wait_on_page(struct page *);
+extern void ___wait_on_page_locked(struct page *);
-static inline void wait_on_page(struct page * page)
+static inline void wait_on_page_locked(struct page *page)
{
if (PageLocked(page))
- ___wait_on_page(page);
+ ___wait_on_page_locked(page);
}
extern void wake_up_page(struct page *);
+extern void wait_on_page_writeback(struct page *page);
typedef int filler_t(void *, struct page*);
EXPORT_SYMBOL(__d_path);
EXPORT_SYMBOL(mark_buffer_dirty);
EXPORT_SYMBOL(end_buffer_io_sync);
-EXPORT_SYMBOL(set_buffer_async_io);
EXPORT_SYMBOL(__mark_inode_dirty);
EXPORT_SYMBOL(get_empty_filp);
EXPORT_SYMBOL(init_private_file);
EXPORT_SYMBOL(submit_bh);
EXPORT_SYMBOL(unlock_buffer);
EXPORT_SYMBOL(__wait_on_buffer);
-EXPORT_SYMBOL(___wait_on_page);
EXPORT_SYMBOL(generic_direct_IO);
EXPORT_SYMBOL(block_write_full_page);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(dentry_open);
EXPORT_SYMBOL(filemap_nopage);
EXPORT_SYMBOL(filemap_sync);
-EXPORT_SYMBOL(filemap_fdatasync);
+EXPORT_SYMBOL(filemap_fdatawrite);
EXPORT_SYMBOL(filemap_fdatawait);
EXPORT_SYMBOL(lock_page);
EXPORT_SYMBOL(unlock_page);
int failed;
page_cache_get(page);
+ if (PageWriteback(page)) {
+ /*
+ * urgggh. This function is utterly foul,
+ * and this addition doesn't help. Kill.
+ */
+ write_unlock(&mapping->page_lock);
+ wait_on_page_writeback(page);
+ unlocked = 1;
+ write_lock(&mapping->page_lock);
+ goto restart;
+ }
failed = TestSetPageLocked(page);
list_del(head);
unlock_page(page);
} else
- wait_on_page(page);
+ wait_on_page_locked(page);
page_cache_release(page);
return unlocked;
}
+/*
+ * Unconditionally clean all pages outside `start'. The mapping lock
+ * must be held.
+ */
+static void clean_list_pages(struct address_space *mapping,
+ struct list_head *head, unsigned long start)
+{
+ struct page *page;
+ struct list_head *curr;
+
+ for (curr = head->next; curr != head; curr = curr->next) {
+ page = list_entry(curr, struct page, list);
+ if (page->index > start)
+ ClearPageDirty(page);
+ }
+}
+
/**
* truncate_inode_pages - truncate *all* the pages from an offset
* @mapping: mapping to truncate
int unlocked;
write_lock(&mapping->page_lock);
+ clean_list_pages(mapping, &mapping->io_pages, start);
+ clean_list_pages(mapping, &mapping->dirty_pages, start);
do {
unlocked |= truncate_list_pages(mapping,
&mapping->io_pages, start, &partial);
while (curr != head) {
page = list_entry(curr, struct page, list);
+ if (PageWriteback(page)) {
+ write_unlock(&mapping->page_lock);
+ wait_on_page_writeback(page);
+ unlocked = 1;
+ write_lock(&mapping->page_lock);
+ goto restart;
+ }
if (!TestSetPageLocked(page)) {
int __unlocked;
page_cache_get(page);
write_unlock(&mapping->page_lock);
unlocked = 1;
- wait_on_page(page);
+ wait_on_page_locked(page);
}
page_cache_release(page);
unlock_page(page);
return 0;
}
-
EXPORT_SYMBOL(fail_writepage);
/**
- * filemap_fdatasync - walk the list of dirty pages of the given address space
+ * filemap_fdatawrite - walk the list of dirty pages of the given address space
* and writepage() all of them.
*
* @mapping: address space structure to write
*
*/
-int filemap_fdatasync(struct address_space *mapping)
+int filemap_fdatawrite(struct address_space *mapping)
{
if (mapping->a_ops->writeback_mapping)
return mapping->a_ops->writeback_mapping(mapping, NULL);
struct page *page = list_entry(mapping->locked_pages.next, struct page, list);
list_del(&page->list);
- list_add(&page->list, &mapping->clean_pages);
+ if (PageDirty(page))
+ list_add(&page->list, &mapping->dirty_pages);
+ else
+ list_add(&page->list, &mapping->clean_pages);
- if (!PageLocked(page))
+ if (!PageWriteback(page))
continue;
page_cache_get(page);
write_unlock(&mapping->page_lock);
- ___wait_on_page(page);
+ wait_on_page_writeback(page);
if (PageError(page))
ret = -EIO;
return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
}
-/*
- * Wait for a page to get unlocked.
- *
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
- * go away during the wait..
- */
-void ___wait_on_page(struct page *page)
+static void wait_on_page_bit(struct page *page, int bit_nr)
{
wait_queue_head_t *waitqueue = page_waitqueue(page);
struct task_struct *tsk = current;
add_wait_queue(waitqueue, &wait);
do {
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- if (!PageLocked(page))
+ if (!test_bit(bit_nr, &page->flags))
break;
sync_page(page);
schedule();
- } while (PageLocked(page));
+ } while (test_bit(bit_nr, &page->flags));
__set_task_state(tsk, TASK_RUNNING);
remove_wait_queue(waitqueue, &wait);
}
-/*
- * Unlock the page and wake up sleepers in ___wait_on_page.
+/*
+ * Wait for a page to be unlocked.
+ *
+ * This must be called with the caller "holding" the page,
+ * ie with increased "page->count" so that the page won't
+ * go away during the wait..
+ */
+void ___wait_on_page_locked(struct page *page)
+{
+ wait_on_page_bit(page, PG_locked_dontuse);
+}
+EXPORT_SYMBOL(___wait_on_page_locked);
+
+/*
+ * Wait for a page to complete writeback
+ */
+void wait_on_page_writeback(struct page *page)
+{
+ wait_on_page_bit(page, PG_writeback);
+}
+EXPORT_SYMBOL(wait_on_page_writeback);
+
+/**
+ * unlock_page() - unlock a locked page
+ *
+ * @page: the page
+ *
+ * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+ * Also wakes sleepers in wait_on_page_writeback() because the wakeup
+ * mechanism between PageLocked pages and PageWriteback pages is shared.
+ * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
*
* The first mb is necessary to safely close the critical section opened by the
* TryLockPage(), the second mb is necessary to enforce ordering between
* the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page).
+ * parallel wait_on_page_locked()).
*/
void unlock_page(struct page *page)
{
wake_up_all(waitqueue);
}
+/*
+ * End writeback against a page.
+ */
+void end_page_writeback(struct page *page)
+{
+ wait_queue_head_t *waitqueue = page_waitqueue(page);
+ clear_bit(PG_launder, &(page)->flags);
+ smp_mb__before_clear_bit();
+ if (!TestClearPageWriteback(page))
+ BUG();
+ smp_mb__after_clear_bit();
+ if (waitqueue_active(waitqueue))
+ wake_up_all(waitqueue);
+}
+EXPORT_SYMBOL(end_page_writeback);
+
/*
* Get a lock on the page, assuming we need to sleep
* to get it..
if (!error) {
if (PageUptodate(page))
goto page_ok;
- wait_on_page(page);
+ wait_on_page_locked(page);
if (PageUptodate(page))
goto page_ok;
error = -EIO;
* Flush to disk exclusively the _data_, metadata must remain
* completly asynchronous or performance will go to /dev/null.
*/
- retval = filemap_fdatasync(mapping);
+ retval = filemap_fdatawait(mapping);
+ if (retval == 0)
+ retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval < 0)
}
if (!mapping->a_ops->readpage(file, page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
if (PageUptodate(page))
goto success;
}
}
ClearPageError(page);
if (!mapping->a_ops->readpage(file, page)) {
- wait_on_page(page);
+ wait_on_page_locked(page);
if (PageUptodate(page))
goto success;
}
return -EINVAL;
/* Try again... */
- wait_on_page(page);
+ wait_on_page_locked(page);
}
if (++repeat < 16)
if (!ret && (flags & (MS_SYNC|MS_ASYNC))) {
struct inode * inode = file->f_dentry->d_inode;
+ int err;
down(&inode->i_sem);
- ret = filemap_fdatasync(inode->i_mapping);
+ ret = filemap_fdatawait(inode->i_mapping);
+ err = filemap_fdatawrite(inode->i_mapping);
+ if (!ret)
+ ret = err;
if (flags & MS_SYNC) {
- int err;
-
if (file->f_op && file->f_op->fsync) {
err = file->f_op->fsync(file, file->f_dentry, 1);
if (err && !ret)
ret = err;
}
err = filemap_fdatawait(inode->i_mapping);
- if (err && !ret)
+ if (!ret)
ret = err;
}
up(&inode->i_sem);
* address_space_operation for filesystems which are using multipage BIO
* writeback.
*
- * We need to be careful to avoid deadlocks here. mpage_bio_writepage() does
- * not immediately start I/O against each page. It waits until the bio is
- * full, or until mpage_bio_flush() is called. So generic_writeback_mapping()
- * is locking multiple pages without necessarily starting I/O against them.
- *
- * AB/BA deadlocks are avoided via locking implemented in the filesystem.
- * Only one process ever has multiple locked pages against any mapping.
- *
- * FIXME: doing the locking in the fs is a bit grotty, but it allows us to
- * not have to put a new semaphore in struct inode. The fs could
- * pass its bio_write_state up here, I guess.
+ * (The next two paragraphs refer to code which isn't here yet, but they
+ * explain the presence of address_space.io_pages)
*
* Pages can be moved from clean_pages or locked_pages onto dirty_pages
* at any time - it's not possible to lock against that. So pages which
* have already been added to a BIO may magically reappear on the dirty_pages
* list. And generic_writeback_mapping() will again try to lock those pages.
- * But I/O has not yet been started agains the page. Thus deadlock.
+ * But I/O has not yet been started against the page. Thus deadlock.
*
* To avoid this, the entire contents of the dirty_pages list are moved
* onto io_pages up-front. We then walk io_pages, locking the
* This has the added benefit of preventing a livelock which would otherwise
* occur if pages are being dirtied faster than we can write them out.
*
- * Thus generic_writeback_mapping() only makes the guarantee that all pages
- * which were dirty at the time it was called will have I/O started against
- * them. And it's not possible to make a stronger guarantee than that.
+ * If a page is already under I/O, generic_writeback_mapping() skips it, even
+ * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made gets new I/O started against it. The way to do this is
+ * to run filemap_fdatawait() before calling filemap_fdatawrite().
+ *
+ * It's fairly rare for PageWriteback pages to be on ->dirty_pages. It
+ * means that someone redirtied the page while it was under I/O.
*/
int generic_writeback_mapping(struct address_space *mapping, int *nr_to_write)
{
struct page *page = list_entry(mapping->io_pages.prev,
struct page, list);
list_del(&page->list);
- list_add(&page->list, &mapping->locked_pages);
- if (!PageDirty(page))
+ if (PageWriteback(page)) {
+ if (PageDirty(page)) {
+ list_add(&page->list, &mapping->dirty_pages);
+ continue;
+ }
+ list_add(&page->list, &mapping->locked_pages);
+ continue;
+ }
+ if (!PageDirty(page)) {
+ list_add(&page->list, &mapping->clean_pages);
continue;
+ }
+ list_add(&page->list, &mapping->locked_pages);
page_cache_get(page);
write_unlock(&mapping->page_lock);
if (*nr_to_write <= 0)
done = 1;
}
- } else
+ } else {
unlock_page(page);
+ }
page_cache_release(page);
write_lock(&mapping->page_lock);
BUG_ON(!PageLocked(page));
+ if (wait && PageWriteback(page))
+ wait_on_page_writeback(page);
+
write_lock(&mapping->page_lock);
list_del(&page->list);
- list_add(&page->list, &mapping->locked_pages);
- write_unlock(&mapping->page_lock);
-
if (TestClearPageDirty(page)) {
+ list_add(&page->list, &mapping->locked_pages);
page_cache_get(page);
+ write_unlock(&mapping->page_lock);
ret = mapping->a_ops->writepage(page);
if (ret == 0 && wait) {
- wait_on_page(page);
+ wait_on_page_writeback(page);
if (PageError(page))
ret = -EIO;
}
page_cache_release(page);
} else {
+ list_add(&page->list, &mapping->clean_pages);
+ write_unlock(&mapping->page_lock);
unlock_page(page);
}
return ret;
BUG();
if (PageActive(page))
BUG();
+ if (PageWriteback(page))
+ BUG();
ClearPageDirty(page);
page->flags &= ~(1<<PG_referenced);
BUG();
if (PageDirty(page))
BUG();
+ if (PageWriteback(page))
+ BUG();
break;
}
page->mapping = &swapper_space;
if (!rw_swap_page_base(rw, entry, page))
unlock_page(page);
- wait_on_page(page);
+ if (rw == WRITE)
+ wait_on_page_writeback(page);
+ else
+ wait_on_page_locked(page);
page->mapping = NULL;
}
}
/*
- * Do this now, rather than at the next wait_on_page().
+ * Do this now, rather than at the next wait_on_page_locked().
*/
run_task_queue(&tq_disk);
goto repeat;
return ERR_PTR(-ENOMEM);
}
- wait_on_page(page);
+ wait_on_page_locked(page);
if (!PageUptodate(page) && entry->val == swap.val) {
page_cache_release(page);
return ERR_PTR(-EIO);
wait_retry:
spin_unlock (&info->lock);
- wait_on_page(page);
+ wait_on_page_locked(page);
page_cache_release(page);
goto repeat;
}
* Wait for and lock page. When do_swap_page races with
* try_to_unuse, do_swap_page can handle the fault much
* faster than try_to_unuse can locate the entry. This
- * apparently redundant "wait_on_page" lets try_to_unuse
+ * apparently redundant "wait_on_page_locked" lets try_to_unuse
* defer to do_swap_page in such a case - in some tests,
* do_swap_page and try_to_unuse repeatedly compete.
*/
- wait_on_page(page);
+ wait_on_page_locked(page);
+ wait_on_page_writeback(page);
lock_page(page);
/*
if (TestSetPageLocked(page))
return 0;
+ if (PageWriteback(page))
+ goto out_unlock;
+
/* From this point on, the odds are that we're going to
* nuke this pte, so read and clear the pte. This hook
* is needed on CPUs which update the accessed and dirty
/* No swap space left */
preserve:
set_pte(page_table, pte);
+out_unlock:
unlock_page(page);
return 0;
}
* The page is locked. IO in progress?
* Move it to the back of the list.
*/
- if (unlikely(TestSetPageLocked(page))) {
+ if (unlikely(PageWriteback(page))) {
if (PageLaunder(page) && (gfp_mask & __GFP_FS)) {
page_cache_get(page);
spin_unlock(&pagemap_lru_lock);
- wait_on_page(page);
+ wait_on_page_writeback(page);
page_cache_release(page);
spin_lock(&pagemap_lru_lock);
}
continue;
}
+ if (TestSetPageLocked(page))
+ continue;
+
+ if (PageWriteback(page)) { /* The non-racy check */
+ unlock_page(page);
+ continue;
+ }
+
mapping = page->mapping;
if (PageDirty(page) && is_page_cache_freeable(page) &&
writeback = a_ops->vm_writeback;
writepage = a_ops->writepage;
if (writeback || writepage) {
- ClearPageDirty(page);
SetPageLaunder(page);
page_cache_get(page);
spin_unlock(&pagemap_lru_lock);
+ ClearPageDirty(page);
if (writeback) {
int nr_to_write = WRITEOUT_PAGES;