expand_stack(), it is hard to come up with a destructive scenario without
having the vmlist protection in this case.
-The page_table_lock nests with the inode i_shared_sem and the kmem cache
+The page_table_lock nests with the inode i_mmap_lock and the kmem cache
c_spinlock spinlocks. This is okay, since the kmem code asks for pages after
dropping c_spinlock. The page_table_lock also nests with pagecache_lock and
pagemap_lru_lock spinlocks, and no code asks for memory with these locks
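A minimal sketch of that nesting, mirroring the vma_link() hunk further down in this patch (illustration only, not part of the diff): the page_table_lock is always taken inside i_mmap_lock, never the other way around.

	spin_lock(&mapping->i_mmap_lock);	/* outer: guards the i_mmap lists */
	spin_lock(&mm->page_table_lock);	/* inner: guards the page tables  */
	__vma_link(mm, vma, prev, rb_link, rb_parent);
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mapping->i_mmap_lock);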
pgoff = offset >> HPAGE_SHIFT;
inode->i_size = offset;
- down(&mapping->i_shared_sem);
+ spin_lock(&mapping->i_mmap_lock);
if (!list_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
if (!list_empty(&mapping->i_mmap_shared))
hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
truncate_hugepages(mapping, offset);
return 0;
}
init_rwsem(&inode->i_alloc_sem);
INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
spin_lock_init(&inode->i_data.tree_lock);
- init_MUTEX(&inode->i_data.i_shared_sem);
+ spin_lock_init(&inode->i_data.i_mmap_lock);
atomic_set(&inode->i_data.truncate_count, 0);
INIT_LIST_HEAD(&inode->i_data.private_list);
spin_lock_init(&inode->i_data.private_lock);
struct address_space_operations *a_ops; /* methods */
struct list_head i_mmap; /* list of private mappings */
struct list_head i_mmap_shared; /* list of shared mappings */
- struct semaphore i_shared_sem; /* protect both above lists */
+ spinlock_t i_mmap_lock; /* protect both above lists */
atomic_t truncate_count; /* Cover race condition with truncate */
unsigned long flags; /* error bits/gfp mask */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
+ int atomic; /* May not schedule() */
};
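For illustration, a hedged sketch of how the new field is meant to be used (hypothetical caller; mapping, vma, addr, len and the pgoff bounds are assumed names): a caller that already holds a spinlock sets .atomic so unmap_vmas() will not try to reschedule. unmap_mapping_range() later in this patch does essentially this.

	struct zap_details details = {
		.check_mapping	= mapping,	/* only zap this mapping's pages */
		.first_index	= start_pgoff,	/* page offsets of the hole	 */
		.last_index	= end_pgoff,
		.atomic		= 1,		/* spinlock held: may not schedule() */
	};
	zap_page_range(vma, addr, len, &details);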
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
static inline int make_page_exclusive(struct vm_area_struct *vma,
unsigned long addr)
{
- switch (handle_mm_fault(vma->vm_mm, vma, addr, 1)) {
- case VM_FAULT_MINOR:
- case VM_FAULT_MAJOR:
+ if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
return 0;
- case VM_FAULT_OOM:
- return -ENOMEM;
- default:
- return -EFAULT;
- }
+ return -ENOMEM;
}
/*
atomic_dec(&inode->i_writecount);
/* insert tmp into the share list, just after mpnt */
- down(&file->f_mapping->i_shared_sem);
+ spin_lock(&file->f_mapping->i_mmap_lock);
list_add(&tmp->shared, &mpnt->shared);
- up(&file->f_mapping->i_shared_sem);
+ spin_unlock(&file->f_mapping->i_mmap_lock);
}
/*
/*
* Lock ordering:
*
- * ->i_shared_sem (vmtruncate)
+ * ->i_mmap_lock (vmtruncate)
* ->private_lock (__free_pte->__set_page_dirty_buffers)
* ->swap_list_lock
* ->swap_device_lock (exclusive_swap_page, others)
* ->mapping->tree_lock
*
* ->i_sem
- * ->i_shared_sem (truncate->unmap_mapping_range)
+ * ->i_mmap_lock (truncate->unmap_mapping_range)
*
* ->mmap_sem
- * ->i_shared_sem (various places)
+ * ->i_mmap_lock (various places)
*
* ->mmap_sem
* ->lock_page (access_process_vm)
static long madvise_dontneed(struct vm_area_struct * vma,
unsigned long start, unsigned long end)
{
- struct zap_details details;
-
if (vma->vm_flags & VM_LOCKED)
return -EINVAL;
if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
- details.check_mapping = NULL;
- details.nonlinear_vma = vma;
- details.first_index = 0;
- details.last_index = ULONG_MAX;
+ struct zap_details details = {
+ .nonlinear_vma = vma,
+ .last_index = ULONG_MAX,
+ };
zap_page_range(vma, start, end - start, &details);
} else
zap_page_range(vma, start, end - start, NULL);
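The designated initializer above relies on C zero-initializing the members it does not name; spelled out (equivalent form, for illustration only) it reads:

	struct zap_details details = {
		.check_mapping	= NULL,		/* implicit: any mapping	*/
		.nonlinear_vma	= vma,
		.first_index	= 0,		/* implicit			*/
		.last_index	= ULONG_MAX,
		.atomic		= 0,		/* implicit: madvise may sleep	*/
	};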
if (offset + size > PMD_SIZE)
size = PMD_SIZE - offset;
size &= PAGE_MASK;
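+ /* Nothing to filter on per pte: drop details and take the plain zap path */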
+ if (details && !details->check_mapping && !details->nonlinear_vma)
+ details = NULL;
for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
pte_t pte = *ptep;
if (pte_none(pte))
unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0;
int ret = 0;
+ int atomic = details && details->atomic;
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
unsigned long start;
zap_bytes -= block;
if ((long)zap_bytes > 0)
continue;
- if (need_resched()) {
+ if (!atomic && need_resched()) {
int fullmm = tlb_is_full_mm(*tlbp);
tlb_finish_mmu(*tlbp, tlb_start, start);
cond_resched_lock(&mm->page_table_lock);
unsigned long end = address + size;
unsigned long nr_accounted = 0;
- might_sleep();
-
if (is_vm_hugetlb_page(vma)) {
zap_hugepage_range(vma, address, size);
return;
zea = vea;
zap_page_range(vma,
((zba - vba) << PAGE_SHIFT) + vma->vm_start,
- (zea - zba + 1) << PAGE_SHIFT,
- details->check_mapping? details: NULL);
+ (zea - zba + 1) << PAGE_SHIFT, details);
}
}
* but 0 when invalidating pagecache, don't throw away private data.
*/
void unmap_mapping_range(struct address_space *mapping,
- loff_t const holebegin, loff_t const holelen, int even_cows)
+ loff_t const holebegin, loff_t const holelen, int even_cows)
{
struct zap_details details;
pgoff_t hba = holebegin >> PAGE_SHIFT;
details.nonlinear_vma = NULL;
details.first_index = hba;
details.last_index = hba + hlen - 1;
+ details.atomic = 1; /* i_mmap_lock is held across the unmap below */
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
- down(&mapping->i_shared_sem);
+ spin_lock(&mapping->i_mmap_lock);
/* Protect against page fault */
atomic_inc(&mapping->truncate_count);
if (unlikely(!list_empty(&mapping->i_mmap)))
if (unlikely(!list_empty(&mapping->i_mmap_shared)))
unmap_mapping_range_list(&mapping->i_mmap_shared, &details);
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
}
EXPORT_SYMBOL(unmap_mapping_range);
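For context, a hedged sketch of the fault-side half of the truncate_count handshake (an assumption based on do_no_page() in this era of the kernel, not part of this patch): the fault handler samples the counter before calling ->nopage() and rechecks it under page_table_lock, retrying if a truncate raced with it.

retry:
	sequence = atomic_read(&mapping->truncate_count);
	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
	/* ... allocate/COW as needed ... */
	spin_lock(&mm->page_table_lock);
	if (unlikely(sequence != atomic_read(&mapping->truncate_count))) {
		spin_unlock(&mm->page_table_lock);
		page_cache_release(new_page);
		goto retry;			/* the truncate won the race */
	}
	/* ... install the pte ... */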
EXPORT_SYMBOL(vm_committed_space);
/*
- * Requires inode->i_mapping->i_shared_sem
+ * Requires inode->i_mapping->i_mmap_lock
*/
static inline void
__remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode)
if (file) {
struct address_space *mapping = file->f_mapping;
- down(&mapping->i_shared_sem);
+ spin_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file->f_dentry->d_inode);
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
}
}
mapping = vma->vm_file->f_mapping;
if (mapping)
- down(&mapping->i_shared_sem);
+ spin_lock(&mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock);
__vma_link(mm, vma, prev, rb_link, rb_parent);
spin_unlock(&mm->page_table_lock);
if (mapping)
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
mark_mm_hugetlb(mm, vma);
mm->map_count++;
/*
* Insert vm structure into process list sorted by address and into the inode's
* i_mmap ring. The caller should hold mm->page_table_lock and
- * ->f_mappping->i_shared_sem if vm_file is non-NULL.
+ * ->f_mapping->i_mmap_lock if vm_file is non-NULL.
*/
static void
__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
spinlock_t *lock = &mm->page_table_lock;
struct inode *inode = file ? file->f_dentry->d_inode : NULL;
- struct semaphore *i_shared_sem;
+ spinlock_t *i_mmap_lock;
/*
* We later require that vma->vm_flags == vm_flags, so this tests
if (vm_flags & VM_SPECIAL)
return NULL;
- i_shared_sem = file ? &file->f_mapping->i_shared_sem : NULL;
+ i_mmap_lock = file ? &file->f_mapping->i_mmap_lock : NULL;
if (!prev) {
prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
if (unlikely(file && prev->vm_next &&
prev->vm_next->vm_file == file)) {
- down(i_shared_sem);
+ spin_lock(i_mmap_lock);
need_up = 1;
}
spin_lock(lock);
__remove_shared_vm_struct(next, inode);
spin_unlock(lock);
if (need_up)
- up(i_shared_sem);
+ spin_unlock(i_mmap_lock);
if (file)
fput(file);
}
spin_unlock(lock);
if (need_up)
- up(i_shared_sem);
+ spin_unlock(i_mmap_lock);
return prev;
}
return NULL;
if (end == prev->vm_start) {
if (file)
- down(i_shared_sem);
+ spin_lock(i_mmap_lock);
spin_lock(lock);
prev->vm_start = addr;
prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
spin_unlock(lock);
if (file)
- up(i_shared_sem);
+ spin_unlock(i_mmap_lock);
return prev;
}
}
mapping = vma->vm_file->f_mapping;
if (mapping)
- down(&mapping->i_shared_sem);
+ spin_lock(&mapping->i_mmap_lock);
spin_lock(&mm->page_table_lock);
if (new_below) {
spin_unlock(&mm->page_table_lock);
if (mapping)
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
return 0;
}
/* Insert vm structure into process list sorted by address
* and into the inode's i_mmap ring. If vm_file is non-NULL
- * then i_shared_sem is taken here.
+ * then i_mmap_lock is taken here.
*/
void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
{
unsigned long new_len, unsigned long new_addr)
{
struct mm_struct *mm = vma->vm_mm;
- struct address_space *mapping = NULL;
struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags;
unsigned long new_pgoff;
if (!new_vma)
return -ENOMEM;
- if (vma->vm_file) {
- /*
- * Subtle point from Rajesh Venkatasubramanian: before
- * moving file-based ptes, we must lock vmtruncate out,
- * since it might clean the dst vma before the src vma,
- * and we propagate stale pages into the dst afterward.
- */
- mapping = vma->vm_file->f_mapping;
- down(&mapping->i_shared_sem);
- }
moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
if (moved_len < old_len) {
/*
if (cows) /* Downgrade or remove this message later */
printk(KERN_WARNING "%s: mremap moved %d cows\n",
current->comm, cows);
- if (mapping)
- up(&mapping->i_shared_sem);
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
*
* This function is only called from page_referenced for object-based pages.
*
- * The semaphore address_space->i_shared_sem is tried. If it can't be gotten,
+ * The spinlock address_space->i_mmap_lock is tried. If it can't be gotten,
* assume a reference count of 0, so try_to_unmap will then have a go.
*/
static inline int page_referenced_file(struct page *page)
int referenced = 0;
int failed = 0;
- if (down_trylock(&mapping->i_shared_sem))
+ if (!spin_trylock(&mapping->i_mmap_lock))
return 0;
list_for_each_entry(vma, &mapping->i_mmap, shared) {
WARN_ON(!failed);
out:
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
return referenced;
}
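One detail worth spelling out (illustration only, not in the patch): the sense of the test flips because the two trylock primitives have opposite return conventions.

	if (down_trylock(&mapping->i_shared_sem))	/* old: nonzero == failed to get it */
		return 0;
	if (!spin_trylock(&mapping->i_mmap_lock))	/* new: zero == failed to get it    */
		return 0;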
*
* This function is only called from try_to_unmap for object-based pages.
*
- * The semaphore address_space->i_shared_sem is tried. If it can't be gotten,
+ * The spinlock address_space->i_mmap_lock is tried. If it can't be gotten,
* return a temporary error.
*/
static inline int try_to_unmap_file(struct page *page)
unsigned long max_nl_cursor = 0;
unsigned long max_nl_size = 0;
- if (down_trylock(&mapping->i_shared_sem))
+ if (!spin_trylock(&mapping->i_mmap_lock))
return ret;
list_for_each_entry(vma, &mapping->i_mmap, shared) {
relock:
page_map_lock(page);
out:
- up(&mapping->i_shared_sem);
+ spin_unlock(&mapping->i_mmap_lock);
return ret;
}