[PATCH] Convert i_shared_sem back to a spinlock
author    Andrew Morton <akpm@osdl.org>
          Sat, 22 May 2004 15:02:36 +0000 (08:02 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
          Sat, 22 May 2004 15:02:36 +0000 (08:02 -0700)
Having a semaphore in there causes modest performance regressions on heavily
mmap-intensive workloads on some hardware.  Specifically, up to 30% in SDET on
NUMAQ and big PPC64.

So switch it back to being a spinlock.  This does mean that unmap_vmas() needs
to be told whether or not it is allowed to schedule away; that's simple to do
via the zap_details structure.

This change means that there will be high scheduling latencies when someone
truncates a large file which is currently mmapped, but nobody does that
anyway.  The scheduling points in unmap_vmas() are mainly for munmap() and
exit(), and they will still work OK for those.
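
In outline, the new plumbing (condensed from the include/linux/mm.h and
mm/memory.c hunks below) looks like this:

	struct zap_details {
		struct address_space *check_mapping;	/* Check page->mapping if set */
		pgoff_t first_index;			/* Lowest page->index to unmap */
		pgoff_t last_index;			/* Highest page->index to unmap */
		int atomic;				/* May not schedule() */
	};

	/* unmap_vmas() reschedules only when the caller permits it: */
	int atomic = details && details->atomic;
	...
	if (!atomic && need_resched()) {
		/* finish the TLB gather, cond_resched_lock(), restart */
	}

	/* unmap_mapping_range() forbids it, since i_mmap_lock is now held: */
	details.atomic = 1;	/* A spinlock is held */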

From: Hugh Dickins <hugh@veritas.com>

  Sorry, my premature optimizations (trying to pass down NULL zap_details
  except when needed) have caught you out doubly: unmap_mapping_range_list was
  NULLing the details even though atomic was set; and if it hadn't, then
  zap_pte_range would have missed free_swap_and_cache and pte_clear when pte
  not present.  Moved the optimization into zap_pte_range itself.  Plus
  massive documentation update.
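
Moving it there means unmap_mapping_range_list() can simply pass details
through; the check at the top of zap_pte_range() (the two added lines in the
mm/memory.c hunk below, shown here with an explanatory comment) is:

	/*
	 * If the caller passed details, but none that this level acts on,
	 * drop them: the loop then takes its cheap details==NULL path, and
	 * free_swap_and_cache()/pte_clear() still run for not-present ptes.
	 */
	if (details && !details->check_mapping && !details->nonlinear_vma)
		details = NULL;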

From: Hugh Dickins <hugh@veritas.com>

  Here's a second patch to add to the first: mremap's cows can't come home
  without releasing the i_mmap_lock, so better to move the whole "Subtle point"
  locking from move_vma into move_page_tables.  And it's possible for the file
  that was behind an anonymous page to be truncated while we drop that lock, so
  we don't want to abort mremap because of VM_FAULT_SIGBUS.

  (Eek, should we be checking do_swap_page of a vm_file area against the
  truncate_count sequence?  Technically yes, but I doubt we need bother.)

- We cannot hold i_mmap_lock across move_one_page() because
  move_one_page() needs to perform __GFP_WAIT allocations of pagetable pages.

- Move the cond_resched() out so we test it once per page rather than only
  when move_one_page() returns -EAGAIN, as sketched below.
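
A rough sketch of the resulting per-page locking (the mm/mremap.c hunks below
show only the move_vma() side; names like alloc_one_pte_map() follow the
mremap.c of the era, and this structure is an approximation, not the literal
patch):

	spin_lock(&mapping->i_mmap_lock);	/* the "Subtle point" lock */
	/* move the pte here, unless the destination pagetable is missing: */
	if (!dst) {
		/* can't hold a spinlock across a __GFP_WAIT allocation */
		spin_unlock(&mapping->i_mmap_lock);
		dst = alloc_one_pte_map(mm, new_addr);
		spin_lock(&mapping->i_mmap_lock);
		/* revalidate: truncate may have run while we dropped it */
	}
	spin_unlock(&mapping->i_mmap_lock);
	cond_resched();			/* tested once per page, -EAGAIN or not */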

13 files changed:
Documentation/vm/locking
fs/hugetlbfs/inode.c
fs/inode.c
include/linux/fs.h
include/linux/mm.h
include/linux/rmap.h
kernel/fork.c
mm/filemap.c
mm/madvise.c
mm/memory.c
mm/mmap.c
mm/mremap.c
mm/rmap.c

diff --git a/Documentation/vm/locking b/Documentation/vm/locking
index d29992e40aa28d789ab7b80bb0def499e2a78f39..c3ef09ae3bb114f484247389776415ce401ed405 100644
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by
 expand_stack(), it is hard to come up with a destructive scenario without 
 having the vmlist protection in this case.
 
-The page_table_lock nests with the inode i_shared_sem and the kmem cache
+The page_table_lock nests with the inode i_mmap_lock and the kmem cache
 c_spinlock spinlocks.  This is okay, since the kmem code asks for pages after
 dropping c_spinlock.  The page_table_lock also nests with pagecache_lock and
 pagemap_lru_lock spinlocks, and no code asks for memory with these locks
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4290213f401004bff5ebf469a22e9846b6d391bb..f83ccf8530d807e547e4275fa705f56709a45b66 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -321,12 +321,12 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
        pgoff = offset >> HPAGE_SHIFT;
 
        inode->i_size = offset;
-       down(&mapping->i_shared_sem);
+       spin_lock(&mapping->i_mmap_lock);
        if (!list_empty(&mapping->i_mmap))
                hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
        if (!list_empty(&mapping->i_mmap_shared))
                hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
-       up(&mapping->i_shared_sem);
+       spin_unlock(&mapping->i_mmap_lock);
        truncate_hugepages(mapping, offset);
        return 0;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 4e8cbca353f71fb9adebff5e2281dd81264de551..4897c3a35c848810c5f09b43a8f555a8ae2aa27c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -196,7 +196,7 @@ void inode_init_once(struct inode *inode)
        init_rwsem(&inode->i_alloc_sem);
        INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
        spin_lock_init(&inode->i_data.tree_lock);
-       init_MUTEX(&inode->i_data.i_shared_sem);
+       spin_lock_init(&inode->i_data.i_mmap_lock);
        atomic_set(&inode->i_data.truncate_count, 0);
        INIT_LIST_HEAD(&inode->i_data.private_list);
        spin_lock_init(&inode->i_data.private_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a10ba0ee9c1ae86f0e6da33f273879a3ab0f32ed..c30ed84a5f7f60e7ab18d2c67917212349f0f0d4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -331,7 +331,7 @@ struct address_space {
        struct address_space_operations *a_ops; /* methods */
        struct list_head        i_mmap;         /* list of private mappings */
        struct list_head        i_mmap_shared;  /* list of shared mappings */
-       struct semaphore        i_shared_sem;   /* protect both above lists */
+       spinlock_t              i_mmap_lock;    /* protect both above lists */
        atomic_t                truncate_count; /* Cover race condition with truncate */
        unsigned long           flags;          /* error bits/gfp mask */
        struct backing_dev_info *backing_dev_info; /* device readahead, etc */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 905e87989dd6b6e9a34273f6074ff13ea03c8f52..75cdd2d98da6b71f870a6c266f79fdfd26068db8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -477,6 +477,7 @@ struct zap_details {
        struct address_space *check_mapping;    /* Check page->mapping if set */
        pgoff_t first_index;                    /* Lowest page->index to unmap */
        pgoff_t last_index;                     /* Highest page->index to unmap */
+       int atomic;                             /* May not schedule() */
 };
 
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 33bfd51cf009da2521ce2c003b4a0825f88ccdb0..2e48763371be5e9214cdc7b77efacc8509d51fe9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -69,15 +69,9 @@ static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr)
 static inline int make_page_exclusive(struct vm_area_struct *vma,
                                        unsigned long addr)
 {
-       switch (handle_mm_fault(vma->vm_mm, vma, addr, 1)) {
-       case VM_FAULT_MINOR:
-       case VM_FAULT_MAJOR:
+       if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
                return 0;
-       case VM_FAULT_OOM:
-               return -ENOMEM;
-       default:
-               return -EFAULT;
-       }
+       return -ENOMEM;
 }
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b8f8e6f78730960bfe4bcbe1885793f0fb846e..3cb3bc41b0c0858ee45f812df7fdf987a747a5f4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -324,9 +324,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
                                atomic_dec(&inode->i_writecount);
       
                        /* insert tmp into the share list, just after mpnt */
-                       down(&file->f_mapping->i_shared_sem);
+                       spin_lock(&file->f_mapping->i_mmap_lock);
                        list_add(&tmp->shared, &mpnt->shared);
-                       up(&file->f_mapping->i_shared_sem);
+                       spin_unlock(&file->f_mapping->i_mmap_lock);
                }
 
                /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 1dcf80bf791818a966d47feaf1d9196e6812396f..804d96e88fd78a9ec31662ef1661af2c3d3f9dd5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
 /*
  * Lock ordering:
  *
- *  ->i_shared_sem             (vmtruncate)
+ *  ->i_mmap_lock              (vmtruncate)
  *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
  *      ->swap_list_lock
  *        ->swap_device_lock   (exclusive_swap_page, others)
  *          ->mapping->tree_lock
  *
  *  ->i_sem
- *    ->i_shared_sem           (truncate->unmap_mapping_range)
+ *    ->i_mmap_lock            (truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
- *    ->i_shared_sem           (various places)
+ *    ->i_mmap_lock            (various places)
  *
  *  ->mmap_sem
  *    ->lock_page              (access_process_vm)
diff --git a/mm/madvise.c b/mm/madvise.c
index 4b1360317c7cf7db720048d804fe4e5a7a858777..e54f3906d8ccaf0645495bebb8c57345a8980ae8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -92,16 +92,14 @@ static long madvise_willneed(struct vm_area_struct * vma,
 static long madvise_dontneed(struct vm_area_struct * vma,
                             unsigned long start, unsigned long end)
 {
-       struct zap_details details;
-
        if (vma->vm_flags & VM_LOCKED)
                return -EINVAL;
 
        if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-               details.check_mapping = NULL;
-               details.nonlinear_vma = vma;
-               details.first_index = 0;
-               details.last_index = ULONG_MAX;
+               struct zap_details details = {
+                       .nonlinear_vma = vma,
+                       .last_index = ULONG_MAX,
+               };
                zap_page_range(vma, start, end - start, &details);
        } else
                zap_page_range(vma, start, end - start, NULL);
diff --git a/mm/memory.c b/mm/memory.c
index 1c2ce1f326e596f36a8b9f3d622368cea0bb9fcf..b36b907bcaa04092bca483c1e1f23a1cdbe89f91 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -365,6 +365,8 @@ static void zap_pte_range(struct mmu_gather *tlb,
        if (offset + size > PMD_SIZE)
                size = PMD_SIZE - offset;
        size &= PAGE_MASK;
+       if (details && !details->check_mapping && !details->nonlinear_vma)
+               details = NULL;
        for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
                pte_t pte = *ptep;
                if (pte_none(pte))
@@ -518,6 +520,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
        unsigned long tlb_start = 0;    /* For tlb_finish_mmu */
        int tlb_start_valid = 0;
        int ret = 0;
+       int atomic = details && details->atomic;
 
        for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
                unsigned long start;
@@ -555,7 +558,7 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
                        zap_bytes -= block;
                        if ((long)zap_bytes > 0)
                                continue;
-                       if (need_resched()) {
+                       if (!atomic && need_resched()) {
                                int fullmm = tlb_is_full_mm(*tlbp);
                                tlb_finish_mmu(*tlbp, tlb_start, start);
                                cond_resched_lock(&mm->page_table_lock);
@@ -583,8 +586,6 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
        unsigned long end = address + size;
        unsigned long nr_accounted = 0;
 
-       might_sleep();
-
        if (is_vm_hugetlb_page(vma)) {
                zap_hugepage_range(vma, address, size);
                return;
@@ -1133,8 +1134,7 @@ static void unmap_mapping_range_list(struct list_head *head,
                        zea = vea;
                zap_page_range(vma,
                        ((zba - vba) << PAGE_SHIFT) + vma->vm_start,
-                       (zea - zba + 1) << PAGE_SHIFT,
-                       details->check_mapping? details: NULL);
+                       (zea - zba + 1) << PAGE_SHIFT, details);
        }
 }
 
@@ -1155,7 +1155,7 @@ static void unmap_mapping_range_list(struct list_head *head,
  * but 0 when invalidating pagecache, don't throw away private data.
  */
 void unmap_mapping_range(struct address_space *mapping,
-       loff_t const holebegin, loff_t const holelen, int even_cows)
+               loff_t const holebegin, loff_t const holelen, int even_cows)
 {
        struct zap_details details;
        pgoff_t hba = holebegin >> PAGE_SHIFT;
@@ -1173,10 +1173,11 @@ void unmap_mapping_range(struct address_space *mapping,
        details.nonlinear_vma = NULL;
        details.first_index = hba;
        details.last_index = hba + hlen - 1;
+       details.atomic = 1;     /* A spinlock is held */
        if (details.last_index < details.first_index)
                details.last_index = ULONG_MAX;
 
-       down(&mapping->i_shared_sem);
+       spin_lock(&mapping->i_mmap_lock);
        /* Protect against page fault */
        atomic_inc(&mapping->truncate_count);
        if (unlikely(!list_empty(&mapping->i_mmap)))
@@ -1187,7 +1188,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
        if (unlikely(!list_empty(&mapping->i_mmap_shared)))
                unmap_mapping_range_list(&mapping->i_mmap_shared, &details);
-       up(&mapping->i_shared_sem);
+       spin_unlock(&mapping->i_mmap_lock);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 54aab8d30d4eaf6fc42e03378d0aaccb5be272f2..0f66a034273d9b7b67970c68288bc584a4977078 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -63,7 +63,7 @@ EXPORT_SYMBOL(sysctl_max_map_count);
 EXPORT_SYMBOL(vm_committed_space);
 
 /*
- * Requires inode->i_mapping->i_shared_sem
+ * Requires inode->i_mapping->i_mmap_lock
  */
 static inline void
 __remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode)
@@ -84,9 +84,9 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma)
 
        if (file) {
                struct address_space *mapping = file->f_mapping;
-               down(&mapping->i_shared_sem);
+               spin_lock(&mapping->i_mmap_lock);
                __remove_shared_vm_struct(vma, file->f_dentry->d_inode);
-               up(&mapping->i_shared_sem);
+               spin_unlock(&mapping->i_mmap_lock);
        }
 }
 
@@ -286,12 +286,12 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
                mapping = vma->vm_file->f_mapping;
 
        if (mapping)
-               down(&mapping->i_shared_sem);
+               spin_lock(&mapping->i_mmap_lock);
        spin_lock(&mm->page_table_lock);
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        spin_unlock(&mm->page_table_lock);
        if (mapping)
-               up(&mapping->i_shared_sem);
+               spin_unlock(&mapping->i_mmap_lock);
 
        mark_mm_hugetlb(mm, vma);
        mm->map_count++;
@@ -301,7 +301,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 /*
  * Insert vm structure into process list sorted by address and into the inode's
  * i_mmap ring. The caller should hold mm->page_table_lock and
- * ->f_mappping->i_shared_sem if vm_file is non-NULL.
+ * ->f_mappping->i_mmap_lock if vm_file is non-NULL.
  */
 static void
 __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
@@ -391,7 +391,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
        spinlock_t *lock = &mm->page_table_lock;
        struct inode *inode = file ? file->f_dentry->d_inode : NULL;
-       struct semaphore *i_shared_sem;
+       spinlock_t *i_mmap_lock;
 
        /*
         * We later require that vma->vm_flags == vm_flags, so this tests
@@ -400,7 +400,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (vm_flags & VM_SPECIAL)
                return NULL;
 
-       i_shared_sem = file ? &file->f_mapping->i_shared_sem : NULL;
+       i_mmap_lock = file ? &file->f_mapping->i_mmap_lock : NULL;
 
        if (!prev) {
                prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
@@ -417,7 +417,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
 
                if (unlikely(file && prev->vm_next &&
                                prev->vm_next->vm_file == file)) {
-                       down(i_shared_sem);
+                       spin_lock(i_mmap_lock);
                        need_up = 1;
                }
                spin_lock(lock);
@@ -435,7 +435,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        __remove_shared_vm_struct(next, inode);
                        spin_unlock(lock);
                        if (need_up)
-                               up(i_shared_sem);
+                               spin_unlock(i_mmap_lock);
                        if (file)
                                fput(file);
 
@@ -445,7 +445,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
                }
                spin_unlock(lock);
                if (need_up)
-                       up(i_shared_sem);
+                       spin_unlock(i_mmap_lock);
                return prev;
        }
 
@@ -460,13 +460,13 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
                        return NULL;
                if (end == prev->vm_start) {
                        if (file)
-                               down(i_shared_sem);
+                               spin_lock(i_mmap_lock);
                        spin_lock(lock);
                        prev->vm_start = addr;
                        prev->vm_pgoff -= (end - addr) >> PAGE_SHIFT;
                        spin_unlock(lock);
                        if (file)
-                               up(i_shared_sem);
+                               spin_unlock(i_mmap_lock);
                        return prev;
                }
        }
@@ -1232,7 +1232,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                 mapping = vma->vm_file->f_mapping;
 
        if (mapping)
-               down(&mapping->i_shared_sem);
+               spin_lock(&mapping->i_mmap_lock);
        spin_lock(&mm->page_table_lock);
 
        if (new_below) {
@@ -1245,7 +1245,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
        spin_unlock(&mm->page_table_lock);
        if (mapping)
-               up(&mapping->i_shared_sem);
+               spin_unlock(&mapping->i_mmap_lock);
 
        return 0;
 }
@@ -1479,7 +1479,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap ring.  If vm_file is non-NULL
- * then i_shared_sem is taken here.
+ * then i_mmap_lock is taken here.
  */
 void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
diff --git a/mm/mremap.c b/mm/mremap.c
index b54bbfb5cb795edbdbb774ca1b0b0722650b3189..d7a9b0f3d7cbb10aa08a6a5cc50e851f7c288041 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -180,7 +180,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                unsigned long new_len, unsigned long new_addr)
 {
        struct mm_struct *mm = vma->vm_mm;
-       struct address_space *mapping = NULL;
        struct vm_area_struct *new_vma;
        unsigned long vm_flags = vma->vm_flags;
        unsigned long new_pgoff;
@@ -201,16 +200,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (!new_vma)
                return -ENOMEM;
 
-       if (vma->vm_file) {
-               /*
-                * Subtle point from Rajesh Venkatasubramanian: before
-                * moving file-based ptes, we must lock vmtruncate out,
-                * since it might clean the dst vma before the src vma,
-                * and we propagate stale pages into the dst afterward.
-                */
-               mapping = vma->vm_file->f_mapping;
-               down(&mapping->i_shared_sem);
-       }
        moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
        if (moved_len < old_len) {
                /*
@@ -227,8 +216,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
        if (cows)       /* Downgrade or remove this message later */
                printk(KERN_WARNING "%s: mremap moved %d cows\n",
                                                        current->comm, cows);
-       if (mapping)
-               up(&mapping->i_shared_sem);
 
        /* Conceal VM_ACCOUNT so old reservation is not undone */
        if (vm_flags & VM_ACCOUNT) {
diff --git a/mm/rmap.c b/mm/rmap.c
index a8267f6945ce4d5283694dd4bdce89caf58011e7..e0290854e30a5e03176b39b06e78c5e665bcdc88 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -300,7 +300,7 @@ migrate:
  *
  * This function is only called from page_referenced for object-based pages.
  *
- * The semaphore address_space->i_shared_sem is tried.  If it can't be gotten,
+ * The semaphore address_space->i_mmap_lock is tried.  If it can't be gotten,
  * assume a reference count of 0, so try_to_unmap will then have a go.
  */
 static inline int page_referenced_file(struct page *page)
@@ -313,7 +313,7 @@ static inline int page_referenced_file(struct page *page)
        int referenced = 0;
        int failed = 0;
 
-       if (down_trylock(&mapping->i_shared_sem))
+       if (!spin_trylock(&mapping->i_mmap_lock))
                return 0;
 
        list_for_each_entry(vma, &mapping->i_mmap, shared) {
@@ -355,7 +355,7 @@ static inline int page_referenced_file(struct page *page)
 
        WARN_ON(!failed);
 out:
-       up(&mapping->i_shared_sem);
+       spin_unlock(&mapping->i_mmap_lock);
        return referenced;
 }
 
@@ -725,7 +725,7 @@ out:
  *
  * This function is only called from try_to_unmap for object-based pages.
  *
- * The semaphore address_space->i_shared_sem is tried.  If it can't be gotten,
+ * The semaphore address_space->i_mmap_lock is tried.  If it can't be gotten,
  * return a temporary error.
  */
 static inline int try_to_unmap_file(struct page *page)
@@ -740,7 +740,7 @@ static inline int try_to_unmap_file(struct page *page)
        unsigned long max_nl_cursor = 0;
        unsigned long max_nl_size = 0;
 
-       if (down_trylock(&mapping->i_shared_sem))
+       if (!spin_trylock(&mapping->i_mmap_lock))
                return ret;
 
        list_for_each_entry(vma, &mapping->i_mmap, shared) {
@@ -839,7 +839,7 @@ static inline int try_to_unmap_file(struct page *page)
 relock:
        page_map_lock(page);
 out:
-       up(&mapping->i_shared_sem);
+       spin_unlock(&mapping->i_mmap_lock);
        return ret;
 }