git.hungrycats.org Git - linux/commitdiff
[PATCH] Re: [PATCH] Preemptible Kernel for 2.5
author     Robert Love <rml@tech9.net>
           Sat, 9 Feb 2002 03:11:35 +0000 (19:11 -0800)
committer  Linus Torvalds <torvalds@home.transmeta.com>
           Sat, 9 Feb 2002 03:11:35 +0000 (19:11 -0800)
On Sat, 2002-02-09 at 01:43, Linus Torvalds wrote:

> That will clean up all your issues with header file ordering.

You are right, it did.  I removed all the sched.h dependencies, which
greatly reduced the size of the patch.  I now use current_thread_info()
and none of the header or include hackery from before.  I've tested this
successfully both with and without preemption enabled.

I appreciate your help with this.

Again, this is a minimal i386-only patch.  I have other arches,
documentation, etc.  Patch against 2.5.4-pre5.  Enjoy,

Robert Love
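
The whole patch revolves around a per-thread preempt_count reached through
current_thread_info(), as mentioned above: any code that touches per-CPU
state has to hold preemption off around the access.  The pgalloc.h and
highmem.h hunks below add exactly this pattern; as a hypothetical
illustration (not code from this patch):

static int hit_count[NR_CPUS];          /* hypothetical per-CPU counters */

static void note_hit(void)
{
        /*
         * With CONFIG_PREEMPT, smp_processor_id() is only stable while
         * preemption is disabled; without the disable/enable pair the
         * task could be scheduled away (and, on SMP, resumed on another
         * CPU) in the middle of the update.
         */
        preempt_disable();
        hit_count[smp_processor_id()]++;
        preempt_enable();               /* may end up in preempt_schedule() */
}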

26 files changed:
arch/i386/Config.help
arch/i386/config.in
arch/i386/kernel/entry.S
arch/i386/kernel/i387.c
arch/i386/kernel/smp.c
arch/i386/kernel/traps.c
fs/exec.c
include/asm-i386/hardirq.h
include/asm-i386/highmem.h
include/asm-i386/hw_irq.h
include/asm-i386/i387.h
include/asm-i386/pgalloc.h
include/asm-i386/smplock.h
include/asm-i386/softirq.h
include/asm-i386/spinlock.h
include/asm-i386/thread_info.h
include/linux/brlock.h
include/linux/sched.h
include/linux/smp.h
include/linux/smp_lock.h
include/linux/spinlock.h
kernel/exit.c
kernel/fork.c
kernel/ksyms.c
kernel/sched.c
net/socket.c

index 98b4c7542cd178d0905524325854a894cf8358a3..abd88edb2efa899c1d534d5801fc8d586b63319b 100644
--- a/arch/i386/Config.help
+++ b/arch/i386/Config.help
@@ -25,6 +25,16 @@ CONFIG_SMP
 
   If you don't know what to do here, say N.
 
+CONFIG_PREEMPT
+  This option reduces the latency of the kernel when reacting to
+  real-time or interactive events by allowing a low priority process to
+  be preempted even if it is in kernel mode executing a system call.
+  This allows applications to run more reliably even when the system is
+  under load.
+
+  Say Y here if you are building a kernel for a desktop, embedded
+  or real-time system.  Say N if you are unsure.
+
 CONFIG_X86
   This is Linux's home port.  Linux was originally native to the Intel
   386, and runs on all the later x86 processors including the Intel
index ae7cd0b7d463b8081ef769802ee083abd45283a9..21c2586884c64eebd3d5158f6b1d54be9a7e646d 100644
--- a/arch/i386/config.in
+++ b/arch/i386/config.in
@@ -167,6 +167,7 @@ fi
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
+bool 'Preemptible Kernel' CONFIG_PREEMPT
 if [ "$CONFIG_SMP" != "y" ]; then
    bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
    dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC
@@ -180,9 +181,12 @@ else
    bool 'Multiquad NUMA system' CONFIG_MULTIQUAD
 fi
 
-if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
-   define_bool CONFIG_HAVE_DEC_LOCK y
+if [ "$CONFIG_SMP" = "y" -o "$CONFIG_PREEMPT" = "y" ]; then
+   if [ "$CONFIG_X86_CMPXCHG" = "y" ]; then
+      define_bool CONFIG_HAVE_DEC_LOCK y
+   fi
 fi
+
 endmenu
 
 mainmenu_option next_comment
index 65bfc86e6828ff95123f6fd8a55a9c9bd14626d2..f006f47c4d18df2951797eb63200d994df9f8043 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -69,6 +69,37 @@ IF_MASK              = 0x00000200
 NT_MASK                = 0x00004000
 VM_MASK                = 0x00020000
 
+/* These are offsets into the irq_stat structure
+ * There is one per cpu and it is aligned to a 32-
+ * byte boundary (we put that here as a shift count)
+ */
+irq_array_shift                        = CONFIG_X86_L1_CACHE_SHIFT
+irq_stat_local_irq_count       = 4
+irq_stat_local_bh_count                = 8
+
+#ifdef CONFIG_SMP
+#define GET_CPU_INDX   movl TI_CPU(%ebx),%eax;  \
+                        shll $irq_array_shift,%eax
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx); \
+                             GET_CPU_INDX
+#define CPU_INDX (,%eax)
+#else
+#define GET_CPU_INDX
+#define GET_CURRENT_CPU_INDX GET_THREAD_INFO(%ebx)
+#define CPU_INDX
+#endif
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop cli
+#define init_ret_intr \
+       cli; \
+       decl TI_PRE_COUNT(%ebx);
+#else
+#define preempt_stop
+#define init_ret_intr
+#define resume_kernel restore_all
+#endif
+
 #define SAVE_ALL \
        cld; \
        pushl %es; \
@@ -176,11 +207,12 @@ ENTRY(ret_from_fork)
        ALIGN
 ENTRY(ret_from_intr)
        GET_THREAD_INFO(%ebx)
+       init_ret_intr
 ret_from_exception:
        movl EFLAGS(%esp),%eax          # mix EFLAGS and CS
        movb CS(%esp),%al
        testl $(VM_MASK | 3),%eax
-       jz restore_all                  # returning to kernel-space or vm86-space
+       jz resume_kernel                # returning to kernel or vm86-space
 ENTRY(resume_userspace)
        cli                             # make sure we don't miss an interrupt setting need_resched
                                        # or sigpending between sampling and the iret
@@ -189,6 +221,22 @@ ENTRY(resume_userspace)
        jne work_pending
        jmp restore_all
 
+#ifdef CONFIG_PREEMPT
+ENTRY(resume_kernel)
+       cmpl $0,TI_PRE_COUNT(%ebx)
+       jnz restore_all
+       movl TI_FLAGS(%ebx),%ecx
+       testb $_TIF_NEED_RESCHED,%cl
+       jz restore_all
+       movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
+       addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
+       jnz restore_all
+       incl TI_PRE_COUNT(%ebx)
+       sti
+       call SYMBOL_NAME(preempt_schedule)
+       jmp ret_from_intr
+#endif
+
        # system call handler stub
        ALIGN
 ENTRY(system_call)
@@ -302,6 +350,7 @@ error_code:
        GET_THREAD_INFO(%ebx)
        call *%edi
        addl $8,%esp
+       preempt_stop
        jmp ret_from_exception
 
 ENTRY(coprocessor_error)
@@ -321,12 +370,14 @@ ENTRY(device_not_available)
        movl %cr0,%eax
        testl $0x4,%eax                 # EM (math emulation bit)
        jne device_not_available_emulate
+       preempt_stop
        call SYMBOL_NAME(math_state_restore)
        jmp ret_from_exception
 device_not_available_emulate:
        pushl $0                # temporary storage for ORIG_EIP
        call  SYMBOL_NAME(math_emulate)
        addl $4,%esp
+       preempt_stop
        jmp ret_from_exception
 
 ENTRY(debug)
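
Spelled out in C, the resume_kernel path above only preempts when it is
safe: the thread holds no locks (preempt_count is zero), a reschedule has
actually been requested, and we are not inside an interrupt or bottom-half
handler.  A rough paraphrase of the assembly (a sketch, not code from the
patch):

/* Rough C paraphrase of resume_kernel above -- sketch only. */
static void resume_kernel_check(void)
{
        struct thread_info *ti = current_thread_info();
        int cpu = smp_processor_id();

        if (ti->preempt_count == 0 &&              /* no spinlocks held    */
            (ti->flags & _TIF_NEED_RESCHED) &&     /* reschedule requested */
            local_irq_count(cpu) == 0 &&           /* not in an interrupt  */
            local_bh_count(cpu) == 0) {            /* not in a bottom half */
                ti->preempt_count++;
                /* sti: re-enable interrupts */
                preempt_schedule();
                /* jmp ret_from_intr: recheck; init_ret_intr drops the count */
        }
}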
index a87a362c4e6a272a27fb3c1536529dfc4d04de47..c237c22fef9ba1f3b7b1138ec727e91811c39108 100644
--- a/arch/i386/kernel/i387.c
+++ b/arch/i386/kernel/i387.c
@@ -10,6 +10,7 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/math_emu.h>
@@ -63,6 +64,7 @@ void save_init_fpu( struct task_struct *tsk )
 
 void kernel_fpu_begin(void)
 {
+       preempt_disable();
        if (test_thread_flag(TIF_USEDFPU)) {
                __save_init_fpu(current);
                return;
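
Together with the matching preempt_enable() that kernel_fpu_end() gains in
include/asm-i386/i387.h further down, callers keep the same begin/end
pattern but can no longer be scheduled away half-way through using the FPU.
A hypothetical caller, for illustration only:

/*
 * Hypothetical user of the interface -- not part of this patch.  FPU/MMX/SSE
 * state is per-task and lazily switched, so nothing between
 * kernel_fpu_begin() and kernel_fpu_end() may be preempted.
 */
static void checksum_with_sse(const void *buf, size_t len)
{
        kernel_fpu_begin();             /* now also disables preemption */
        /* ... use SSE/MMX registers on buf[0..len) ... */
        kernel_fpu_end();               /* stts() + preempt_enable()    */
}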
index af1dc738720634449776c9539f2e61df44d2b4ad..7fdbdfdba7dabc92651e458ba48a4649a63a7bdb 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -497,7 +497,7 @@ void smp_migrate_task(int cpu, task_t *p)
        /*
         * The target CPU will unlock the migration spinlock:
         */
-       spin_lock(&migration_lock);
+       _raw_spin_lock(&migration_lock);
        new_task = p;
        send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
 }
@@ -511,7 +511,7 @@ asmlinkage void smp_task_migration_interrupt(void)
 
        ack_APIC_irq();
        p = new_task;
-       spin_unlock(&migration_lock);
+       _raw_spin_unlock(&migration_lock);
        sched_task_migrated(p);
 }
 /*
index ad68256f83c564f2fecbb0537758b4e4ad0724f8..7c2c2520d1c19c1871d1631c1b478c63565b99d2 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -710,6 +710,8 @@ asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled.
  */
 asmlinkage void math_state_restore(struct pt_regs regs)
 {
index 3e34704f507c06cfb01ede9a794fb8f2f9cf4190..0167e7c5b8916cea44650c4fa6f36f3655bdd83b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -420,8 +420,8 @@ static int exec_mmap(void)
                active_mm = current->active_mm;
                current->mm = mm;
                current->active_mm = mm;
-               task_unlock(current);
                activate_mm(active_mm, mm);
+               task_unlock(current);
                mm_release();
                if (old_mm) {
                        if (active_mm != old_mm) BUG();
index 4acb4b09ddc3a081378a00780808efbcc5f83a47..64ef2bcf2a899fe148026ff65dae7ecc9df52365 100644
--- a/include/asm-i386/hardirq.h
+++ b/include/asm-i386/hardirq.h
@@ -36,6 +36,8 @@ typedef struct {
 
 #define synchronize_irq()      barrier()
 
+#define release_irqlock(cpu)   do { } while (0)
+
 #else
 
 #include <asm/atomic.h>
index 42f32426eac5ec372c7298891051ff4f141dfad3..e8d4f37ae84a5c75c96cba00f652fcd068c385f9 100644
--- a/include/asm-i386/highmem.h
+++ b/include/asm-i386/highmem.h
@@ -88,6 +88,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type)
        enum fixed_addresses idx;
        unsigned long vaddr;
 
+       preempt_disable();
        if (page < highmem_start_page)
                return page_address(page);
 
@@ -109,8 +110,10 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
        unsigned long vaddr = (unsigned long) kvaddr;
        enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
-       if (vaddr < FIXADDR_START) // FIXME
+       if (vaddr < FIXADDR_START) { // FIXME
+               preempt_enable();
                return;
+       }
 
        if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
                BUG();
@@ -122,6 +125,8 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type)
        pte_clear(kmap_pte-idx);
        __flush_tlb_one(vaddr);
 #endif
+
+       preempt_enable();
 }
 
 #endif /* __KERNEL__ */
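
An atomic kmap is a per-CPU fixmap slot (the slot index includes
smp_processor_id()), so the window between kmap_atomic() and
kunmap_atomic() must now keep preemption off as well.  A typical caller
might look like this (illustration only, not code from this patch):

/* Illustration only: the mapping is per-CPU, so the task must not be
 * preempted (and possibly resumed on another CPU) while it is in use. */
static void zero_one_highpage(struct page *page)
{
        char *kaddr = kmap_atomic(page, KM_USER0);  /* preemption goes off */

        memset(kaddr, 0, PAGE_SIZE);
        kunmap_atomic(kaddr, KM_USER0);             /* preemption back on  */
}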
index 5b43b7c41d6459d1df763fc5c9e2d50dc7dce279..b572c28744cb86dac0e959ea776529be4cbe5497 100644
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
@@ -96,6 +96,18 @@ extern char _stext, _etext;
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
+#define GET_THREAD_INFO \
+       "movl $-8192, %ebx\n\t" \
+       "andl %esp, %ebx\n\t"
+
+#ifdef CONFIG_PREEMPT
+#define BUMP_LOCK_COUNT \
+       GET_THREAD_INFO \
+       "incl 16(%ebx)\n\t"
+#else
+#define BUMP_LOCK_COUNT
+#endif
+
 #define SAVE_ALL \
        "cld\n\t" \
        "pushl %es\n\t" \
@@ -109,7 +121,8 @@ extern char _stext, _etext;
        "pushl %ebx\n\t" \
        "movl $" STR(__KERNEL_DS) ",%edx\n\t" \
        "movl %edx,%ds\n\t" \
-       "movl %edx,%es\n\t"
+       "movl %edx,%es\n\t" \
+       BUMP_LOCK_COUNT
 
 #define IRQ_NAME2(nr) nr##_interrupt(void)
 #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
index 462ec5a234c5855635a5a15dce7aa0b99a621e35..b8b60c2744e98ea8eb4e2f153b26d1f49a4bd69f 100644
--- a/include/asm-i386/i387.h
+++ b/include/asm-i386/i387.h
@@ -12,6 +12,7 @@
 #define __ASM_I386_I387_H
 
 #include <linux/sched.h>
+#include <linux/spinlock.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
@@ -24,7 +25,7 @@ extern void save_init_fpu( struct task_struct *tsk );
 extern void restore_fpu( struct task_struct *tsk );
 
 extern void kernel_fpu_begin(void);
-#define kernel_fpu_end() stts()
+#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
 
 
 #define unlazy_fpu( tsk ) do { \
index 090573f59076a9fda711c142de050984b5d2f5e8..67773c0472ca3998c0732c173b819c98d62fe1c7 100644
--- a/include/asm-i386/pgalloc.h
+++ b/include/asm-i386/pgalloc.h
@@ -75,20 +75,26 @@ static inline pgd_t *get_pgd_fast(void)
 {
        unsigned long *ret;
 
+       preempt_disable();
        if ((ret = pgd_quicklist) != NULL) {
                pgd_quicklist = (unsigned long *)(*ret);
                ret[0] = 0;
                pgtable_cache_size--;
-       } else
+               preempt_enable();
+       } else {
+               preempt_enable();
                ret = (unsigned long *)get_pgd_slow();
+       }
        return (pgd_t *)ret;
 }
 
 static inline void free_pgd_fast(pgd_t *pgd)
 {
+       preempt_disable();
        *(unsigned long *)pgd = (unsigned long) pgd_quicklist;
        pgd_quicklist = (unsigned long *) pgd;
        pgtable_cache_size++;
+       preempt_enable();
 }
 
 static inline void free_pgd_slow(pgd_t *pgd)
@@ -119,19 +125,23 @@ static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm,
 {
        unsigned long *ret;
 
+       preempt_disable();
        if ((ret = (unsigned long *)pte_quicklist) != NULL) {
                pte_quicklist = (unsigned long *)(*ret);
                ret[0] = ret[1];
                pgtable_cache_size--;
        }
+       preempt_enable();
        return (pte_t *)ret;
 }
 
 static inline void pte_free_fast(pte_t *pte)
 {
+       preempt_disable();
        *(unsigned long *)pte = (unsigned long) pte_quicklist;
        pte_quicklist = (unsigned long *) pte;
        pgtable_cache_size++;
+       preempt_enable();
 }
 
 static __inline__ void pte_free_slow(pte_t *pte)
index c270defe9be48c562f2e2f7a6784641525fcd9bd..199084cce08fffc9f258e1fb7180387b910e0a7c 100644
--- a/include/asm-i386/smplock.h
+++ b/include/asm-i386/smplock.h
 
 extern spinlock_t kernel_flag;
 
+#ifdef CONFIG_SMP
 #define kernel_locked()                spin_is_locked(&kernel_flag)
+#else
+#ifdef CONFIG_PREEMPT
+#define kernel_locked()                preempt_get_count()
+#else
+#define kernel_locked()                1
+#endif
+#endif
 
 /*
  * Release global kernel lock and global interrupt lock
@@ -43,6 +51,11 @@ do {                                         \
  */
 static __inline__ void lock_kernel(void)
 {
+#ifdef CONFIG_PREEMPT
+       if (current->lock_depth == -1)
+               spin_lock(&kernel_flag);
+       ++current->lock_depth;
+#else
 #if 1
        if (!++current->lock_depth)
                spin_lock(&kernel_flag);
@@ -55,6 +68,7 @@ static __inline__ void lock_kernel(void)
                :"=m" (__dummy_lock(&kernel_flag)),
                 "=m" (current->lock_depth));
 #endif
+#endif
 }
 
 static __inline__ void unlock_kernel(void)
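
On a preemptible uniprocessor kernel the BKL thus reduces to holding the
preempt count, which is why kernel_locked() above can simply test
preempt_get_count().  Recursion still works through current->lock_depth;
schematically (illustration only, assuming the existing unlock_kernel()
decrement-and-release behaviour):

static void bkl_nesting_example(void)   /* hypothetical */
{
        lock_kernel();          /* lock_depth -1 -> 0: takes kernel_flag  */
        lock_kernel();          /* lock_depth  0 -> 1: already held       */
        /* ... BKL-protected work ... */
        unlock_kernel();        /* lock_depth  1 -> 0: still held         */
        unlock_kernel();        /* lock_depth  0 -> -1: releases the flag */
}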
index b9f7796b296dcb153d59390c0b25bb5860c82e07..c62cbece6ce793ae5d85287be14c2ddd77f33d21 100644
--- a/include/asm-i386/softirq.h
+++ b/include/asm-i386/softirq.h
@@ -5,9 +5,9 @@
 #include <asm/hardirq.h>
 
 #define __cpu_bh_enable(cpu) \
-               do { barrier(); local_bh_count(cpu)--; } while (0)
+               do { barrier(); local_bh_count(cpu)--; preempt_enable(); } while (0)
 #define cpu_bh_disable(cpu) \
-               do { local_bh_count(cpu)++; barrier(); } while (0)
+               do { preempt_disable(); local_bh_count(cpu)++; barrier(); } while (0)
 
 #define local_bh_disable()     cpu_bh_disable(smp_processor_id())
 #define __local_bh_enable()    __cpu_bh_enable(smp_processor_id())
@@ -22,7 +22,7 @@
  * If you change the offsets in irq_stat then you have to
  * update this code as well.
  */
-#define local_bh_enable()                                              \
+#define _local_bh_enable()                                             \
 do {                                                                   \
        unsigned int *ptr = &local_bh_count(smp_processor_id());        \
                                                                        \
@@ -45,4 +45,6 @@ do {                                                                  \
                /* no registers clobbered */ );                         \
 } while (0)
 
+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while (0)
+
 #endif /* __ASM_SOFTIRQ_H */
index 89118fced025bd9adb72eaa7bf45d29f8e128641..1a4b4879c35887de129c036225b87352dd1743bb 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -77,7 +77,7 @@ typedef struct {
                :"=m" (lock->lock) : : "memory"
 
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
        if (lock->magic != SPINLOCK_MAGIC)
@@ -97,7 +97,7 @@ static inline void spin_unlock(spinlock_t *lock)
                :"=q" (oldval), "=m" (lock->lock) \
                :"0" (oldval) : "memory"
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
        char oldval = 1;
 #if SPINLOCK_DEBUG
@@ -113,7 +113,7 @@ static inline void spin_unlock(spinlock_t *lock)
 
 #endif
 
-static inline int spin_trylock(spinlock_t *lock)
+static inline int _raw_spin_trylock(spinlock_t *lock)
 {
        char oldval;
        __asm__ __volatile__(
@@ -123,7 +123,7 @@ static inline int spin_trylock(spinlock_t *lock)
        return oldval > 0;
 }
 
-static inline void spin_lock(spinlock_t *lock)
+static inline void _raw_spin_lock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
        __label__ here;
@@ -179,7 +179,7 @@ typedef struct {
  */
 /* the spinlock helpers are in arch/i386/kernel/semaphore.c */
 
-static inline void read_lock(rwlock_t *rw)
+static inline void _raw_read_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
        if (rw->magic != RWLOCK_MAGIC)
@@ -188,7 +188,7 @@ static inline void read_lock(rwlock_t *rw)
        __build_read_lock(rw, "__read_lock_failed");
 }
 
-static inline void write_lock(rwlock_t *rw)
+static inline void _raw_write_lock(rwlock_t *rw)
 {
 #if SPINLOCK_DEBUG
        if (rw->magic != RWLOCK_MAGIC)
@@ -197,10 +197,10 @@ static inline void write_lock(rwlock_t *rw)
        __build_write_lock(rw, "__write_lock_failed");
 }
 
-#define read_unlock(rw)                asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
-#define write_unlock(rw)       asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
 
-static inline int write_trylock(rwlock_t *lock)
+static inline int _raw_write_trylock(rwlock_t *lock)
 {
        atomic_t *count = (atomic_t *)lock;
        if (atomic_sub_and_test(RW_LOCK_BIAS, count))
index 0ce93f99f90c2ff2cc5f75866f5dea4754e1ec72..0359b0948f9749aa136a6e5f9f1e5cf4e6b04e23 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -25,6 +25,7 @@ struct thread_info {
        struct exec_domain      *exec_domain;   /* execution domain */
        __u32                   flags;          /* low level flags */
        __u32                   cpu;            /* current CPU */
+       __s32                   preempt_count; /* 0 => preemptable, <0 => BUG */
 
        mm_segment_t            addr_limit;     /* thread address space:
                                                   0-0xBFFFFFFF for user-thread
@@ -41,7 +42,8 @@ struct thread_info {
 #define TI_EXEC_DOMAIN 0x00000004
 #define TI_FLAGS       0x00000008
 #define TI_CPU         0x0000000C
-#define TI_ADDR_LIMIT  0x00000010
+#define TI_PRE_COUNT   0x00000010
+#define TI_ADDR_LIMIT  0x00000014
 
 #endif
 
index 208c4573381a617342c74ab07f27fca543454558..e36492e06f0466d9e364244d5f333bd8f2c726a6 100644
--- a/include/linux/brlock.h
+++ b/include/linux/brlock.h
@@ -171,11 +171,11 @@ static inline void br_write_unlock (enum brlock_indices idx)
 }
 
 #else
-# define br_read_lock(idx)     ((void)(idx))
-# define br_read_unlock(idx)   ((void)(idx))
-# define br_write_lock(idx)    ((void)(idx))
-# define br_write_unlock(idx)  ((void)(idx))
-#endif
+# define br_read_lock(idx)     ({ (void)(idx); preempt_disable(); })
+# define br_read_unlock(idx)   ({ (void)(idx); preempt_enable(); })
+# define br_write_lock(idx)    ({ (void)(idx); preempt_disable(); })
+# define br_write_unlock(idx)  ({ (void)(idx); preempt_enable(); })
+#endif /* CONFIG_SMP */
 
 /*
  * Now enumerate all of the possible sw/hw IRQ protected
index 78a5834f8a25b19708a2eae4acfbd65244e5aaa8..ad38cabb1619040251aedbc2dcb94e8850a9ceee 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -91,6 +91,7 @@ extern unsigned long nr_running(void);
 #define TASK_UNINTERRUPTIBLE   2
 #define TASK_ZOMBIE            4
 #define TASK_STOPPED           8
+#define PREEMPT_ACTIVE         0x4000000
 
 #define __set_task_state(tsk, state_value)             \
        do { (tsk)->state = (state_value); } while (0)
index bb1ff5c5ea1aa56389389aca6f767f4b6f0bfa55..43bef9087932ebfa1b42509bf735ba62d44e8401 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -81,7 +81,9 @@ extern volatile int smp_msg_id;
 #define smp_processor_id()                     0
 #define hard_smp_processor_id()                        0
 #define smp_threads_ready                      1
+#ifndef CONFIG_PREEMPT
 #define kernel_lock()
+#endif
 #define cpu_logical_map(cpu)                   0
 #define cpu_number_map(cpu)                    0
 #define smp_call_function(func,info,retry,wait)        ({ 0; })
index d1bb03872447a153a3945488b705b19c6ac5fe2d..13d8c7ace0bbe68e6d4cef8923aacc5eb2a6483d 100644
--- a/include/linux/smp_lock.h
+++ b/include/linux/smp_lock.h
@@ -3,7 +3,7 @@
 
 #include <linux/config.h>
 
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT)
 
 #define lock_kernel()                          do { } while(0)
 #define unlock_kernel()                                do { } while(0)
index dc27910a6ad5f94e34e43d44dbc6d7af9a01c6db..6e3ef75fd8850acc218958252541db14409b5b15 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -2,6 +2,10 @@
 #define __LINUX_SPINLOCK_H
 
 #include <linux/config.h>
+#include <linux/linkage.h>
+#include <linux/compiler.h>
+#include <linux/thread_info.h>
+#include <linux/kernel.h>
 
 /*
  * These are the generic versions of the spinlocks and read-write
 
 #if (DEBUG_SPINLOCKS < 1)
 
+#ifndef CONFIG_PREEMPT
 #define atomic_dec_and_lock(atomic,lock) atomic_dec_and_test(atomic)
 #define ATOMIC_DEC_AND_LOCK
+#endif
 
 /*
  * Your basic spinlocks, allowing only a single CPU anywhere
 #endif
 
 #define spin_lock_init(lock)   do { } while(0)
-#define spin_lock(lock)                (void)(lock) /* Not "unused variable". */
+#define _raw_spin_lock(lock)   (void)(lock) /* Not "unused variable". */
 #define spin_is_locked(lock)   (0)
-#define spin_trylock(lock)     ({1; })
+#define _raw_spin_trylock(lock)        ({1; })
 #define spin_unlock_wait(lock) do { } while(0)
-#define spin_unlock(lock)      do { } while(0)
+#define _raw_spin_unlock(lock) do { } while(0)
 
 #elif (DEBUG_SPINLOCKS < 2)
 
@@ -142,13 +148,79 @@ typedef struct {
 #endif
 
 #define rwlock_init(lock)      do { } while(0)
-#define read_lock(lock)                (void)(lock) /* Not "unused variable". */
-#define read_unlock(lock)      do { } while(0)
-#define write_lock(lock)       (void)(lock) /* Not "unused variable". */
-#define write_unlock(lock)     do { } while(0)
+#define _raw_read_lock(lock)   (void)(lock) /* Not "unused variable". */
+#define _raw_read_unlock(lock) do { } while(0)
+#define _raw_write_lock(lock)  (void)(lock) /* Not "unused variable". */
+#define _raw_write_unlock(lock)        do { } while(0)
 
 #endif /* !SMP */
 
+#ifdef CONFIG_PREEMPT
+
+asmlinkage void preempt_schedule(void);
+
+#define preempt_get_count() (current_thread_info()->preempt_count)
+
+#define preempt_disable() \
+do { \
+       ++current_thread_info()->preempt_count; \
+       barrier(); \
+} while (0)
+
+#define preempt_enable_no_resched() \
+do { \
+       --current_thread_info()->preempt_count; \
+       barrier(); \
+} while (0)
+
+#define preempt_enable() \
+do { \
+       --current_thread_info()->preempt_count; \
+       barrier(); \
+       if (unlikely(!(current_thread_info()->preempt_count) && \
+               test_thread_flag(TIF_NEED_RESCHED))) \
+                       preempt_schedule(); \
+} while (0)
+
+#define spin_lock(lock)        \
+do { \
+       preempt_disable(); \
+       _raw_spin_lock(lock); \
+} while(0)
+
+#define spin_trylock(lock)     ({preempt_disable(); _raw_spin_trylock(lock) ? \
+                               1 : ({preempt_enable(); 0;});})
+#define spin_unlock(lock) \
+do { \
+       _raw_spin_unlock(lock); \
+       preempt_enable(); \
+} while (0)
+
+#define read_lock(lock)                ({preempt_disable(); _raw_read_lock(lock);})
+#define read_unlock(lock)      ({_raw_read_unlock(lock); preempt_enable();})
+#define write_lock(lock)       ({preempt_disable(); _raw_write_lock(lock);})
+#define write_unlock(lock)     ({_raw_write_unlock(lock); preempt_enable();})
+#define write_trylock(lock)    ({preempt_disable();_raw_write_trylock(lock) ? \
+                               1 : ({preempt_enable(); 0;});})
+
+#else
+
+#define preempt_get_count()    do { } while (0)
+#define preempt_disable()      do { } while (0)
+#define preempt_enable_no_resched()    do {} while(0)
+#define preempt_enable()       do { } while (0)
+
+#define spin_lock(lock)                _raw_spin_lock(lock)
+#define spin_trylock(lock)     _raw_spin_trylock(lock)
+#define spin_unlock(lock)      _raw_spin_unlock(lock)
+
+#define read_lock(lock)                _raw_read_lock(lock)
+#define read_unlock(lock)      _raw_read_unlock(lock)
+#define write_lock(lock)       _raw_write_lock(lock)
+#define write_unlock(lock)     _raw_write_unlock(lock)
+#define write_trylock(lock)    _raw_write_trylock(lock)
+#endif
+
 /* "lock on reference count zero" */
 #ifndef ATOMIC_DEC_AND_LOCK
 #include <asm/atomic.h>
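
Because preempt_count is a counter, lock sections nest naturally: only when
the outermost unlock brings the count back to zero does preempt_enable()
look at TIF_NEED_RESCHED and possibly call preempt_schedule().
Schematically (illustration only, with two hypothetical locks):

static spinlock_t a_lock = SPIN_LOCK_UNLOCKED;   /* hypothetical */
static spinlock_t b_lock = SPIN_LOCK_UNLOCKED;   /* hypothetical */

static void nesting_example(void)
{
        spin_lock(&a_lock);     /* preempt_count 0 -> 1 */
        spin_lock(&b_lock);     /* preempt_count 1 -> 2 */
        /* ... critical section ... */
        spin_unlock(&b_lock);   /* 2 -> 1: preemption still disabled       */
        spin_unlock(&a_lock);   /* 1 -> 0: may now call preempt_schedule() */
}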
index 6b5a7cba048edf98bebb877dfb283cb2079e4529..e5e631714afc96cc0ad4e9117c87564ccef8b016 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -390,8 +390,8 @@ static inline void __exit_mm(struct task_struct * tsk)
                /* more a memory barrier than a real lock */
                task_lock(tsk);
                tsk->mm = NULL;
-               task_unlock(tsk);
                enter_lazy_tlb(mm, current, smp_processor_id());
+               task_unlock(tsk);
                mmput(mm);
        }
 }
index 3e49ad5c1ebc15a0fdac701b5bb29962dc77e4e9..9ac534b70d1f1b87f5e609eaea6efdde283532df 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -650,6 +650,13 @@ int do_fork(unsigned long clone_flags, unsigned long stack_start,
        if (p->binfmt && p->binfmt->module)
                __MOD_INC_USE_COUNT(p->binfmt->module);
 
+#ifdef CONFIG_PREEMPT
+       /*
+        * schedule_tail drops this_rq()->lock so we compensate with a count
+        * of 1.  Also, we want to start with kernel preemption disabled.
+        */
+       p->thread_info->preempt_count = 1;
+#endif
        p->did_exec = 0;
        p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;
index ae89152ce9363fb6cdecf7833dc360da1d614524..41635b99dafeeb086024b4b5ed17dcdb059446fb 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -445,6 +445,9 @@ EXPORT_SYMBOL(sleep_on_timeout);
 EXPORT_SYMBOL(interruptible_sleep_on);
 EXPORT_SYMBOL(interruptible_sleep_on_timeout);
 EXPORT_SYMBOL(schedule);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(preempt_schedule);
+#endif
 EXPORT_SYMBOL(schedule_timeout);
 EXPORT_SYMBOL(sys_sched_yield);
 EXPORT_SYMBOL(set_user_nice);
index 56fb9a54e7fe35f342501d88a5a2ab53d4d12bf4..a8cf63321a11ee4cd63207a7deae9d6ea6997ed5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,10 +61,12 @@ static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
        struct runqueue *__rq;
 
 repeat_lock_task:
+       preempt_disable();
        __rq = task_rq(p);
        spin_lock_irqsave(&__rq->lock, *flags);
        if (unlikely(__rq != task_rq(p))) {
                spin_unlock_irqrestore(&__rq->lock, *flags);
+               preempt_enable();
                goto repeat_lock_task;
        }
        return __rq;
@@ -73,6 +75,7 @@ repeat_lock_task:
 static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
 {
        spin_unlock_irqrestore(&rq->lock, *flags);
+       preempt_enable();
 }
 /*
  * Adding/removing a task to/from a priority array:
@@ -195,6 +198,7 @@ static inline void resched_task(task_t *p)
 #ifdef CONFIG_SMP
        int need_resched, nrpolling;
 
+       preempt_disable();
        /* minimise the chance of sending an interrupt to poll_idle() */
        nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
        need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
@@ -202,6 +206,7 @@ static inline void resched_task(task_t *p)
 
        if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id()))
                smp_send_reschedule(p->thread_info->cpu);
+       preempt_enable();
 #else
        set_tsk_need_resched(p);
 #endif
@@ -219,6 +224,7 @@ void wait_task_inactive(task_t * p)
        runqueue_t *rq;
 
 repeat:
+       preempt_disable();
        rq = task_rq(p);
        while (unlikely(rq->curr == p)) {
                cpu_relax();
@@ -227,9 +233,11 @@ repeat:
        rq = lock_task_rq(p, &flags);
        if (unlikely(rq->curr == p)) {
                unlock_task_rq(rq, &flags);
+               preempt_enable();
                goto repeat;
        }
        unlock_task_rq(rq, &flags);
+       preempt_enable();
 }
 
 /*
@@ -295,7 +303,10 @@ int wake_up_process(task_t * p)
 
 void wake_up_forked_process(task_t * p)
 {
-       runqueue_t *rq = this_rq();
+       runqueue_t *rq;
+       
+       preempt_disable();
+       rq = this_rq();
 
        p->state = TASK_RUNNING;
        if (!rt_task(p)) {
@@ -308,6 +319,7 @@ void wake_up_forked_process(task_t * p)
        p->thread_info->cpu = smp_processor_id();
        activate_task(p, rq);
        spin_unlock_irq(&rq->lock);
+       preempt_enable();
 }
 
 asmlinkage void schedule_tail(task_t *prev)
@@ -635,17 +647,31 @@ void scheduling_functions_start_here(void) { }
  */
 asmlinkage void schedule(void)
 {
-       task_t *prev = current, *next;
-       runqueue_t *rq = this_rq();
+       task_t *prev, *next;
+       runqueue_t *rq;
        prio_array_t *array;
        list_t *queue;
        int idx;
 
        if (unlikely(in_interrupt()))
                BUG();
+
+       preempt_disable();
+       prev = current;
+       rq = this_rq();
+       
        release_kernel_lock(prev, smp_processor_id());
        spin_lock_irq(&rq->lock);
 
+#ifdef CONFIG_PREEMPT
+       /*
+        * if entering from preempt_schedule, off a kernel preemption,
+        * go straight to picking the next task.
+        */
+       if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+               goto pick_next_task;
+#endif
+       
        switch (prev->state) {
        case TASK_RUNNING:
                prev->sleep_timestamp = jiffies;
@@ -659,7 +685,7 @@ asmlinkage void schedule(void)
        default:
                deactivate_task(prev, rq);
        }
-#if CONFIG_SMP
+#if CONFIG_SMP || CONFIG_PREEMPT
 pick_next_task:
 #endif
        if (unlikely(!rq->nr_running)) {
@@ -707,9 +733,25 @@ switch_tasks:
        spin_unlock_irq(&rq->lock);
 
        reacquire_kernel_lock(current);
+       preempt_enable_no_resched();
        return;
 }
 
+#ifdef CONFIG_PREEMPT
+/*
+ * this is the entry point to schedule() from in-kernel preemption.
+ */
+asmlinkage void preempt_schedule(void)
+{
+       do {
+               current_thread_info()->preempt_count += PREEMPT_ACTIVE;
+               schedule();
+               current_thread_info()->preempt_count -= PREEMPT_ACTIVE;
+               barrier();
+       } while (test_thread_flag(TIF_NEED_RESCHED));
+}
+#endif /* CONFIG_PREEMPT */
+
 /*
  * The core wakeup function.  Non-exclusive wakeups (nr_exclusive == 0) just
  * wake everything up.  If it's an exclusive wakeup (nr_exclusive == small +ve
@@ -1105,9 +1147,12 @@ out_unlock:
 
 asmlinkage long sys_sched_yield(void)
 {
-       runqueue_t *rq = this_rq();
+       runqueue_t *rq;
        prio_array_t *array;
 
+       preempt_disable();
+       rq = this_rq();
+
        /*
         * Decrease the yielding task's priority by one, to avoid
         * livelocks. This priority loss is temporary, it's recovered
@@ -1134,6 +1179,7 @@ asmlinkage long sys_sched_yield(void)
                __set_bit(current->prio, array->bitmap);
        }
        spin_unlock(&rq->lock);
+       preempt_enable_no_resched();
 
        schedule();
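
PREEMPT_ACTIVE is what keeps an involuntary preemption from losing a task
that was in the middle of going to sleep.  Consider a standard wait loop
(illustration only; "done" is a hypothetical flag and real code would sit
on a wait queue):

static int done;                        /* hypothetical condition */

static void wait_for_done(void)
{
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (done)
                        break;
                /*
                 * If the task is preempted right here, schedule() runs with
                 * state == TASK_INTERRUPTIBLE.  The PREEMPT_ACTIVE bit set
                 * by preempt_schedule() makes schedule() skip the
                 * deactivate_task() switch above, so the task stays on the
                 * runqueue and resumes this loop instead of silently going
                 * to sleep.
                 */
                schedule();
        }
        set_current_state(TASK_RUNNING);
}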
 
index 2965aa8d0f49950f17414306323b5e0519eb4ded..588328c30ff3b12c343e1b029bc5c0ba3d7e491b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -132,7 +132,7 @@ static struct file_operations socket_file_ops = {
 
 static struct net_proto_family *net_families[NPROTO];
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
 static atomic_t net_family_lockct = ATOMIC_INIT(0);
 static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;