cleanups, speedups and fixes. Added support for non-current set_cpus_allowed().
author	Ingo Molnar <mingo@elte.hu>
Thu, 21 Feb 2002 18:10:07 +0000 (19:10 +0100)
committer	Ingo Molnar <mingo@elte.hu>
Thu, 21 Feb 2002 18:10:07 +0000 (19:10 +0100)
arch/i386/kernel/i8259.c
arch/i386/kernel/smp.c
fs/pipe.c
include/asm-i386/hw_irq.h
include/linux/init_task.h
include/linux/sched.h
init/main.c
kernel/ksyms.c
kernel/sched.c

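For orientation (not part of the commit): set_cpus_allowed() can now be called on tasks other than current, and it sleeps until the target task has actually been moved onto an allowed CPU. A minimal caller sketch, assuming process context and a valid, referenced task_t *p; the helper name is illustrative:

	/*
	 * Illustrative only: restrict p to CPU 1.  set_cpus_allowed() takes
	 * p->migration_sem (so it may sleep) and, if p is not already running
	 * on an allowed CPU, hands it off to that CPU's migration thread.
	 */
	static void pin_to_cpu1(task_t *p)
	{
		set_cpus_allowed(p, 1UL << 1);	/* p need not be current any more */
	}

Before this commit the same call would BUG() whenever p != current.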
index e6a887b85dd32605b2234a2748d7d3ec13595c0e..042c9d7323e2e6fbe94f0eabf480f4e9481d331b 100644 (file)
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -79,7 +79,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
  * through the ICC by us (IPIs)
  */
 #ifdef CONFIG_SMP
-BUILD_SMP_INTERRUPT(task_migration_interrupt,TASK_MIGRATION_VECTOR)
 BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
 BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
@@ -474,9 +473,6 @@ void __init init_IRQ(void)
         */
        set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-       /* IPI for task migration */
-       set_intr_gate(TASK_MIGRATION_VECTOR, task_migration_interrupt);
-
        /* IPI for invalidation */
        set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
 
index 7fdbdfdba7dabc92651e458ba48a4649a63a7bdb..c0b3a94a17c263233626147aa613b0c5d38a9230 100644 (file)
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -485,35 +485,6 @@ void flush_tlb_all(void)
        do_flush_tlb_all_local();
 }
 
-static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED;
-static task_t *new_task;
-
-/*
- * This function sends a 'task migration' IPI to another CPU.
- * Must be called from syscall contexts, with interrupts *enabled*.
- */
-void smp_migrate_task(int cpu, task_t *p)
-{
-       /*
-        * The target CPU will unlock the migration spinlock:
-        */
-       _raw_spin_lock(&migration_lock);
-       new_task = p;
-       send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
-}
-
-/*
- * Task migration callback.
- */
-asmlinkage void smp_task_migration_interrupt(void)
-{
-       task_t *p;
-
-       ack_APIC_irq();
-       p = new_task;
-       _raw_spin_unlock(&migration_lock);
-       sched_task_migrated(p);
-}
 /*
  * this function sends a 'reschedule' IPI to another CPU.
  * it goes straight through and wastes no time serializing
index 596188fa639f60e70757d5cf4c51e97a822a6d2a..f2d9da0143c70b66b00b59ec145941745ca2d2bf 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -116,7 +116,7 @@ do_more_read:
                 * writers synchronously that there is more
                 * room.
                 */
-               wake_up_interruptible_sync(PIPE_WAIT(*inode));
+               wake_up_interruptible(PIPE_WAIT(*inode));
                if (!PIPE_EMPTY(*inode))
                        BUG();
                goto do_more_read;
@@ -214,7 +214,7 @@ pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
                         * is going to give up this CPU, so it doesnt have
                         * to do idle reschedules.
                         */
-                       wake_up_interruptible_sync(PIPE_WAIT(*inode));
+                       wake_up_interruptible(PIPE_WAIT(*inode));
                        PIPE_WAITING_WRITERS(*inode)++;
                        pipe_wait(inode);
                        PIPE_WAITING_WRITERS(*inode)--;
index b572c28744cb86dac0e959ea776529be4cbe5497..1461dab08d90af6a4a9e8b54c85709291ed00247 100644 (file)
--- a/include/asm-i386/hw_irq.h
+++ b/include/asm-i386/hw_irq.h
  *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
  *  TLB, reschedule and local APIC vectors are performance-critical.
  *
- *  Vectors 0xf0-0xf9 are free (reserved for future Linux use).
+ *  Vectors 0xf0-0xfa are free (reserved for future Linux use).
  */
 #define SPURIOUS_APIC_VECTOR   0xff
 #define ERROR_APIC_VECTOR      0xfe
 #define INVALIDATE_TLB_VECTOR  0xfd
 #define RESCHEDULE_VECTOR      0xfc
-#define TASK_MIGRATION_VECTOR  0xfb
-#define CALL_FUNCTION_VECTOR   0xfa
+#define CALL_FUNCTION_VECTOR   0xfb
 
 /*
  * Local APIC timer IRQ vector is on a different priority level,
index 9f34e057079af4d145c678f75fbcd93c98bf12f8..f97e245dcddc7bb5d2489ed76afb06e48b57063a 100644 (file)
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -52,6 +52,8 @@
     mm:                        NULL,                                           \
     active_mm:         &init_mm,                                       \
     run_list:          LIST_HEAD_INIT(tsk.run_list),                   \
+    migration_list:    LIST_HEAD_INIT(tsk.migration_list),             \
+    migration_sem:     __MUTEX_INITIALIZER(tsk.migration_sem),         \
     time_slice:                HZ,                                             \
     next_task:         &tsk,                                           \
     prev_task:         &tsk,                                           \
index ab7abbdd7fa57802365b1c6f783541e2eb15340e..2992fe65e6aad1db7015a4607df0a98edb7bb993 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -150,8 +150,7 @@ extern void update_process_times(int user);
 extern void update_one_process(struct task_struct *p, unsigned long user,
                               unsigned long system, int cpu);
 extern void scheduler_tick(int user_tick, int system);
-extern void sched_task_migrated(struct task_struct *p);
-extern void smp_migrate_task(int cpu, task_t *task);
+extern void migration_init(void);
 extern unsigned long cache_decay_ticks;
 
 
@@ -286,6 +285,10 @@ struct task_struct {
 
        wait_queue_head_t wait_chldexit;        /* for wait4() */
        struct completion *vfork_done;          /* for vfork() */
+
+       list_t migration_list;
+       struct semaphore migration_sem;
+
        unsigned long rt_priority;
        unsigned long it_real_value, it_prof_value, it_virt_value;
        unsigned long it_real_incr, it_prof_incr, it_virt_incr;
@@ -382,7 +385,12 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
  */
 #define _STK_LIM       (8*1024*1024)
 
+#if CONFIG_SMP
 extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
+#else
+# define set_cpus_allowed(p, new_mask) do { } while (0)
+#endif
+
 extern void set_user_nice(task_t *p, long nice);
 extern int task_prio(task_t *p);
 extern int task_nice(task_t *p);
@@ -460,7 +468,6 @@ extern unsigned long prof_len;
 extern unsigned long prof_shift;
 
 extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
-extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
 extern void FASTCALL(sleep_on(wait_queue_head_t *q));
 extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
                                      signed long timeout));
@@ -474,13 +481,9 @@ extern void FASTCALL(sched_exit(task_t * p));
 #define wake_up(x)                     __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
 #define wake_up_nr(x, nr)              __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
 #define wake_up_all(x)                 __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
-#define wake_up_sync(x)                        __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
-#define wake_up_sync_nr(x, nr)         __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
 #define wake_up_interruptible(x)       __wake_up((x),TASK_INTERRUPTIBLE, 1)
 #define wake_up_interruptible_nr(x, nr)        __wake_up((x),TASK_INTERRUPTIBLE, nr)
 #define wake_up_interruptible_all(x)   __wake_up((x),TASK_INTERRUPTIBLE, 0)
-#define wake_up_interruptible_sync(x)  __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
-#define wake_up_interruptible_sync_nr(x) __wake_up_sync((x),TASK_INTERRUPTIBLE,  nr)
 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
 
 extern int in_group_p(gid_t);
index 64f4c299092064094aaf981cb2e1a0baafe9873c..ddc2ae8ca1259c700a766652f6e1c14f4482f6d2 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -413,7 +413,12 @@ static void __init do_initcalls(void)
  */
 static void __init do_basic_setup(void)
 {
-
+       /*
+        * Let the per-CPU migration threads start up:
+        */
+#if CONFIG_SMP
+       migration_init();
+#endif
        /*
         * Tell the world that we're going to be the grim
         * reaper of innocent orphaned children.
index cf006273b3bac1d65bc9bfc034bbefba6bfa856d..30bb84d5c19d79c7c25d017043532d4786878523 100644 (file)
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -443,7 +443,6 @@ EXPORT_SYMBOL(iomem_resource);
 /* process management */
 EXPORT_SYMBOL(complete_and_exit);
 EXPORT_SYMBOL(__wake_up);
-EXPORT_SYMBOL(__wake_up_sync);
 EXPORT_SYMBOL(wake_up_process);
 EXPORT_SYMBOL(sleep_on);
 EXPORT_SYMBOL(sleep_on_timeout);
@@ -458,6 +457,9 @@ EXPORT_SYMBOL(sys_sched_yield);
 EXPORT_SYMBOL(set_user_nice);
 EXPORT_SYMBOL(task_nice);
 EXPORT_SYMBOL_GPL(idle_cpu);
+#if CONFIG_SMP
+EXPORT_SYMBOL_GPL(set_cpus_allowed);
+#endif
 EXPORT_SYMBOL(jiffies);
 EXPORT_SYMBOL(xtime);
 EXPORT_SYMBOL(do_gettimeofday);
index 4124241f198896cf3bdc4ea484dd8c4149b630c6..f1ef91af5b31a721d539dad44015b46dab5f13e5 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
 #include <linux/nmi.h>
 #include <linux/init.h>
 #include <asm/uaccess.h>
+#include <linux/highmem.h>
 #include <linux/smp_lock.h>
+#include <asm/mmu_context.h>
 #include <linux/interrupt.h>
 #include <linux/completion.h>
-#include <asm/mmu_context.h>
 #include <linux/kernel_stat.h>
-#include <linux/highmem.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -127,8 +127,6 @@ typedef struct runqueue runqueue_t;
 
 struct prio_array {
        int nr_active;
-       spinlock_t *lock;
-       runqueue_t *rq;
        unsigned long bitmap[BITMAP_SIZE];
        list_t queue[MAX_PRIO];
 };
@@ -146,6 +144,8 @@ struct runqueue {
        task_t *curr, *idle;
        prio_array_t *active, *expired, arrays[2];
        int prev_nr_running[NR_CPUS];
+       task_t *migration_thread;
+       list_t migration_queue;
 } ____cacheline_aligned;
 
 static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
@@ -156,23 +156,23 @@ static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
 #define rt_task(p)             ((p)->prio < MAX_RT_PRIO)
 
-static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
 {
-       struct runqueue *__rq;
+       struct runqueue *rq;
 
 repeat_lock_task:
        preempt_disable();
-       __rq = task_rq(p);
-       spin_lock_irqsave(&__rq->lock, *flags);
-       if (unlikely(__rq != task_rq(p))) {
-               spin_unlock_irqrestore(&__rq->lock, *flags);
+       rq = task_rq(p);
+       spin_lock_irqsave(&rq->lock, *flags);
+       if (unlikely(rq != task_rq(p))) {
+               spin_unlock_irqrestore(&rq->lock, *flags);
                preempt_enable();
                goto repeat_lock_task;
        }
-       return __rq;
+       return rq;
 }
 
-static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
 {
        spin_unlock_irqrestore(&rq->lock, *flags);
        preempt_enable();
@@ -184,7 +184,7 @@ static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
 static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
 {
        array->nr_active--;
-       list_del_init(&p->run_list);
+       list_del(&p->run_list);
        if (list_empty(array->queue + p->prio))
                __clear_bit(p->prio, array->bitmap);
 }
@@ -289,30 +289,16 @@ repeat:
                cpu_relax();
                barrier();
        }
-       rq = lock_task_rq(p, &flags);
+       rq = task_rq_lock(p, &flags);
        if (unlikely(rq->curr == p)) {
-               unlock_task_rq(rq, &flags);
+               task_rq_unlock(rq, &flags);
                preempt_enable();
                goto repeat;
        }
-       unlock_task_rq(rq, &flags);
+       task_rq_unlock(rq, &flags);
        preempt_enable();
 }
 
-/*
- * The SMP message passing code calls this function whenever
- * the new task has arrived at the target CPU. We move the
- * new task into the local runqueue.
- *
- * This function must be called with interrupts disabled.
- */
-void sched_task_migrated(task_t *new_task)
-{
-       wait_task_inactive(new_task);
-       new_task->thread_info->cpu = smp_processor_id();
-       wake_up_process(new_task);
-}
-
 /*
  * Kick the remote CPU if the task is running currently,
  * this code is used by the signal code to signal tasks
@@ -337,27 +323,27 @@ void kick_if_running(task_t * p)
  * "current->state = TASK_RUNNING" to mark yourself runnable
  * without the overhead of this.
  */
-static int try_to_wake_up(task_t * p, int synchronous)
+static int try_to_wake_up(task_t * p)
 {
        unsigned long flags;
        int success = 0;
        runqueue_t *rq;
 
-       rq = lock_task_rq(p, &flags);
+       rq = task_rq_lock(p, &flags);
        p->state = TASK_RUNNING;
        if (!p->array) {
                activate_task(p, rq);
-               if ((rq->curr == rq->idle) || (p->prio < rq->curr->prio))
+               if (p->prio < rq->curr->prio)
                        resched_task(rq->curr);
                success = 1;
        }
-       unlock_task_rq(rq, &flags);
+       task_rq_unlock(rq, &flags);
        return success;
 }
 
 int wake_up_process(task_t * p)
 {
-       return try_to_wake_up(p, 0);
+       return try_to_wake_up(p);
 }
 
 void wake_up_forked_process(task_t * p)
@@ -366,6 +352,7 @@ void wake_up_forked_process(task_t * p)
 
        preempt_disable();
        rq = this_rq();
+       spin_lock_irq(&rq->lock);
 
        p->state = TASK_RUNNING;
        if (!rt_task(p)) {
@@ -378,10 +365,12 @@ void wake_up_forked_process(task_t * p)
                p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
                p->prio = effective_prio(p);
        }
-       spin_lock_irq(&rq->lock);
+       INIT_LIST_HEAD(&p->migration_list);
        p->thread_info->cpu = smp_processor_id();
        activate_task(p, rq);
+
        spin_unlock_irq(&rq->lock);
+       init_MUTEX(&p->migration_sem);
        preempt_enable();
 }
 
@@ -861,44 +850,33 @@ asmlinkage void preempt_schedule(void)
  * started to run but is not in state TASK_RUNNING.  try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
-                                    int nr_exclusive, const int sync)
+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 {
        struct list_head *tmp;
+       unsigned int state;
+       wait_queue_t *curr;
        task_t *p;
 
-       list_for_each(tmp,&q->task_list) {
-               unsigned int state;
-               wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
-
+       list_for_each(tmp, &q->task_list) {
+               curr = list_entry(tmp, wait_queue_t, task_list);
                p = curr->task;
                state = p->state;
-               if ((state & mode) &&
-                               try_to_wake_up(p, sync) &&
-                               ((curr->flags & WQ_FLAG_EXCLUSIVE) &&
-                                       !--nr_exclusive))
-                       break;
+               if ((state & mode) && try_to_wake_up(p) &&
+                       ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
+                               break;
        }
 }
 
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 {
-       if (q) {
-               unsigned long flags;
-               wq_read_lock_irqsave(&q->lock, flags);
-               __wake_up_common(q, mode, nr, 0);
-               wq_read_unlock_irqrestore(&q->lock, flags);
-       }
-}
+       unsigned long flags;
 
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
-{
-       if (q) {
-               unsigned long flags;
-               wq_read_lock_irqsave(&q->lock, flags);
-               __wake_up_common(q, mode, nr, 1);
-               wq_read_unlock_irqrestore(&q->lock, flags);
-       }
+       if (unlikely(!q))
+               return;
+
+       wq_read_lock_irqsave(&q->lock, flags);
+       __wake_up_common(q, mode, nr_exclusive);
+       wq_read_unlock_irqrestore(&q->lock, flags);
 }
 
 void complete(struct completion *x)
@@ -907,7 +885,7 @@ void complete(struct completion *x)
 
        spin_lock_irqsave(&x->wait.lock, flags);
        x->done++;
-       __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
+       __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1);
        spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 
@@ -994,35 +972,66 @@ long sleep_on_timeout(wait_queue_head_t *q, long timeout)
        return timeout;
 }
 
+void scheduling_functions_end_here(void) { }
+
+#if CONFIG_SMP
+
 /*
- * Change the current task's CPU affinity. Migrate the process to a
- * proper CPU and schedule away if the current CPU is removed from
- * the allowed bitmask.
+ * Change a given task's CPU affinity. Migrate the process to a
+ * proper CPU and schedule it away if the current CPU is removed
+ * from the allowed bitmask.
  */
 void set_cpus_allowed(task_t *p, unsigned long new_mask)
 {
+       unsigned long flags;
+       runqueue_t *rq;
+       int dest_cpu;
+
+       down(&p->migration_sem);
+       if (!list_empty(&p->migration_list))
+               BUG();
+
        new_mask &= cpu_online_map;
        if (!new_mask)
                BUG();
-       if (p != current)
-               BUG();
 
+       rq = task_rq_lock(p, &flags);
        p->cpus_allowed = new_mask;
        /*
-        * Can the task run on the current CPU? If not then
+        * Can the task run on the task's current CPU? If not then
         * migrate the process off to a proper CPU.
         */
-       if (new_mask & (1UL << smp_processor_id()))
-               return;
-#if CONFIG_SMP
-       current->state = TASK_UNINTERRUPTIBLE;
-       smp_migrate_task(__ffs(new_mask), current);
+       if (new_mask & (1UL << p->thread_info->cpu)) {
+               task_rq_unlock(rq, &flags);
+               goto out;
+       }
+       /*
+        * We mark the process as nonrunnable, and kick it to
+        * schedule away from its current CPU. We also add
+        * the task to the migration queue and wake up the
+        * target CPU's migration thread, so that it can pick
+        * up this task and insert it into the local runqueue.
+        */
+       p->state = TASK_UNINTERRUPTIBLE;
+       kick_if_running(p);
+       task_rq_unlock(rq, &flags);
 
-       schedule();
-#endif
+       dest_cpu = __ffs(new_mask);
+       rq = cpu_rq(dest_cpu);
+
+       spin_lock_irq(&rq->lock);
+       list_add(&p->migration_list, &rq->migration_queue);
+       spin_unlock_irq(&rq->lock);
+       wake_up_process(rq->migration_thread);
+
+       while (!((1UL << p->thread_info->cpu) & p->cpus_allowed) &&
+                       (p->state != TASK_ZOMBIE))
+               yield();
+out:
+       up(&p->migration_sem);
 }
 
-void scheduling_functions_end_here(void) { }
+#endif
 
 void set_user_nice(task_t *p, long nice)
 {
@@ -1036,7 +1045,7 @@ void set_user_nice(task_t *p, long nice)
         * We have to be careful, if called from sys_setpriority(),
         * the task might be in the middle of scheduling on another CPU.
         */
-       rq = lock_task_rq(p, &flags);
+       rq = task_rq_lock(p, &flags);
        if (rt_task(p)) {
                p->static_prio = NICE_TO_PRIO(nice);
                goto out_unlock;
@@ -1056,7 +1065,7 @@ void set_user_nice(task_t *p, long nice)
                        resched_task(rq->curr);
        }
 out_unlock:
-       unlock_task_rq(rq, &flags);
+       task_rq_unlock(rq, &flags);
 }
 
 #ifndef __alpha__
@@ -1154,7 +1163,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param)
         * To be able to change p->policy safely, the apropriate
         * runqueue lock must be held.
         */
-       rq = lock_task_rq(p, &flags);
+       rq = task_rq_lock(p, &flags);
 
        if (policy < 0)
                policy = p->policy;
@@ -1197,7 +1206,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param)
                activate_task(p, task_rq(p));
 
 out_unlock:
-       unlock_task_rq(rq, &flags);
+       task_rq_unlock(rq, &flags);
 out_unlock_tasklist:
        read_unlock_irq(&tasklist_lock);
 
@@ -1477,7 +1486,7 @@ static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
 
 void __init init_idle(task_t *idle, int cpu)
 {
-       runqueue_t *idle_rq = cpu_rq(cpu), *rq = idle->array->rq;
+       runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->thread_info->cpu);
        unsigned long flags;
 
        __save_flags(flags);
@@ -1509,14 +1518,13 @@ void __init sched_init(void)
                runqueue_t *rq = cpu_rq(i);
                prio_array_t *array;
 
-               rq->active = rq->arrays + 0;
+               rq->active = rq->arrays;
                rq->expired = rq->arrays + 1;
                spin_lock_init(&rq->lock);
+               INIT_LIST_HEAD(&rq->migration_queue);
 
                for (j = 0; j < 2; j++) {
                        array = rq->arrays + j;
-                       array->rq = rq;
-                       array->lock = &rq->lock;
                        for (k = 0; k < MAX_PRIO; k++) {
                                INIT_LIST_HEAD(array->queue + k);
                                __clear_bit(k, array->bitmap);
@@ -1545,3 +1553,104 @@ void __init sched_init(void)
        atomic_inc(&init_mm.mm_count);
        enter_lazy_tlb(&init_mm, current, smp_processor_id());
 }
+
+#if CONFIG_SMP
+
+static volatile unsigned long migration_mask;
+
+static int migration_thread(void * unused)
+{
+       runqueue_t *rq;
+
+       daemonize();
+       sigfillset(&current->blocked);
+       set_user_nice(current, -20);
+
+       /*
+        * We have to migrate manually - there is no migration thread
+        * to do this for us yet :-)
+        *
+        * We use the following property of the Linux scheduler. At
+        * this point no other task is running, so by keeping all
+        * migration threads running, the load-balancer will distribute
+        * them between all CPUs equally. At that point every migration
+        * task binds itself to the current CPU.
+        */
+
+       /* wait for all migration threads to start up. */
+       while (!migration_mask)
+               yield();
+
+       for (;;) {
+               preempt_disable();
+               if (test_and_clear_bit(smp_processor_id(), &migration_mask))
+                       current->cpus_allowed = 1 << smp_processor_id();
+               if (test_thread_flag(TIF_NEED_RESCHED))
+                       schedule();
+               if (!migration_mask)
+                       break;
+               preempt_enable();
+       }
+       rq = this_rq();
+       rq->migration_thread = current;
+       preempt_enable();
+
+       sprintf(current->comm, "migration_CPU%d", smp_processor_id());
+
+       for (;;) {
+               struct list_head *head;
+               unsigned long flags;
+               task_t *p = NULL;
+
+               spin_lock_irqsave(&rq->lock, flags);
+               head = &rq->migration_queue;
+               if (list_empty(head)) {
+                       current->state = TASK_UNINTERRUPTIBLE;
+                       spin_unlock_irqrestore(&rq->lock, flags);
+                       schedule();
+                       continue;
+               }
+               p = list_entry(head->next, task_t, migration_list);
+               list_del_init(head->next);
+               spin_unlock_irqrestore(&rq->lock, flags);
+
+               for (;;) {
+                       runqueue_t *rq2 = task_rq_lock(p, &flags);
+
+                       if (!p->array) {
+                               p->thread_info->cpu = smp_processor_id();
+                               task_rq_unlock(rq2, &flags);
+                               wake_up_process(p);
+                               break;
+                       }
+                       if (p->state != TASK_UNINTERRUPTIBLE) {
+                               p->state = TASK_UNINTERRUPTIBLE;
+                               kick_if_running(p);
+                       }
+                       task_rq_unlock(rq2, &flags);
+                       while ((p->state == TASK_UNINTERRUPTIBLE) && p->array) {
+                               cpu_relax();
+                               barrier();
+                       }
+               }
+       }
+}
+
+void __init migration_init(void)
+{
+       int cpu;
+
+       for (cpu = 0; cpu < smp_num_cpus; cpu++)
+               if (kernel_thread(migration_thread, NULL,
+                               CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
+                       BUG();
+
+       migration_mask = (1 << smp_num_cpus) -1;
+
+       for (cpu = 0; cpu < smp_num_cpus; cpu++)
+               while (!cpu_rq(cpu)->migration_thread)
+                       yield();
+       if (migration_mask)
+               BUG();
+}
+#endif
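The wait loop at the end of the new set_cpus_allowed() establishes a simple post-condition: when the call returns, the task is either a zombie or is bound to a CPU inside its allowed mask. A hypothetical debug helper (not part of the commit) restating that invariant:

	/*
	 * Sketch only: after set_cpus_allowed(p, mask) returns, p must either
	 * have exited or sit on a CPU permitted by p->cpus_allowed.
	 */
	static void check_cpus_allowed(task_t *p)
	{
		if (!((1UL << p->thread_info->cpu) & p->cpus_allowed) &&
		    p->state != TASK_ZOMBIE)
			BUG();
	}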