extern void update_one_process(struct task_struct *p, unsigned long user,
unsigned long system, int cpu);
extern void scheduler_tick(int user_tick, int system);
-extern void sched_task_migrated(struct task_struct *p);
-extern void smp_migrate_task(int cpu, task_t *task);
+extern void migration_init(void);
extern unsigned long cache_decay_ticks;
wait_queue_head_t wait_chldexit; /* for wait4() */
struct completion *vfork_done; /* for vfork() */
+
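+ /*
+ * migration_list links the task onto the destination runqueue's
+ * migration_queue (protected by that runqueue's lock), and
+ * migration_sem serialises set_cpus_allowed() calls on the task.
+ */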
+ list_t migration_list;
+ struct semaphore migration_sem;
+
unsigned long rt_priority;
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
*/
#define _STK_LIM (8*1024*1024)
+#if CONFIG_SMP
extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
+#else
+# define set_cpus_allowed(p, new_mask) do { } while (0)
+#endif
+
extern void set_user_nice(task_t *p, long nice);
extern int task_prio(task_t *p);
extern int task_nice(task_t *p);
extern unsigned long prof_shift;
extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
-extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
extern void FASTCALL(sleep_on(wait_queue_head_t *q));
extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
signed long timeout));
#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
-#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
-#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
-#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
-#define wake_up_interruptible_sync_nr(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
extern int in_group_p(gid_t);
#include <linux/nmi.h>
#include <linux/init.h>
#include <asm/uaccess.h>
+#include <linux/highmem.h>
#include <linux/smp_lock.h>
+#include <asm/mmu_context.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
-#include <asm/mmu_context.h>
#include <linux/kernel_stat.h>
-#include <linux/highmem.h>
/*
* Priority of a process goes from 0 to 139. The 0-99
struct prio_array {
int nr_active;
- spinlock_t *lock;
- runqueue_t *rq;
unsigned long bitmap[BITMAP_SIZE];
list_t queue[MAX_PRIO];
};
task_t *curr, *idle;
prio_array_t *active, *expired, arrays[2];
int prev_nr_running[NR_CPUS];
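+ /*
+ * This CPU's migration thread and the list of tasks that are
+ * waiting to be migrated to this CPU.
+ */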
+ task_t *migration_thread;
+ list_t migration_queue;
} ____cacheline_aligned;
static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO)
-static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
+static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
{
- struct runqueue *__rq;
+ struct runqueue *rq;
repeat_lock_task:
preempt_disable();
- __rq = task_rq(p);
- spin_lock_irqsave(&__rq->lock, *flags);
- if (unlikely(__rq != task_rq(p))) {
- spin_unlock_irqrestore(&__rq->lock, *flags);
+ rq = task_rq(p);
+ spin_lock_irqsave(&rq->lock, *flags);
+ if (unlikely(rq != task_rq(p))) {
+ spin_unlock_irqrestore(&rq->lock, *flags);
preempt_enable();
goto repeat_lock_task;
}
- return __rq;
+ return rq;
}
-static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
+static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
{
spin_unlock_irqrestore(&rq->lock, *flags);
preempt_enable();
static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
{
array->nr_active--;
- list_del_init(&p->run_list);
+ list_del(&p->run_list);
if (list_empty(array->queue + p->prio))
__clear_bit(p->prio, array->bitmap);
}
cpu_relax();
barrier();
}
- rq = lock_task_rq(p, &flags);
+ rq = task_rq_lock(p, &flags);
if (unlikely(rq->curr == p)) {
- unlock_task_rq(rq, &flags);
+ task_rq_unlock(rq, &flags);
preempt_enable();
goto repeat;
}
- unlock_task_rq(rq, &flags);
+ task_rq_unlock(rq, &flags);
preempt_enable();
}
-/*
- * The SMP message passing code calls this function whenever
- * the new task has arrived at the target CPU. We move the
- * new task into the local runqueue.
- *
- * This function must be called with interrupts disabled.
- */
-void sched_task_migrated(task_t *new_task)
-{
- wait_task_inactive(new_task);
- new_task->thread_info->cpu = smp_processor_id();
- wake_up_process(new_task);
-}
-
/*
* Kick the remote CPU if the task is running currently,
* this code is used by the signal code to signal tasks
* "current->state = TASK_RUNNING" to mark yourself runnable
* without the overhead of this.
*/
-static int try_to_wake_up(task_t * p, int synchronous)
+static int try_to_wake_up(task_t * p)
{
unsigned long flags;
int success = 0;
runqueue_t *rq;
- rq = lock_task_rq(p, &flags);
+ rq = task_rq_lock(p, &flags);
p->state = TASK_RUNNING;
if (!p->array) {
activate_task(p, rq);
- if ((rq->curr == rq->idle) || (p->prio < rq->curr->prio))
+ if (p->prio < rq->curr->prio)
resched_task(rq->curr);
success = 1;
}
- unlock_task_rq(rq, &flags);
+ task_rq_unlock(rq, &flags);
return success;
}
int wake_up_process(task_t * p)
{
- return try_to_wake_up(p, 0);
+ return try_to_wake_up(p);
}
void wake_up_forked_process(task_t * p)
preempt_disable();
rq = this_rq();
+ spin_lock_irq(&rq->lock);
p->state = TASK_RUNNING;
if (!rt_task(p)) {
p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
p->prio = effective_prio(p);
}
- spin_lock_irq(&rq->lock);
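+ /* the new task starts out with no pending migration */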
+ INIT_LIST_HEAD(&p->migration_list);
p->thread_info->cpu = smp_processor_id();
activate_task(p, rq);
+
spin_unlock_irq(&rq->lock);
+ init_MUTEX(&p->migration_sem);
preempt_enable();
}
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode,
- int nr_exclusive, const int sync)
+static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{
struct list_head *tmp;
+ unsigned int state;
+ wait_queue_t *curr;
task_t *p;
- list_for_each(tmp,&q->task_list) {
- unsigned int state;
- wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
-
+ list_for_each(tmp, &q->task_list) {
+ curr = list_entry(tmp, wait_queue_t, task_list);
p = curr->task;
state = p->state;
- if ((state & mode) &&
- try_to_wake_up(p, sync) &&
- ((curr->flags & WQ_FLAG_EXCLUSIVE) &&
- !--nr_exclusive))
- break;
+ if ((state & mode) && try_to_wake_up(p) &&
+ ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
+ break;
}
}
-void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr)
+void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{
- if (q) {
- unsigned long flags;
- wq_read_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr, 0);
- wq_read_unlock_irqrestore(&q->lock, flags);
- }
-}
+ unsigned long flags;
-void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)
-{
- if (q) {
- unsigned long flags;
- wq_read_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr, 1);
- wq_read_unlock_irqrestore(&q->lock, flags);
- }
+ if (unlikely(!q))
+ return;
+
+ wq_read_lock_irqsave(&q->lock, flags);
+ __wake_up_common(q, mode, nr_exclusive);
+ wq_read_unlock_irqrestore(&q->lock, flags);
}
void complete(struct completion *x)
spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0);
+ __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
return timeout;
}
+void scheduling_functions_end_here(void) { }
+
+#if CONFIG_SMP
+
/*
- * Change the current task's CPU affinity. Migrate the process to a
- * proper CPU and schedule away if the current CPU is removed from
- * the allowed bitmask.
+ * Change a given task's CPU affinity. Migrate the process to a
+ * proper CPU and schedule it away if the current CPU is removed
+ * from the allowed bitmask.
*/
void set_cpus_allowed(task_t *p, unsigned long new_mask)
{
+ unsigned long flags;
+ runqueue_t *rq;
+ int dest_cpu;
+
+ down(&p->migration_sem);
+ if (!list_empty(&p->migration_list))
+ BUG();
+
new_mask &= cpu_online_map;
if (!new_mask)
BUG();
- if (p != current)
- BUG();
+ rq = task_rq_lock(p, &flags);
p->cpus_allowed = new_mask;
/*
- * Can the task run on the current CPU? If not then
+ * Can the task run on the task's current CPU? If not then
* migrate the process off to a proper CPU.
*/
- if (new_mask & (1UL << smp_processor_id()))
- return;
-#if CONFIG_SMP
- current->state = TASK_UNINTERRUPTIBLE;
- smp_migrate_task(__ffs(new_mask), current);
+ if (new_mask & (1UL << p->thread_info->cpu)) {
+ task_rq_unlock(rq, &flags);
+ goto out;
+ }
+ /*
+ * We mark the process as nonrunnable, and kick it to
+ * schedule away from its current CPU. We also add
+ * the task to the migration queue and wake up the
+ * target CPU's migration thread, so that it can pick
+ * up this task and insert it into the local runqueue.
+ */
+ p->state = TASK_UNINTERRUPTIBLE;
+ kick_if_running(p);
+ task_rq_unlock(rq, &flags);
- schedule();
-#endif
+ dest_cpu = __ffs(new_mask);
+ rq = cpu_rq(dest_cpu);
+
+ spin_lock_irq(&rq->lock);
+ list_add(&p->migration_list, &rq->migration_queue);
+ spin_unlock_irq(&rq->lock);
+ wake_up_process(rq->migration_thread);
+
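+ /*
+ * Wait until the task has actually moved to a CPU in the new
+ * mask (or has exited) before letting the caller continue.
+ */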
+ while (!((1UL << p->thread_info->cpu) & p->cpus_allowed) &&
+ (p->state != TASK_ZOMBIE))
+ yield();
+out:
+ up(&p->migration_sem);
}
-void scheduling_functions_end_here(void) { }
+#endif
void set_user_nice(task_t *p, long nice)
{
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*/
- rq = lock_task_rq(p, &flags);
+ rq = task_rq_lock(p, &flags);
if (rt_task(p)) {
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
resched_task(rq->curr);
}
out_unlock:
- unlock_task_rq(rq, &flags);
+ task_rq_unlock(rq, &flags);
}
#ifndef __alpha__
* To be able to change p->policy safely, the appropriate
* runqueue lock must be held.
*/
- rq = lock_task_rq(p, &flags);
+ rq = task_rq_lock(p, &flags);
if (policy < 0)
policy = p->policy;
activate_task(p, task_rq(p));
out_unlock:
- unlock_task_rq(rq, &flags);
+ task_rq_unlock(rq, &flags);
out_unlock_tasklist:
read_unlock_irq(&tasklist_lock);
void __init init_idle(task_t *idle, int cpu)
{
- runqueue_t *idle_rq = cpu_rq(cpu), *rq = idle->array->rq;
+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->thread_info->cpu);
unsigned long flags;
__save_flags(flags);
runqueue_t *rq = cpu_rq(i);
prio_array_t *array;
- rq->active = rq->arrays + 0;
+ rq->active = rq->arrays;
rq->expired = rq->arrays + 1;
spin_lock_init(&rq->lock);
+ INIT_LIST_HEAD(&rq->migration_queue);
for (j = 0; j < 2; j++) {
array = rq->arrays + j;
- array->rq = rq;
- array->lock = &rq->lock;
for (k = 0; k < MAX_PRIO; k++) {
INIT_LIST_HEAD(array->queue + k);
__clear_bit(k, array->bitmap);
atomic_inc(&init_mm.mm_count);
enter_lazy_tlb(&init_mm, current, smp_processor_id());
}
+
+#if CONFIG_SMP
+
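+ /*
+ * Bitmask of CPUs whose migration thread has not yet bound itself
+ * to its CPU: migration_init() sets it to release the threads from
+ * their startup wait, and each thread clears its own bit once it
+ * has pinned itself.
+ */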
+static volatile unsigned long migration_mask;
+
+static int migration_thread(void * unused)
+{
+ runqueue_t *rq;
+
+ daemonize();
+ sigfillset(&current->blocked);
+ set_user_nice(current, -20);
+
+ /*
+ * We have to migrate ourselves manually - there is no migration thread
+ * to do this for us yet :-)
+ *
+ * We use the following property of the Linux scheduler. At
+ * this point no other task is running, so by keeping all
+ * migration threads running, the load-balancer will distribute
+ * them between all CPUs equally. At that point every migration
+ * task binds itself to the current CPU.
+ */
+
+ /* wait for all migration threads to start up. */
+ while (!migration_mask)
+ yield();
+
+ for (;;) {
+ preempt_disable();
+ if (test_and_clear_bit(smp_processor_id(), &migration_mask))
+ current->cpus_allowed = 1 << smp_processor_id();
+ if (test_thread_flag(TIF_NEED_RESCHED))
+ schedule();
+ if (!migration_mask)
+ break;
+ preempt_enable();
+ }
+ rq = this_rq();
+ rq->migration_thread = current;
+ preempt_enable();
+
+ sprintf(current->comm, "migration_CPU%d", smp_processor_id());
+
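+ /*
+ * Main loop: sleep until a task is queued on this runqueue's
+ * migration_queue, then pull it over to this CPU.
+ */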
+ for (;;) {
+ struct list_head *head;
+ unsigned long flags;
+ task_t *p = NULL;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ head = &rq->migration_queue;
+ if (list_empty(head)) {
+ current->state = TASK_UNINTERRUPTIBLE;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ schedule();
+ continue;
+ }
+ p = list_entry(head->next, task_t, migration_list);
+ list_del_init(head->next);
+ spin_unlock_irqrestore(&rq->lock, flags);
+
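+ /*
+ * Wait until the task has descheduled from its old runqueue,
+ * then rebind it to this CPU and wake it up here. If it became
+ * runnable again in the meantime, put it back to sleep and retry.
+ */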
+ for (;;) {
+ runqueue_t *rq2 = task_rq_lock(p, &flags);
+
+ if (!p->array) {
+ p->thread_info->cpu = smp_processor_id();
+ task_rq_unlock(rq2, &flags);
+ wake_up_process(p);
+ break;
+ }
+ if (p->state != TASK_UNINTERRUPTIBLE) {
+ p->state = TASK_UNINTERRUPTIBLE;
+ kick_if_running(p);
+ }
+ task_rq_unlock(rq2, &flags);
+ while ((p->state == TASK_UNINTERRUPTIBLE) && p->array) {
+ cpu_relax();
+ barrier();
+ }
+ }
+ }
+}
+
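+ /*
+ * Start one migration thread per CPU and wait until each of them
+ * has bound itself to its CPU and registered itself in its runqueue.
+ */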
+void __init migration_init(void)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
+ if (kernel_thread(migration_thread, NULL,
+ CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
+ BUG();
+
+ migration_mask = (1 << smp_num_cpus) - 1;
+
+ for (cpu = 0; cpu < smp_num_cpus; cpu++)
+ while (!cpu_rq(cpu)->migration_thread)
+ yield();
+ if (migration_mask)
+ BUG();
+}
+#endif