sched/nohz: Fix rq->cpu_load calculations some more

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Thu, 17 May 2012 15:15:29 +0000 (17:15 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Thu, 20 Feb 2014 18:45:32 +0000 (10:45 -0800)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Thu, 17 May 2012 15:15:29 +0000 (17:15 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 20 Feb 2014 18:45:32 +0000 (10:45 -0800)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 1c2470de80522376abe17aeeac0504b945d96278..8cd5cb80223c762a58634e5cb50dd76822d6e29d 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -144,6 +144,7 @@ extern unsigned long this_cpu_load(void);
  
  
  extern void calc_global_load(unsigned long ticks);
+extern void update_cpu_load_nohz(void);
  
  extern unsigned long get_parent_ip(unsigned long addr);
  
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 410d78df23e44950694ba18cb79f863dae1fee64..94f132775d0510438a07ece6ff6ff20e27da41e8 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2649,25 +2649,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
         sched_avg_update(this_rq);
  }
  
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
  /*
   * Called from nohz_idle_balance() to update the load ratings before doing the
   * idle balance.
   */
  void update_idle_cpu_load(struct rq *this_rq)
  {
-       unsigned long curr_jiffies = jiffies;
+       unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
         unsigned long load = this_rq->load.weight;
         unsigned long pending_updates;
  
         /*
-        * Bloody broken means of dealing with nohz, but better than nothing..
-        * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-        * update and see 0 difference the one time and 2 the next, even though
-        * we ticked at roughtly the same rate.
-        *
-        * Hence we only use this from nohz_idle_balance() and skip this
-        * nonsense when called from the scheduler_tick() since that's
-        * guaranteed a stable rate.
+        * bail if there's load or we're actually up-to-date.
          */
         if (load || curr_jiffies == this_rq->last_load_update_tick)
                 return;
@@ -2678,13 +2685,39 @@ void update_idle_cpu_load(struct rq *this_rq)
         __update_cpu_load(this_rq, load, pending_updates);
  }
  
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+       struct rq *this_rq = this_rq();
+       unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+       unsigned long pending_updates;
+
+       if (curr_jiffies == this_rq->last_load_update_tick)
+               return;
+
+       raw_spin_lock(&this_rq->lock);
+       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+       if (pending_updates) {
+               this_rq->last_load_update_tick = curr_jiffies;
+               /*
+                * We were idle, which means load 0; the current load might be
+                * !0 due to remote wakeups and the like.
+                */
+               __update_cpu_load(this_rq, 0, pending_updates);
+       }
+       raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
  /*
   * Called from scheduler_tick()
   */
  static void update_cpu_load_active(struct rq *this_rq)
  {
         /*
-        * See the mess in update_idle_cpu_load().
+        * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
          */
         this_rq->last_load_update_tick = jiffies;
         __update_cpu_load(this_rq, this_rq->load.weight, 1);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c

index a1e079536a7116dc03b75732420821844827ab10..638dadf6295fb9259751f6604cc7728463445065 100644 (file)
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -582,6 +582,7 @@ void tick_nohz_idle_exit(void)
         /* Update jiffies first */
         select_nohz_load_balancer(0);
         tick_do_update_jiffies64(now);
+       update_cpu_load_nohz();
  
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING
         /*
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Thu, 17 May 2012 15:15:29 +0000 (17:15 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Thu, 20 Feb 2014 18:45:32 +0000 (10:45 -0800)
include/linux/sched.h		patch | blob | history
kernel/sched/core.c		patch | blob | history
kernel/time/tick-sched.c		patch | blob | history