sched: avoid large irq-latencies in smp-balancing

SMP balancing is done with IRQs disabled and can iterate every task on the
runqueue. When runqueues are large this can cause large irq-latencies. Limit
the number of tasks iterated per balance run.
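
A rough sketch of the resulting loop structure in balance_tasks(), paraphrased
from the hunks below (can_migrate() and pull_one_task() are simplified
stand-ins for the real helpers, not actual kernel functions):

	int loops = 0;
	struct task_struct *p = iterator->start(iterator->arg);

	/* bail out once the per-run budget is exhausted */
	while (p && loops++ <= sysctl_sched_nr_migrate) {
		/* pinned/weight/priority checks elided */
		if (can_migrate(p)) {
			pull_one_task(p);
			rem_load_move -= p->se.load.weight;
			if (rem_load_move <= 0)
				break;
		}
		p = iterator->next(iterator->arg);
	}
	/*
	 * loops counts tasks examined, not tasks moved, so the IRQs-off
	 * section stays bounded even when nothing can be pulled.
	 */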

This fixes a scheduling latency regression reported by the -rt folks.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 2a107e4..e195a42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -472,6 +472,12 @@
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
 /*
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
+const_debug unsigned int sysctl_sched_nr_migrate = 32;
+
+/*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
  */
@@ -2235,7 +2241,7 @@
 	      enum cpu_idle_type idle, int *all_pinned,
 	      int *this_best_prio, struct rq_iterator *iterator)
 {
-	int pulled = 0, pinned = 0, skip_for_load;
+	int loops = 0, pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
@@ -2249,10 +2255,10 @@
 	 */
 	p = iterator->start(iterator->arg);
 next:
-	if (!p)
+	if (!p || loops++ > sysctl_sched_nr_migrate)
 		goto out;
 	/*
-	 * To help distribute high priority tasks accross CPUs we don't
+	 * To help distribute high priority tasks across CPUs we don't
 	 * skip a task if it will be the highest priority task (i.e. smallest
 	 * prio value) on its new queue regardless of its load weight
 	 */
@@ -2269,8 +2275,7 @@
 	rem_load_move -= p->se.load.weight;
 
 	/*
-	 * We only want to steal up to the prescribed number of tasks
-	 * and the prescribed amount of weighted load.
+	 * We only want to steal up to the prescribed amount of weighted load.
 	 */
 	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
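
A note on the new knob: sysctl_sched_nr_migrate is declared const_debug, so its
behaviour depends on CONFIG_SCHED_DEBUG. In kernel/sched.c the macro is
(roughly) defined as:

	#ifdef CONFIG_SCHED_DEBUG
	# define const_debug __read_mostly	/* stays a writable variable */
	#else
	# define const_debug const		/* folds to a compile-time constant */
	#endif

On debug kernels the limit therefore remains a runtime-adjustable value, while
on non-debug builds the default of 32 is fixed at compile time.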