sched: make posix-cpu-timers use CFS's accounting information
Update the posix-cpu-timers code to use CFS's CPU accounting information.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 995eb40..3e7f189 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -482,7 +482,8 @@
* from jiffies_to_ns(utime + stime) if sched_clock uses something
* other than jiffies.)
*/
- unsigned long long sched_time;
+ unsigned long sched_time;
+ unsigned long long sum_sched_runtime;
/*
* We don't bother to synchronize most readers of this at all,
@@ -1308,7 +1309,7 @@
extern unsigned long long sched_clock(void);
extern unsigned long long
-current_sched_time(const struct task_struct *current_task);
+task_sched_runtime(struct task_struct *task);
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1de710e1..b53c8fc 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -161,7 +161,7 @@
}
static inline unsigned long long sched_ns(struct task_struct *p)
{
- return (p == current) ? current_sched_time(p) : p->sched_time;
+ return task_sched_runtime(p);
}
int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
@@ -246,10 +246,10 @@
} while (t != p);
break;
case CPUCLOCK_SCHED:
- cpu->sched = p->signal->sched_time;
+ cpu->sched = p->signal->sum_sched_runtime;
/* Add in each other live thread. */
while ((t = next_thread(t)) != p) {
- cpu->sched += t->sched_time;
+ cpu->sched += t->se.sum_exec_runtime;
}
cpu->sched += sched_ns(p);
break;
@@ -422,7 +422,7 @@
*/
static void cleanup_timers(struct list_head *head,
cputime_t utime, cputime_t stime,
- unsigned long long sched_time)
+ unsigned long long sum_exec_runtime)
{
struct cpu_timer_list *timer, *next;
cputime_t ptime = cputime_add(utime, stime);
@@ -451,10 +451,10 @@
++head;
list_for_each_entry_safe(timer, next, head, entry) {
list_del_init(&timer->entry);
- if (timer->expires.sched < sched_time) {
+ if (timer->expires.sched < sum_exec_runtime) {
timer->expires.sched = 0;
} else {
- timer->expires.sched -= sched_time;
+ timer->expires.sched -= sum_exec_runtime;
}
}
}
@@ -467,7 +467,7 @@
void posix_cpu_timers_exit(struct task_struct *tsk)
{
cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk->sched_time);
+ tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
@@ -475,7 +475,7 @@
cleanup_timers(tsk->signal->cpu_timers,
cputime_add(tsk->utime, tsk->signal->utime),
cputime_add(tsk->stime, tsk->signal->stime),
- tsk->sched_time + tsk->signal->sched_time);
+ tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime);
}
@@ -536,7 +536,7 @@
nsleft = max_t(unsigned long long, nsleft, 1);
do {
if (likely(!(t->flags & PF_EXITING))) {
- ns = t->sched_time + nsleft;
+ ns = t->se.sum_exec_runtime + nsleft;
if (t->it_sched_expires == 0 ||
t->it_sched_expires > ns) {
t->it_sched_expires = ns;
@@ -1004,7 +1004,7 @@
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || tsk->sched_time < t->expires.sched) {
+ if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
tsk->it_sched_expires = t->expires.sched;
break;
}
@@ -1024,7 +1024,7 @@
int maxfire;
struct signal_struct *const sig = tsk->signal;
cputime_t utime, stime, ptime, virt_expires, prof_expires;
- unsigned long long sched_time, sched_expires;
+ unsigned long long sum_sched_runtime, sched_expires;
struct task_struct *t;
struct list_head *timers = sig->cpu_timers;
@@ -1044,12 +1044,12 @@
*/
utime = sig->utime;
stime = sig->stime;
- sched_time = sig->sched_time;
+ sum_sched_runtime = sig->sum_sched_runtime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
- sched_time += t->sched_time;
+ sum_sched_runtime += t->se.sum_exec_runtime;
t = next_thread(t);
} while (t != tsk);
ptime = cputime_add(utime, stime);
@@ -1090,7 +1090,7 @@
struct cpu_timer_list *t = list_first_entry(timers,
struct cpu_timer_list,
entry);
- if (!--maxfire || sched_time < t->expires.sched) {
+ if (!--maxfire || sum_sched_runtime < t->expires.sched) {
sched_expires = t->expires.sched;
break;
}
@@ -1182,7 +1182,7 @@
virt_left = cputime_sub(virt_expires, utime);
virt_left = cputime_div_non_zero(virt_left, nthreads);
if (sched_expires) {
- sched_left = sched_expires - sched_time;
+ sched_left = sched_expires - sum_sched_runtime;
do_div(sched_left, nthreads);
sched_left = max_t(unsigned long long, sched_left, 1);
} else {
@@ -1208,7 +1208,7 @@
t->it_virt_expires = ticks;
}
- sched = t->sched_time + sched_left;
+ sched = t->se.sum_exec_runtime + sched_left;
if (sched_expires && (t->it_sched_expires == 0 ||
t->it_sched_expires > sched)) {
t->it_sched_expires = sched;
@@ -1300,7 +1300,7 @@
if (UNEXPIRED(prof) && UNEXPIRED(virt) &&
(tsk->it_sched_expires == 0 ||
- tsk->sched_time < tsk->it_sched_expires))
+ tsk->se.sum_exec_runtime < tsk->it_sched_expires))
return;
#undef UNEXPIRED
diff --git a/kernel/sched.c b/kernel/sched.c
index 29eb227..0333abd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3156,28 +3156,23 @@
EXPORT_PER_CPU_SYMBOL(kstat);
/*
- * This is called on clock ticks and on context switches.
- * Bank in p->sched_time the ns elapsed since the last tick or switch.
+ * Return p->se.sum_exec_runtime plus any more ns on the sched_clock
+ * that have not yet been banked in case the task is currently running.
*/
-static inline void
-update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
+unsigned long long task_sched_runtime(struct task_struct *p)
{
- p->sched_time += now - p->last_ran;
- p->last_ran = rq->most_recent_timestamp = now;
-}
-
-/*
- * Return current->sched_time plus any more ns on the sched_clock
- * that have not yet been banked.
- */
-unsigned long long current_sched_time(const struct task_struct *p)
-{
- unsigned long long ns;
unsigned long flags;
+ u64 ns, delta_exec;
+ struct rq *rq;
- local_irq_save(flags);
- ns = p->sched_time + sched_clock() - p->last_ran;
- local_irq_restore(flags);
+ rq = task_rq_lock(p, &flags);
+ ns = p->se.sum_exec_runtime;
+ if (rq->curr == p) {
+ delta_exec = rq_clock(rq) - p->se.exec_start;
+ if ((s64)delta_exec > 0)
+ ns += delta_exec;
+ }
+ task_rq_unlock(rq, &flags);
return ns;
}
@@ -3360,14 +3355,11 @@
*/
void scheduler_tick(void)
{
- unsigned long long now = sched_clock();
struct task_struct *p = current;
int cpu = smp_processor_id();
int idle_at_tick = idle_cpu(cpu);
struct rq *rq = cpu_rq(cpu);
- update_cpu_clock(p, rq, now);
-
if (!idle_at_tick)
task_running_tick(rq, p);
#ifdef CONFIG_SMP
@@ -3550,8 +3542,6 @@
clear_tsk_need_resched(prev);
rcu_qsctr_inc(task_cpu(prev));
- update_cpu_clock(prev, rq, now);
-
prev->sleep_avg -= run_time;
if ((long)prev->sleep_avg <= 0)
prev->sleep_avg = 0;