Merge branches 'tracing/ftrace', 'tracing/hw-branch-tracing' and 'tracing/ring-buffer'; commit 'v2.6.28' into tracing/core
diff --git a/Makefile b/Makefile
index 4c8d797..71e98e9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 28
-EXTRAVERSION = -rc9
+EXTRAVERSION =
 NAME = Erotic Pickled Herring
 
 # *DOCUMENTATION*
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index cfdf8c2..ea408dc 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -80,7 +80,6 @@
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
 #define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
@@ -92,6 +91,8 @@
 #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_AMDC1E	(3*32+21) /* AMD C1E detected */
 #define X86_FEATURE_XTOPOLOGY	(3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC	(3*32+24) /* TSC does not stop in C states */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -117,6 +118,7 @@
 #define X86_FEATURE_XSAVE	(4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
 #define X86_FEATURE_OSXSAVE	(4*32+27) /* "" XSAVE enabled in the OS */
 #define X86_FEATURE_AVX		(4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_HYPERVISOR	(4*32+31) /* Running on a hypervisor */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE	(5*32+ 2) /* "rng" RNG present (xstore) */
@@ -237,6 +239,7 @@
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index c2a812e..b8a1799 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -85,7 +85,8 @@
 	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
-static inline int native_write_msr_safe(unsigned int msr,
+/* Can be uninlined because referenced by paravirt */
+notrace static inline int native_write_msr_safe(unsigned int msr,
 					unsigned low, unsigned high)
 {
 	int err;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec607..4ae495a 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -2,6 +2,11 @@
 # Makefile for x86-compatible CPU details and quirks
 #
 
+# Don't trace early stages of a secondary CPU boot
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_common.o = -pg
+endif
+
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
 
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index ef8f831..2cf2363 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -120,9 +120,17 @@
 	c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
 						 & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(c->initial_apicid, 0);
 #else
 	c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(0);
 #endif
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f1e31d..7c878f6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -283,9 +283,14 @@
 {
 	early_init_amd_mc(c);
 
-	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-	if (c->x86_power & (1<<8))
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 means the TSC runs at a
+	 * constant rate across P/T states and does not stop in deep C-states.
+	 */
+	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
 
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cd413d9..8ea6929 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -40,6 +40,16 @@
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
 		c->x86_cache_alignment = 128;
 #endif
+
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 means the TSC runs at a
+	 * constant rate across P/T states and does not stop in deep C-states.
+	 */
+	if (c->x86_power & (1 << 8)) {
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
+
 }
 
 #ifdef CONFIG_X86_32
@@ -241,6 +251,13 @@
 
 	intel_workarounds(c);
 
+	/*
+	 * Detect the extended topology information if available. This
+	 * will reinitialise the initial_apicid which will be used
+	 * in init_intel_cacheinfo()
+	 */
+	detect_extended_topology(c);
+
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -308,7 +325,6 @@
 		set_cpu_cap(c, X86_FEATURE_P3);
 #endif
 
-	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
 		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c27af49..cff9a50 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -286,7 +286,7 @@
 		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
 		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
 			c1e_detected = 1;
-			if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
 			printk(KERN_INFO "System has AMD C1E enabled\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f71f96f..f6174d2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -287,7 +287,7 @@
 /*
  * Activate a secondary processor.
  */
-static void __cpuinit start_secondary(void *unused)
+notrace static void __cpuinit start_secondary(void *unused)
 {
 	/*
 	 * Don't put *anything* before cpu_init(), SMP booting is too
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5f8d746..38aca04 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -374,15 +374,15 @@
 {
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
+	case X86_VENDOR_INTEL:
 		/*
 		 * AMD Fam10h TSC will tick in all
 		 * C/P/S0/S1 states when this bit is set.
 		 */
-		if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			return 0;
+
 		/*FALL THROUGH*/
-	case X86_VENDOR_INTEL:
-		/* Several cases known where TSC halts in C2 too */
 	default:
 		return state > ACPI_STATE_C1;
 	}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 04b52e6..677432b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -303,7 +303,7 @@
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
-ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0)));
+ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
 
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
diff --git a/include/trace/sched.h b/include/trace/sched.h
index bc4c9ea..0d81098 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -17,8 +17,8 @@
 		TPARGS(rq, p));
 
 DECLARE_TRACE(sched_wakeup,
-	TPPROTO(struct rq *rq, struct task_struct *p),
-		TPARGS(rq, p));
+	TPPROTO(struct rq *rq, struct task_struct *p, int success),
+		TPARGS(rq, p, success));
 
 DECLARE_TRACE(sched_wakeup_new,
 	TPPROTO(struct rq *rq, struct task_struct *p, int success),
diff --git a/kernel/sched.c b/kernel/sched.c
index ceda579..dcb39bc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2324,7 +2324,7 @@
 	success = 1;
 
 out_running:
-	trace_sched_wakeup(rq, p);
+	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, sync);
 
 	p->state = TASK_RUNNING;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bb6922a..76f34c0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -838,6 +838,7 @@
 	 * back to us). This allows us to do a simple loop to
 	 * assign the commit to the tail.
 	 */
+ again:
 	while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
 		cpu_buffer->commit_page->page->commit =
 			cpu_buffer->commit_page->write;
@@ -853,6 +854,17 @@
 			cpu_buffer->commit_page->write;
 		barrier();
 	}
+
+	/* again, keep gcc from optimizing */
+	barrier();
+
+	/*
+	 * If an interrupt came in just after the first while loop
+	 * and pushed the tail page forward, we will be left with
+	 * a dangling commit that will never go forward.
+	 */
+	if (unlikely(cpu_buffer->commit_page != cpu_buffer->tail_page))
+		goto again;
 }
 
 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
@@ -950,12 +962,15 @@
 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 		  unsigned type, unsigned long length, u64 *ts)
 {
-	struct buffer_page *tail_page, *head_page, *reader_page;
+	struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
 	unsigned long tail, write;
 	struct ring_buffer *buffer = cpu_buffer->buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
 
+	commit_page = cpu_buffer->commit_page;
+	/* we just need to protect against interrupts */
+	barrier();
 	tail_page = cpu_buffer->tail_page;
 	write = local_add_return(length, &tail_page->write);
 	tail = write - length;
@@ -981,7 +996,7 @@
 		 * it all the way around the buffer, bail, and warn
 		 * about it.
 		 */
-		if (unlikely(next_page == cpu_buffer->commit_page)) {
+		if (unlikely(next_page == commit_page)) {
 			WARN_ON_ONCE(1);
 			goto out_unlock;
 		}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0eb6d48..79db26e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -679,6 +679,16 @@
 	ftrace_enable_cpu();
 }
 
+void tracing_reset_online_cpus(struct trace_array *tr)
+{
+	int cpu;
+
+	tr->time_start = ftrace_now(tr->cpu);
+
+	for_each_online_cpu(cpu)
+		tracing_reset(tr, cpu);
+}
+
 #define SAVED_CMDLINES 128
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fc75dce..cc7a4f8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -374,6 +374,7 @@
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
+void tracing_reset_online_cpus(struct trace_array *tr);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index a4fa2c5..3ccebde 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -37,16 +37,6 @@
 		tracing_stop_sched_switch_record();
 }
 
-static void reset_boot_trace(struct trace_array *tr)
-{
-	int cpu;
-
-	tr->time_start = ftrace_now(tr->cpu);
-
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-}
-
 static int boot_trace_init(struct trace_array *tr)
 {
 	int cpu;
@@ -130,7 +120,7 @@
 {
 	.name		= "initcall",
 	.init		= boot_trace_init,
-	.reset		= reset_boot_trace,
+	.reset		= tracing_reset_online_cpus,
 	.print_line	= initcall_print_line,
 };
 
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index e74f6d0..9236d7e 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -16,20 +16,10 @@
 
 #include "trace.h"
 
-static void function_reset(struct trace_array *tr)
-{
-	int cpu;
-
-	tr->time_start = ftrace_now(tr->cpu);
-
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-}
-
 static void start_function_trace(struct trace_array *tr)
 {
 	tr->cpu = get_cpu();
-	function_reset(tr);
+	tracing_reset_online_cpus(tr);
 	put_cpu();
 
 	tracing_start_cmdline_record();
@@ -55,7 +45,7 @@
 
 static void function_trace_start(struct trace_array *tr)
 {
-	function_reset(tr);
+	tracing_reset_online_cpus(tr);
 }
 
 static struct tracer function_trace __read_mostly =
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index ee29e01..b6a3e20 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -25,16 +25,6 @@
 #define this_buffer per_cpu(buffer, smp_processor_id())
 
 
-static void bts_trace_reset(struct trace_array *tr)
-{
-	int cpu;
-
-	tr->time_start = ftrace_now(tr->cpu);
-
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-}
-
 static void bts_trace_start_cpu(void *arg)
 {
 	if (this_tracer)
@@ -54,7 +44,7 @@
 {
 	int cpu;
 
-	bts_trace_reset(tr);
+	tracing_reset_online_cpus(tr);
 
 	for_each_cpu_mask(cpu, cpu_possible_map)
 		smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
@@ -78,7 +68,7 @@
 
 static int bts_trace_init(struct trace_array *tr)
 {
-	bts_trace_reset(tr);
+	tracing_reset_online_cpus(tr);
 	bts_trace_start(tr);
 
 	return 0;
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 2fb6da6..fffcb06 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -22,14 +22,10 @@
 
 static void mmio_reset_data(struct trace_array *tr)
 {
-	int cpu;
-
 	overrun_detected = false;
 	prev_overruns = 0;
-	tr->time_start = ftrace_now(tr->cpu);
 
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
+	tracing_reset_online_cpus(tr);
 }
 
 static int mmio_trace_init(struct trace_array *tr)
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 781d72e..df175cb 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -49,7 +49,7 @@
 }
 
 static void
-probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
 {
 	struct trace_array_cpu *data;
 	unsigned long flags;
@@ -72,16 +72,6 @@
 	local_irq_restore(flags);
 }
 
-static void sched_switch_reset(struct trace_array *tr)
-{
-	int cpu;
-
-	tr->time_start = ftrace_now(tr->cpu);
-
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-}
-
 static int tracing_sched_register(void)
 {
 	int ret;
@@ -197,7 +187,7 @@
 
 static void start_sched_trace(struct trace_array *tr)
 {
-	sched_switch_reset(tr);
+	tracing_reset_online_cpus(tr);
 	tracing_start_sched_switch_record();
 }
 
@@ -221,7 +211,7 @@
 
 static void sched_switch_trace_start(struct trace_array *tr)
 {
-	sched_switch_reset(tr);
+	tracing_reset_online_cpus(tr);
 	tracing_start_sched_switch();
 }
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0067b49..43586b6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -211,7 +211,7 @@
 }
 
 static void
-probe_wakeup(struct rq *rq, struct task_struct *p)
+probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 {
 	int cpu = smp_processor_id();
 	unsigned long flags;
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 54960ed..01becf1 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -234,20 +234,10 @@
 		stop_stack_timer(cpu);
 }
 
-static void stack_reset(struct trace_array *tr)
-{
-	int cpu;
-
-	tr->time_start = ftrace_now(tr->cpu);
-
-	for_each_online_cpu(cpu)
-		tracing_reset(tr, cpu);
-}
-
 static void start_stack_trace(struct trace_array *tr)
 {
 	mutex_lock(&sample_timer_lock);
-	stack_reset(tr);
+	tracing_reset_online_cpus(tr);
 	start_stack_timers();
 	tracer_enabled = 1;
 	mutex_unlock(&sample_timer_lock);