Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

Pull turbostat update from Len Brown:
 "Updates to the turbostat utility.

  Just one kernel dependency in this batch -- added a #define to
  msr-index.h"

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
  tools/power turbostat: correct dumped pkg-cstate-limit value
  tools/power turbostat: calculate TSC frequency from CPUID(0x15) on SKL
  tools/power turbostat: correct DRAM RAPL units on recent Xeon processors
  tools/power turbostat: Initial Skylake support
  tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile
  tools/power turbostat: modprobe msr, if needed
  tools/power turbostat: dump MSR_TURBO_RATIO_LIMIT2
  tools/power turbostat: use new MSR_TURBO_RATIO_LIMIT names
  x86 msr-index: define MSR_TURBO_RATIO_LIMIT,1,2
  tools/power turbostat: label base frequency
  tools/power turbostat: update PERF_LIMIT_REASONS decoding
  tools/power turbostat: simplify default output
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 1a4eae6..c469490 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -61,6 +61,9 @@
 #define MSR_OFFCORE_RSP_1		0x000001a7
 #define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
 #define MSR_IVT_TURBO_RATIO_LIMIT	0x000001ae
+#define MSR_TURBO_RATIO_LIMIT		0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1		0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2		0x000001af
 
 #define MSR_LBR_SELECT			0x000001c8
 #define MSR_LBR_TOS			0x000001c9
@@ -165,6 +168,11 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
+#define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES		0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES	0x0000065B
+
 #define MSR_CORE_C1_RES			0x00000660
 
 #define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index d1b3a36..4039854 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,8 +1,12 @@
 CC		= $(CROSS_COMPILE)gcc
-BUILD_OUTPUT	:= $(PWD)
+BUILD_OUTPUT	:= $(CURDIR)
 PREFIX		:= /usr
 DESTDIR		:=
 
+ifeq ("$(origin O)", "command line")
+	BUILD_OUTPUT := $(O)
+endif
+
 turbostat : turbostat.c
 CFLAGS +=	-Wall
 CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"'
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index feea7ad..05b8fc3 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -20,9 +20,11 @@
 The second method is to omit the command,
 and turbostat displays statistics every 5 seconds.
 The 5-second interval can be changed using the --interval option.
-
+.PP
 Some information is not available on older processors.
 .SS Options
+Options can be specified with a single or double '-', and only as much of the option
+name as is needed to disambiguate it from others is required.  Note that options are case-sensitive.
 \fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
 .PP
 \fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter.
@@ -55,16 +57,20 @@
 The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
 displays the statistics gathered since it was forked.
 .PP
-.SH FIELD DESCRIPTIONS
+.SH DEFAULT FIELD DESCRIPTIONS
 .nf
-\fBPackage\fP processor package number.
-\fBCore\fP processor core number.
-\fBCPU\fP Linux CPU (logical processor) number.
-Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology.
+\fBCPU\fP Linux CPU (logical processor) number.  Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
 \fBAVG_MHz\fP number of cycles executed divided by time elapsed.
 \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
 \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
 \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+.fi
+.PP
+.SH DEBUG FIELD DESCRIPTIONS
+.nf
+\fBPackage\fP processor package number.
+\fBCore\fP processor core number.
+Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
 \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
 \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
 \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
@@ -81,63 +87,76 @@
 Without any parameters, turbostat displays statistics ever 5 seconds.
 (override interval with "-i sec" option, or specify a command
 for turbostat to fork).
+.nf
+[root@hsw]# ./turbostat
+     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz
+       -     488   12.51    3898    3498
+       0       0    0.01    3885    3498
+       4    3897   99.99    3898    3498
+       1       0    0.00    3861    3498
+       5       0    0.00    3882    3498
+       2       1    0.02    3894    3498
+       6       2    0.06    3898    3498
+       3       0    0.00    3849    3498
+       7       0    0.00    3877    3498
+
+.fi
+.SH DEBUG EXAMPLE
+The "--debug" option prints additional system information before measurements:
 
 The first row of statistics is a summary for the entire system.
 For residency % columns, the summary is a weighted average.
 For Temperature columns, the summary is the column maximum.
 For Watts columns, the summary is a system total.
 Subsequent rows show per-CPU statistics.
-
 .nf
-[root@ivy]# ./turbostat
-    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 
-       -       -       6    0.36    1596    3492       0    0.59    0.01   99.04    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00
-       0       0       9    0.58    1596    3492       0    0.28    0.01   99.13    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00
-       0       4       1    0.07    1596    3492       0    0.79
-       1       1      10    0.65    1596    3492       0    0.59    0.00   98.76    0.00      23
-       1       5       5    0.28    1596    3492       0    0.95
-       2       2      10    0.66    1596    3492       0    0.41    0.01   98.92    0.00      23
-       2       6       2    0.10    1597    3492       0    0.97
-       3       3       3    0.20    1596    3492       0    0.44    0.00   99.37    0.00      23
-       3       7       5    0.31    1596    3492       0    0.33
-.fi
-.SH DEBUG EXAMPLE
-The "--debug" option prints additional system information before measurements:
-
-.nf
-turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org>
-CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9)
+turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org>
+CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
 CPUID(6): APERF, DTS, PTM, EPB
-RAPL: 851 sec. Joule Counter Range, at 77 Watts
-cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300
-16 * 100 = 1600 MHz max efficiency
+RAPL: 3121 sec. Joule Counter Range, at 84 Watts
+cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
+8 * 100 = 800 MHz max efficiency
 35 * 100 = 3500 MHz TSC frequency
-cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled)
-cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n)
+cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
+cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
 cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
 37 * 100 = 3700 MHz max turbo 4 active cores
 38 * 100 = 3800 MHz max turbo 3 active cores
 39 * 100 = 3900 MHz max turbo 2 active cores
 39 * 100 = 3900 MHz max turbo 1 active cores
 cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
-cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.)
-cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.)
-cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked)
-cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled)
-cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled)
+cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, )
+cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
+cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
+cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
+cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
+cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
+cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
 cpu0: MSR_PP0_POLICY: 0
 cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
 cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
 cpu0: MSR_PP1_POLICY: 0
 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
 cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
-cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C)
-cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C)
-cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1)
-cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1)
-cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1)
-cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1)
- ...
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
+    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp PkgWatt CorWatt GFXWatt
+       -       -     493   12.64    3898    3498       0   12.64    0.00    0.00   74.72      47      47   21.62   13.74    0.00
+       0       0       4    0.11    3894    3498       0   99.89    0.00    0.00    0.00      47      47   21.62   13.74    0.00
+       0       4    3897   99.98    3898    3498       0    0.02
+       1       1       7    0.17    3887    3498       0    0.04    0.00    0.00   99.79      32
+       1       5       0    0.00    3885    3498       0    0.21
+       2       2      29    0.76    3895    3498       0    0.10    0.01    0.01   99.13      32
+       2       6       2    0.06    3896    3498       0    0.80
+       3       3       1    0.02    3832    3498       0    0.03    0.00    0.00   99.95      28
+       3       7       0    0.00    3879    3498       0    0.04
+^C
+
 .fi
 The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
 available at the minimum package voltage.  The \fBTSC frequency\fP is the base
@@ -147,6 +166,9 @@
 The remaining rows show what maximum turbo frequency is possible
 depending on the number of idle cores.  Note that not all information is
 available on all processors.
+.PP
+The --debug option adds additional columns to the measurement output, including CPU idle power-state residency and processor temperature sensor readings.
+See the field definitions above.
 .SH FORK EXAMPLE
 If turbostat is invoked with a command, it will fork that command
 and output the statistics gathered when the command exits.
@@ -154,27 +176,23 @@
 until ^C while the other CPUs are mostly idle:
 
 .nf
-root@ivy: turbostat cat /dev/zero > /dev/null
+root@hsw: turbostat cat /dev/zero > /dev/null
 ^C
-    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 
-       -       -     496   12.75    3886    3492       0   13.16    0.04   74.04    0.00      36      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00
-       0       0      22    0.57    3830    3492       0    0.83    0.02   98.59    0.00      27      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00
-       0       4       9    0.24    3829    3492       0    1.15
-       1       1       4    0.09    3783    3492       0   99.91    0.00    0.00    0.00      36
-       1       5    3880   99.82    3888    3492       0    0.18
-       2       2      17    0.44    3813    3492       0    0.77    0.04   98.75    0.00      28
-       2       6      12    0.32    3823    3492       0    0.89
-       3       3      16    0.43    3844    3492       0    0.63    0.11   98.84    0.00      30
-       3       7       4    0.11    3827    3492       0    0.94
-30.372243 sec
+     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz
+       -     482   12.51    3854    3498
+       0       0    0.01    1960    3498
+       4       0    0.00    2128    3498
+       1       0    0.00    3003    3498
+       5    3854   99.98    3855    3498
+       2       0    0.01    3504    3498
+       6       3    0.08    3884    3498
+       3       0    0.00    2553    3498
+       7       0    0.00    2126    3498
+10.783983 sec
 
 .fi
-Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit
-while the other processors are generally in various states of idle.
-
-Note that cpu1 and cpu5 are HT siblings within core1.
-As cpu5 is very busy, it prevents its sibling, cpu1,
-from entering a c-state deeper than c1.
+Above, the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit.
+The first row shows the average MHz and %Busy across all the processors in the system.
 
 Note that the Avg_MHz column reflects the total number of cycles executed
 divided by the measurement interval.  If the %Busy column is 100%,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2d089ca..bac98ca 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -57,6 +57,7 @@
 unsigned int do_pc6;
 unsigned int do_pc7;
 unsigned int do_c8_c9_c10;
+unsigned int do_skl_residency;
 unsigned int do_slm_cstates;
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
@@ -65,8 +66,6 @@
 unsigned int genuine_intel;
 unsigned int has_invariant_tsc;
 unsigned int do_nhm_platform_info;
-unsigned int do_nhm_turbo_ratio_limit;
-unsigned int do_ivt_turbo_ratio_limit;
 unsigned int extra_msr_offset32;
 unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
@@ -84,11 +83,14 @@
 unsigned int do_ptm;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
-double rapl_power_units, rapl_energy_units, rapl_time_units;
+double rapl_power_units, rapl_time_units;
+double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
 unsigned int do_core_perf_limit_reasons;
 unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
+unsigned int crystal_hz;
+unsigned long long tsc_hz;
 
 #define RAPL_PKG		(1 << 0)
 					/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -101,18 +103,18 @@
 #define RAPL_DRAM		(1 << 3)
 					/* 0x618 MSR_DRAM_POWER_LIMIT */
 					/* 0x619 MSR_DRAM_ENERGY_STATUS */
-					/* 0x61c MSR_DRAM_POWER_INFO */
 #define RAPL_DRAM_PERF_STATUS	(1 << 4)
 					/* 0x61b MSR_DRAM_PERF_STATUS */
+#define RAPL_DRAM_POWER_INFO	(1 << 5)
+					/* 0x61c MSR_DRAM_POWER_INFO */
 
-#define RAPL_CORES		(1 << 5)
+#define RAPL_CORES		(1 << 6)
 					/* 0x638 MSR_PP0_POWER_LIMIT */
 					/* 0x639 MSR_PP0_ENERGY_STATUS */
-#define RAPL_CORE_POLICY	(1 << 6)
+#define RAPL_CORE_POLICY	(1 << 7)
 					/* 0x63a MSR_PP0_POLICY */
 
-
-#define RAPL_GFX		(1 << 7)
+#define RAPL_GFX		(1 << 8)
 					/* 0x640 MSR_PP1_POWER_LIMIT */
 					/* 0x641 MSR_PP1_ENERGY_STATUS */
 					/* 0x642 MSR_PP1_POLICY */
@@ -159,6 +161,10 @@
 	unsigned long long pc8;
 	unsigned long long pc9;
 	unsigned long long pc10;
+	unsigned long long pkg_wtd_core_c0;
+	unsigned long long pkg_any_core_c0;
+	unsigned long long pkg_any_gfxe_c0;
+	unsigned long long pkg_both_core_gfxe_c0;
 	unsigned int package_id;
 	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
 	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
@@ -292,8 +298,7 @@
 	if (has_aperf)
 		outp += sprintf(outp, " Bzy_MHz");
 	outp += sprintf(outp, " TSC_MHz");
-	if (do_smi)
-		outp += sprintf(outp, "     SMI");
+
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
 	if (extra_delta_offset64)
@@ -302,6 +307,13 @@
 		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
 	if (extra_msr_offset64)
 		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
+
+	if (!debug)
+		goto done;
+
+	if (do_smi)
+		outp += sprintf(outp, "     SMI");
+
 	if (do_nhm_cstates)
 		outp += sprintf(outp, "  CPU%%c1");
 	if (do_nhm_cstates && !do_slm_cstates)
@@ -316,6 +328,13 @@
 	if (do_ptm)
 		outp += sprintf(outp, "  PkgTmp");
 
+	if (do_skl_residency) {
+		outp += sprintf(outp, " Totl%%C0");
+		outp += sprintf(outp, "  Any%%C0");
+		outp += sprintf(outp, "  GFX%%C0");
+		outp += sprintf(outp, " CPUGFX%%");
+	}
+
 	if (do_pc2)
 		outp += sprintf(outp, " Pkg%%pc2");
 	if (do_pc3)
@@ -359,6 +378,7 @@
 		outp += sprintf(outp, "   time");
 
 	}
+    done:
 	outp += sprintf(outp, "\n");
 }
 
@@ -396,6 +416,12 @@
 
 	if (p) {
 		outp += sprintf(outp, "package: %d\n", p->package_id);
+
+		outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
+		outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
+		outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
+		outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
+
 		outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
 		if (do_pc3)
 			outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
@@ -487,10 +513,6 @@
 	/* TSC_MHz */
 	outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
 
-	/* SMI */
-	if (do_smi)
-		outp += sprintf(outp, "%8d", t->smi_count);
-
 	/* delta */
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
@@ -506,6 +528,13 @@
 	if (extra_msr_offset64)
 		outp += sprintf(outp, "  0x%016llx", t->extra_msr64);
 
+	if (!debug)
+		goto done;
+
+	/* SMI */
+	if (do_smi)
+		outp += sprintf(outp, "%8d", t->smi_count);
+
 	if (do_nhm_cstates) {
 		if (!skip_c1)
 			outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
@@ -531,9 +560,18 @@
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		goto done;
 
+	/* PkgTmp */
 	if (do_ptm)
 		outp += sprintf(outp, "%8d", p->pkg_temp_c);
 
+	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
+	if (do_skl_residency) {
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
+		outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
+	}
+
 	if (do_pc2)
 		outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
 	if (do_pc3)
@@ -565,7 +603,7 @@
 		if (do_rapl & RAPL_GFX)
 			outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float);
+			outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -582,7 +620,7 @@
 					p->energy_gfx * rapl_energy_units);
 		if (do_rapl & RAPL_DRAM)
 			outp += sprintf(outp, fmt8,
-					p->energy_dram * rapl_energy_units);
+					p->energy_dram * rapl_dram_energy_units);
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
 			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -636,6 +674,13 @@
 void
 delta_package(struct pkg_data *new, struct pkg_data *old)
 {
+
+	if (do_skl_residency) {
+		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
+	}
 	old->pc2 = new->pc2 - old->pc2;
 	if (do_pc3)
 		old->pc3 = new->pc3 - old->pc3;
@@ -782,6 +827,11 @@
 	c->c7 = 0;
 	c->core_temp_c = 0;
 
+	p->pkg_wtd_core_c0 = 0;
+	p->pkg_any_core_c0 = 0;
+	p->pkg_any_gfxe_c0 = 0;
+	p->pkg_both_core_gfxe_c0 = 0;
+
 	p->pc2 = 0;
 	if (do_pc3)
 		p->pc3 = 0;
@@ -826,6 +876,13 @@
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
 
+	if (do_skl_residency) {
+		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
+	}
+
 	average.packages.pc2 += p->pc2;
 	if (do_pc3)
 		average.packages.pc3 += p->pc3;
@@ -873,6 +930,13 @@
 	average.cores.c6 /= topo.num_cores;
 	average.cores.c7 /= topo.num_cores;
 
+	if (do_skl_residency) {
+		average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+		average.packages.pkg_any_core_c0 /= topo.num_packages;
+		average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+		average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+	}
+
 	average.packages.pc2 /= topo.num_packages;
 	if (do_pc3)
 		average.packages.pc3 /= topo.num_packages;
@@ -979,6 +1043,16 @@
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
 
+	if (do_skl_residency) {
+		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+			return -10;
+		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+			return -11;
+		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+			return -12;
+		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+			return -13;
+	}
 	if (do_pc3)
 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
 			return -9;
@@ -1055,49 +1129,77 @@
 #define PCL_6R 9 /* PC6 Retention */
 #define PCL__7 10 /* PC7 */
 #define PCL_7S 11 /* PC7 Shrink */
-#define PCLUNL 12 /* Unlimited */
+#define PCL__8 12 /* PC8 */
+#define PCL__9 13 /* PC9 */
+#define PCLUNL 14 /* Unlimited */
 
 int pkg_cstate_limit = PCLUKN;
 char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
-	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"};
+	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
 
-int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL};
-int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL};
-int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL};
-int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7};
-int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
-int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL};
+int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
-void print_verbose_header(void)
+static void
+dump_nhm_platform_info(void)
 {
 	unsigned long long msr;
 	unsigned int ratio;
 
-	if (!do_nhm_platform_info)
-		return;
-
 	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
 
 	fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
 	ratio = (msr >> 40) & 0xFF;
-	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
+	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
 		ratio, bclk, ratio * bclk);
 
 	ratio = (msr >> 8) & 0xFF;
-	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
+	fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
 		ratio, bclk, ratio * bclk);
 
 	get_msr(0, MSR_IA32_POWER_CTL, &msr);
 	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
 		msr, msr & 0x2 ? "EN" : "DIS");
 
-	if (!do_ivt_turbo_ratio_limit)
-		goto print_nhm_turbo_ratio_limits;
+	return;
+}
 
-	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
+static void
+dump_hsw_turbo_ratio_limits(void)
+{
+	unsigned long long msr;
+	unsigned int ratio;
 
-	fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+
+	fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
+
+	ratio = (msr >> 8) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+			ratio, bclk, ratio * bclk);
+
+	ratio = (msr >> 0) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+			ratio, bclk, ratio * bclk);
+	return;
+}
+
+static void
+dump_ivt_turbo_ratio_limits(void)
+{
+	unsigned long long msr;
+	unsigned int ratio;
+
+	get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
+
+	fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -1138,30 +1240,18 @@
 	if (ratio)
 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
 			ratio, bclk, ratio * bclk);
+	return;
+}
 
-print_nhm_turbo_ratio_limits:
-	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+static void
+dump_nhm_turbo_ratio_limits(void)
+{
+	unsigned long long msr;
+	unsigned int ratio;
 
-#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
+	get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
 
-	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
-
-	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
-		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
-		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
-		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
-		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
-		(msr & (1 << 15)) ? "" : "UN",
-		(unsigned int)msr & 7,
-		pkg_cstate_limit_strings[pkg_cstate_limit]);
-
-	if (!do_nhm_turbo_ratio_limit)
-		return;
-
-	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
-
-	fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -1202,7 +1292,30 @@
 	if (ratio)
 		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
 			ratio, bclk, ratio * bclk);
+	return;
+}
 
+/* Print cpu0's MSR_NHM_SNB_PKG_CST_CFG_CTL to stderr, raw and decoded. */
+static void
+dump_nhm_cst_cfg(void)
+{
+	unsigned long long msr;
+
+	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+
+#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
+
+	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
+	/* 4-bit field: mask with 0xF to match the pkg_cstate_limits[] lookup */
+	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
+		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
+		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
+		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
+		(msr & (1 << 15)) ? "" : "UN",
+		(unsigned int)msr & 0xF,
+		pkg_cstate_limit_strings[pkg_cstate_limit]);
 }
 
 void free_all_buffers(void)
@@ -1483,7 +1596,8 @@
 	struct stat sb;
 
 	if (stat("/dev/cpu/0/msr", &sb))
-		err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+ 		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+			err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
 }
 
 void check_permissions()
@@ -1573,6 +1687,8 @@
 	case 0x47:	/* BDW */
 	case 0x4F:	/* BDX */
 	case 0x56:	/* BDX-DE */
+	case 0x4E:	/* SKL */
+	case 0x5E:	/* SKL */
 		pkg_cstate_limits = hsw_pkg_cstate_limits;
 		break;
 	case 0x37:	/* BYT */
@@ -1590,7 +1706,7 @@
 	}
 	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 
-	pkg_cstate_limit = pkg_cstate_limits[msr & 0x7];
+	pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
 
 	return 1;
 }
@@ -1615,11 +1731,48 @@
 
 	switch (model) {
 	case 0x3E:	/* IVB Xeon */
+	case 0x3F:	/* HSW Xeon */
 		return 1;
 	default:
 		return 0;
 	}
 }
+/*
+ * Report whether this CPU is a genuine Intel family 6 HSW Xeon
+ * (model 0x3F): returns 1 if so, 0 otherwise.
+ */
+int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+	/* Only genuine Intel family 6 parts qualify. */
+	if (!genuine_intel || family != 6)
+		return 0;
+
+	if (model == 0x3F)	/* HSW Xeon */
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Dump platform info, turbo ratio limits (LIMIT2, LIMIT1, then LIMIT, as
+ * supported), and the pkg C-state config; no-op unless do_nhm_platform_info.
+ */
+static void
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
+{
+	if (!do_nhm_platform_info)
+		return;
+
+	dump_nhm_platform_info();
+	if (has_hsw_turbo_ratio_limit(family, model))
+		dump_hsw_turbo_ratio_limits();
+	if (has_ivt_turbo_ratio_limit(family, model))
+		dump_ivt_turbo_ratio_limits();
+	if (has_nhm_turbo_ratio_limit(family, model))
+		dump_nhm_turbo_ratio_limits();
+	dump_nhm_cst_cfg();
+}
+
 
 /*
  * print_epb()
@@ -1690,35 +1843,35 @@
 		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
 		fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
 		fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
-			(msr & 1 << 0) ? "PROCHOT, " : "",
-			(msr & 1 << 1) ? "ThermStatus, " : "",
-			(msr & 1 << 2) ? "bit2, " : "",
-			(msr & 1 << 4) ? "Graphics, " : "",
-			(msr & 1 << 5) ? "Auto-HWP, " : "",
-			(msr & 1 << 6) ? "VR-Therm, " : "",
-			(msr & 1 << 8) ? "Amps, " : "",
-			(msr & 1 << 9) ? "CorePwr, " : "",
-			(msr & 1 << 10) ? "PkgPwrL1, " : "",
-			(msr & 1 << 11) ? "PkgPwrL2, " : "",
-			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
-			(msr & 1 << 13) ? "Transitions, " : "",
+			(msr & 1 << 15) ? "bit15, " : "",
 			(msr & 1 << 14) ? "bit14, " : "",
-			(msr & 1 << 15) ? "bit15, " : "");
+			(msr & 1 << 13) ? "Transitions, " : "",
+			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
+			(msr & 1 << 11) ? "PkgPwrL2, " : "",
+			(msr & 1 << 10) ? "PkgPwrL1, " : "",
+			(msr & 1 << 9) ? "CorePwr, " : "",
+			(msr & 1 << 8) ? "Amps, " : "",
+			(msr & 1 << 6) ? "VR-Therm, " : "",
+			(msr & 1 << 5) ? "Auto-HWP, " : "",
+			(msr & 1 << 4) ? "Graphics, " : "",
+			(msr & 1 << 2) ? "bit2, " : "",
+			(msr & 1 << 1) ? "ThermStatus, " : "",
+			(msr & 1 << 0) ? "PROCHOT, " : "");
 		fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
-			(msr & 1 << 16) ? "PROCHOT, " : "",
-			(msr & 1 << 17) ? "ThermStatus, " : "",
-			(msr & 1 << 18) ? "bit18, " : "",
-			(msr & 1 << 20) ? "Graphics, " : "",
-			(msr & 1 << 21) ? "Auto-HWP, " : "",
-			(msr & 1 << 22) ? "VR-Therm, " : "",
-			(msr & 1 << 24) ? "Amps, " : "",
-			(msr & 1 << 25) ? "CorePwr, " : "",
-			(msr & 1 << 26) ? "PkgPwrL1, " : "",
-			(msr & 1 << 27) ? "PkgPwrL2, " : "",
-			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
-			(msr & 1 << 29) ? "Transitions, " : "",
+			(msr & 1 << 31) ? "bit31, " : "",
 			(msr & 1 << 30) ? "bit30, " : "",
-			(msr & 1 << 31) ? "bit31, " : "");
+			(msr & 1 << 29) ? "Transitions, " : "",
+			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
+			(msr & 1 << 27) ? "PkgPwrL2, " : "",
+			(msr & 1 << 26) ? "PkgPwrL1, " : "",
+			(msr & 1 << 25) ? "CorePwr, " : "",
+			(msr & 1 << 24) ? "Amps, " : "",
+			(msr & 1 << 22) ? "VR-Therm, " : "",
+			(msr & 1 << 21) ? "Auto-HWP, " : "",
+			(msr & 1 << 20) ? "Graphics, " : "",
+			(msr & 1 << 18) ? "bit18, " : "",
+			(msr & 1 << 17) ? "ThermStatus, " : "",
+			(msr & 1 << 16) ? "PROCHOT, " : "");
 
 	}
 	if (do_gfx_perf_limit_reasons) {
@@ -1784,6 +1937,25 @@
 	}
 }
 
+/*
+ * rapl_dram_energy_units_probe() - choose the DRAM RAPL energy unit for a CPU model.
+ * Models 0x3F/0x4F/0x56 get a hard-coded 15.3 / 1000000; others get rapl_energy_units back.
+ */
+static double
+rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
+{
+	/* only called for genuine_intel, family 6; neither is re-checked here */
+
+	switch (model) {
+	case 0x3F:	/* HSX */
+	case 0x4F:	/* BDX */
+	case 0x56:	/* BDX-DE */
+		return (rapl_dram_energy_units = 15.3 / 1000000);	/* also stored in file-scope rapl_dram_energy_units */
+	default:
+		return (rapl_energy_units);	/* no override: hand the caller's unit back unchanged */
+	}
+}
+
 
 /*
  * rapl_probe()
@@ -1812,14 +1984,18 @@
 	case 0x47:	/* BDW */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		break;
+	case 0x4E:	/* SKL */
+	case 0x5E:	/* SKL */
+		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		break;
 	case 0x3F:	/* HSX */
 	case 0x4F:	/* BDX */
 	case 0x56:	/* BDX-DE */
-		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
 		break;
 	case 0x2D:
 	case 0x3E:
-		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
 		break;
 	case 0x37:	/* BYT */
 	case 0x4D:	/* AVN */
@@ -1839,6 +2015,8 @@
 	else
 		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
+	rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+
 	time_unit = msr >> 16 & 0xF;
 	if (time_unit == 0)
 		time_unit = 0xA;
@@ -2009,19 +2187,18 @@
 			((msr >> 48) & 1) ? "EN" : "DIS");
 	}
 
-	if (do_rapl & RAPL_DRAM) {
+	if (do_rapl & RAPL_DRAM_POWER_INFO) {
 		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
                 	return -6;
 
-
 		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
 			cpu, msr,
 			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
-
-
+	}
+	if (do_rapl & RAPL_DRAM) {
 		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
 			return -9;
 		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -2090,6 +2267,8 @@
 	case 0x47:	/* BDW */
 	case 0x4F:	/* BDX */
 	case 0x56:	/* BDX-DE */
+	case 0x4E:	/* SKL */
+	case 0x5E:	/* SKL */
 		return 1;
 	}
 	return 0;
@@ -2110,11 +2289,35 @@
 	switch (model) {
 	case 0x45:	/* HSW */
 	case 0x3D:	/* BDW */
+	case 0x4E:	/* SKL */
+	case 0x5E:	/* SKL */
 		return 1;
 	}
 	return 0;
 }
 
+/*
+ * has_skl_msrs() - return 1 when genuine_intel is set and model is 0x4E or 0x5E (SKL),
+ * else 0.  The family argument is not examined.  SKL adds support for additional MSRs:
+ * MSR_PKG_WEIGHTED_CORE_C0_RES    0x00000658
+ * MSR_PKG_ANY_CORE_C0_RES         0x00000659
+ * MSR_PKG_ANY_GFXE_C0_RES         0x0000065A
+ * MSR_PKG_BOTH_CORE_GFXE_C0_RES   0x0000065B
+ */
+int has_skl_msrs(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)	/* vendor is tested before the model number */
+		return 0;
+
+	switch (model) {
+	case 0x4E:	/* SKL */
+	case 0x5E:	/* SKL */
+		return 1;
+	}
+	return 0;	/* any other model */
+}
+
+
 
 int is_slm(unsigned int family, unsigned int model)
 {
@@ -2228,7 +2431,7 @@
 
 	return 0;
 }
-void check_cpuid()
+void process_cpuid()
 {
 	unsigned int eax, ebx, ecx, edx, max_level;
 	unsigned int fms, family, model, stepping;
@@ -2294,6 +2497,41 @@
 			do_ptm ? "" : "No ",
 			has_epb ? "" : "No ");
 
+	if (max_level >= 0x15) {	/* >=: leaf 0x15 is also usable when it is the highest leaf */
+		unsigned int eax_crystal;
+		unsigned int ebx_tsc;
+
+		/*
+		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz: tsc_hz = crystal_hz * ebx_tsc / eax_crystal
+		 */
+		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
+		__get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
+
+		if (ebx_tsc != 0) {
+
+			if (debug)	/* ebx_tsc is known non-zero here; plain ebx is not written by this query */
+				fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+					eax_crystal, ebx_tsc, crystal_hz);
+
+			if (crystal_hz == 0)	/* leaf did not report a crystal rate: fall back per model */
+				switch(model) {
+				case 0x4E:	/* SKL */
+				case 0x5E:	/* SKL */
+					crystal_hz = 24000000;	/* 24 MHz */
+					break;
+				default:
+					crystal_hz = 0;
+				}
+
+			if (crystal_hz && eax_crystal) {	/* skip when the rate is unknown or the divisor is zero */
+				tsc_hz =  (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
+				if (debug)
+					fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+						tsc_hz / 1000000, crystal_hz, ebx_tsc,  eax_crystal);
+			}
+		}
+	}
+
 	do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
 	do_snb_cstates = has_snb_msrs(family, model);
 	do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
@@ -2301,18 +2539,19 @@
 	do_pc6 = (pkg_cstate_limit >= PCL__6);
 	do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
 	do_c8_c9_c10 = has_hsw_msrs(family, model);
+	do_skl_residency = has_skl_msrs(family, model);
 	do_slm_cstates = is_slm(family, model);
 	bclk = discover_bclk(family, model);
 
-	do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model);
-	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
 	rapl_probe(family, model);
 	perf_limit_reasons_probe(family, model);
 
+	if (debug)
+		dump_cstate_pstate_config_info();
+
 	return;
 }
 
-
 void help()
 {
 	fprintf(stderr,
@@ -2428,14 +2667,14 @@
 	if (debug > 1)
 		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
 			max_core_id, topo.num_cores_per_pkg);
-	if (!summary_only && topo.num_cores_per_pkg > 1)
+	if (debug && !summary_only && topo.num_cores_per_pkg > 1)
 		show_core = 1;
 
 	topo.num_packages = max_package_id + 1;
 	if (debug > 1)
 		fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
 			max_package_id, topo.num_packages);
-	if (!summary_only && topo.num_packages > 1)
+	if (debug && !summary_only && topo.num_packages > 1)
 		show_pkg = 1;
 
 	topo.num_threads_per_core = max_siblings;
@@ -2550,14 +2789,11 @@
 {
 	check_dev_msr();
 	check_permissions();
-	check_cpuid();
+	process_cpuid();
 
 	setup_all_buffers();
 
 	if (debug)
-		print_verbose_header();
-
-	if (debug)
 		for_all_cpus(print_epb, ODD_COUNTERS);
 
 	if (debug)
@@ -2634,7 +2870,7 @@
 }
 
 void print_version() {
-	fprintf(stderr, "turbostat version 4.1 10-Feb, 2015"
+	fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
 		" - Len Brown <lenb@kernel.org>\n");
 }