Merge tag 'arm64-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64

Pull ARM64 updates from Catalin Marinas:

 - Generic execve, kernel_thread, fork/vfork/clone.

 - Preparatory patches for KVM support (initialising EL2 mode so that KVM
   can be installed later, and adding a hypervisor stub).

 - Signal handling corner case fix (alternative signal stack set up for
   a SEGV handler, where the SEGV is raised in response to RLIMIT_STACK
   being reached).

 - Sub-nanosecond timer error fix.

* tag 'arm64-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64: (30 commits)
  arm64: Update the MAINTAINERS entry
  arm64: compat for clock_adjtime(2) is miswired
  arm64: move FP-SIMD save/restore code to a macro
  arm64: hyp: initialize vttbr_el2 to zero
  arm64: add hypervisor stub
  arm64: record boot mode when entering the kernel
  arm64: move vector entry macro to assembler.h
  arm64: add AArch32 execution modes to ptrace.h
  arm64: expand register mapping between AArch32 and AArch64
  arm64: generic timer: use virtual counter instead of physical at EL0
  arm64: vdso: defer shifting of nanosecond component of timespec
  arm64: vdso: rework __do_get_tspec register allocation and return shift
  arm64: vdso: check sequence counter even for coarse realtime operations
  arm64: vdso: fix clocksource mask when extracting bottom 56 bits
  ARM64: Remove incorrect Kconfig symbol HAVE_SPARSE_IRQ
  Documentation: Fixes a word in Documentation/arm64/memory.txt
  arm64: Make !dirty ptes read-only
  arm64: Convert empty flush_cache_{mm,page} functions to static inline
  arm64: signal: let the compiler inline compat_get_sigframe
  arm64: signal: return struct rt_sigframe from get_sigframe
  ...

Conflicts:
	arch/arm64/include/asm/unistd32.h
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 4110cca..d758702 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -41,7 +41,7 @@
 
 ffffffbffc000000	ffffffbfffffffff	  64MB		modules
 
-ffffffc000000000	ffffffffffffffff	 256GB		memory
+ffffffc000000000	ffffffffffffffff	 256GB		kernel logical memory map
 
 
 Translation table lookup with 4KB pages:
diff --git a/MAINTAINERS b/MAINTAINERS
index 23b9584..ae56d94 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1248,9 +1248,11 @@
 
 ARM64 PORT (AARCH64 ARCHITECTURE)
 M:	Catalin Marinas <catalin.marinas@arm.com>
+M:	Will Deacon <will.deacon@arm.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/
+F:	Documentation/arm64/
 
 ASC7621 HARDWARE MONITOR DRIVER
 M:	George Joseph <george.joseph@fairview5.com>
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 15ac18a..2adf340 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2,11 +2,14 @@
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+	select COMMON_CLK
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_HARDIRQS_NO_DEPRECATED
 	select GENERIC_IOMAP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select GENERIC_KERNEL_EXECVE
+	select GENERIC_KERNEL_THREAD
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
@@ -21,7 +24,6 @@
 	select HAVE_IRQ_WORK
 	select HAVE_MEMBLOCK
 	select HAVE_PERF_EVENTS
-	select HAVE_SPARSE_IRQ
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6e9ca46..14a9d5a 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -3,6 +3,7 @@
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += checksum.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += delay.h
diff --git a/arch/arm64/include/asm/arm_generic.h b/arch/arm64/include/asm/arm_generic.h
index e4cec9d..df2aeb8 100644
--- a/arch/arm64/include/asm/arm_generic.h
+++ b/arch/arm64/include/asm/arm_generic.h
@@ -70,12 +70,12 @@
 {
 	u32 cntkctl;
 
-	/* Disable user access to the timers and the virtual counter. */
+	/* Disable user access to the timers and the physical counter. */
 	asm volatile("mrs	%0, cntkctl_el1" : "=r" (cntkctl));
-	cntkctl &= ~((3 << 8) | (1 << 1));
+	cntkctl &= ~((3 << 8) | (1 << 0));
 
-	/* Enable user access to the physical counter and frequency. */
-	cntkctl |= 1;
+	/* Enable user access to the virtual counter and frequency. */
+	cntkctl |= (1 << 1);
 	asm volatile("msr	cntkctl_el1, %0" : : "r" (cntkctl));
 }
 
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index da2a13e..c8eedc6 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -107,3 +107,11 @@
  * Register aliases.
  */
 lr	.req	x30		// link register
+
+/*
+ * Vector entry
+ */
+	 .macro	ventry	label
+	.align	7
+	b	\label
+	.endm
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index aa3132a..3300cbd 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -70,13 +70,20 @@
  *		- size   - region size
  */
 extern void flush_cache_all(void);
-extern void flush_cache_mm(struct mm_struct *mm);
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
-extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __flush_cache_user_range(unsigned long start, unsigned long end);
 
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+}
+
+static inline void flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long user_addr, unsigned long pfn)
+{
+}
+
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
new file mode 100644
index 0000000..bbec599
--- /dev/null
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -0,0 +1,64 @@
+/*
+ * FP/SIMD state saving and restoring macros
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+.macro fpsimd_save state, tmpnr
+	stp	q0, q1, [\state, #16 * 0]
+	stp	q2, q3, [\state, #16 * 2]
+	stp	q4, q5, [\state, #16 * 4]
+	stp	q6, q7, [\state, #16 * 6]
+	stp	q8, q9, [\state, #16 * 8]
+	stp	q10, q11, [\state, #16 * 10]
+	stp	q12, q13, [\state, #16 * 12]
+	stp	q14, q15, [\state, #16 * 14]
+	stp	q16, q17, [\state, #16 * 16]
+	stp	q18, q19, [\state, #16 * 18]
+	stp	q20, q21, [\state, #16 * 20]
+	stp	q22, q23, [\state, #16 * 22]
+	stp	q24, q25, [\state, #16 * 24]
+	stp	q26, q27, [\state, #16 * 26]
+	stp	q28, q29, [\state, #16 * 28]
+	stp	q30, q31, [\state, #16 * 30]!
+	mrs	x\tmpnr, fpsr
+	str	w\tmpnr, [\state, #16 * 2]
+	mrs	x\tmpnr, fpcr
+	str	w\tmpnr, [\state, #16 * 2 + 4]
+.endm
+
+.macro fpsimd_restore state, tmpnr
+	ldp	q0, q1, [\state, #16 * 0]
+	ldp	q2, q3, [\state, #16 * 2]
+	ldp	q4, q5, [\state, #16 * 4]
+	ldp	q6, q7, [\state, #16 * 6]
+	ldp	q8, q9, [\state, #16 * 8]
+	ldp	q10, q11, [\state, #16 * 10]
+	ldp	q12, q13, [\state, #16 * 12]
+	ldp	q14, q15, [\state, #16 * 14]
+	ldp	q16, q17, [\state, #16 * 16]
+	ldp	q18, q19, [\state, #16 * 18]
+	ldp	q20, q21, [\state, #16 * 20]
+	ldp	q22, q23, [\state, #16 * 22]
+	ldp	q24, q25, [\state, #16 * 24]
+	ldp	q26, q27, [\state, #16 * 26]
+	ldp	q28, q29, [\state, #16 * 28]
+	ldp	q30, q31, [\state, #16 * 30]!
+	ldr	w\tmpnr, [\state, #16 * 2]
+	msr	fpsr, x\tmpnr
+	ldr	w\tmpnr, [\state, #16 * 2 + 4]
+	msr	fpcr, x\tmpnr
+.endm
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 14aba2d..64b1339 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -159,6 +159,8 @@
 {
 	if (pte_present_exec_user(pte))
 		__sync_icache_dcache(pte, addr);
+	if (!pte_dirty(pte))
+		pte = pte_wrprotect(pte);
 	set_pte(ptep, pte);
 }
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 77f696c..ab239b2 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -128,11 +128,6 @@
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
 
-/*
- * Create a new kernel thread
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
 
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index b04d340..4ce845f 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -30,7 +30,17 @@
 #define COMPAT_PTRACE_SETVFPREGS	28
 #define COMPAT_PTRACE_GETHBPREGS	29
 #define COMPAT_PTRACE_SETHBPREGS	30
+
+/* AArch32 CPSR bits */
+#define COMPAT_PSR_MODE_MASK	0x0000001f
 #define COMPAT_PSR_MODE_USR	0x00000010
+#define COMPAT_PSR_MODE_FIQ	0x00000011
+#define COMPAT_PSR_MODE_IRQ	0x00000012
+#define COMPAT_PSR_MODE_SVC	0x00000013
+#define COMPAT_PSR_MODE_ABT	0x00000017
+#define COMPAT_PSR_MODE_HYP	0x0000001a
+#define COMPAT_PSR_MODE_UND	0x0000001b
+#define COMPAT_PSR_MODE_SYS	0x0000001f
 #define COMPAT_PSR_T_BIT	0x00000020
 #define COMPAT_PSR_IT_MASK	0x0600fc00	/* If-Then execution state mask */
 /*
@@ -44,10 +54,27 @@
 
 /* sizeof(struct user) for AArch32 */
 #define COMPAT_USER_SZ	296
-/* AArch32 uses x13 as the stack pointer... */
+
+/* Architecturally defined mapping between AArch32 and AArch64 registers */
+#define compat_usr(x)	regs[(x)]
 #define compat_sp	regs[13]
-/* ... and x14 as the link register. */
 #define compat_lr	regs[14]
+#define compat_sp_hyp	regs[15]
+#define compat_sp_irq	regs[16]
+#define compat_lr_irq	regs[17]
+#define compat_sp_svc	regs[18]
+#define compat_lr_svc	regs[19]
+#define compat_sp_abt	regs[20]
+#define compat_lr_abt	regs[21]
+#define compat_sp_und	regs[22]
+#define compat_lr_und	regs[23]
+#define compat_r8_fiq	regs[24]
+#define compat_r9_fiq	regs[25]
+#define compat_r10_fiq	regs[26]
+#define compat_r11_fiq	regs[27]
+#define compat_r12_fiq	regs[28]
+#define compat_sp_fiq	regs[29]
+#define compat_lr_fiq	regs[30]
 
 /*
  * This struct defines the way the registers are stored on the stack during an
diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h
index 09ff335..a1b00cd 100644
--- a/arch/arm64/include/asm/syscalls.h
+++ b/arch/arm64/include/asm/syscalls.h
@@ -23,18 +23,16 @@
 /*
  * System call wrappers implemented in kernel/entry.S.
  */
-asmlinkage long sys_execve_wrapper(const char __user *filename,
-				   const char __user *const __user *argv,
-				   const char __user *const __user *envp);
-asmlinkage long sys_clone_wrapper(unsigned long clone_flags,
-				  unsigned long newsp,
-				  void __user *parent_tid,
-				  unsigned long tls_val,
-				  void __user *child_tid);
 asmlinkage long sys_rt_sigreturn_wrapper(void);
 asmlinkage long sys_sigaltstack_wrapper(const stack_t __user *uss,
 					stack_t __user *uoss);
 
+/*
+ * AArch64 sys_clone implementation has a different prototype than the generic
+ * one (additional TLS value argument).
+ */
+#define sys_clone	sys_clone
+
 #include <asm-generic/syscalls.h>
 
 #endif	/* __ASM_SYSCALLS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 68aff28..43064a8 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -25,4 +25,5 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
+#define __ARCH_WANT_SYS_EXECVE
 #include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 656a6f2..50104e8 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -23,7 +23,7 @@
 
 __SYSCALL(0,   sys_restart_syscall)
 __SYSCALL(1,   sys_exit)
-__SYSCALL(2,   compat_sys_fork_wrapper)
+__SYSCALL(2,   compat_sys_fork)
 __SYSCALL(3,   sys_read)
 __SYSCALL(4,   sys_write)
 __SYSCALL(5,   compat_sys_open)
@@ -32,7 +32,7 @@
 __SYSCALL(8,   sys_creat)
 __SYSCALL(9,   sys_link)
 __SYSCALL(10,  sys_unlink)
-__SYSCALL(11,  compat_sys_execve_wrapper)
+__SYSCALL(11,  compat_sys_execve)
 __SYSCALL(12,  sys_chdir)
 __SYSCALL(13,  sys_ni_syscall)			/* 13 was sys_time */
 __SYSCALL(14,  sys_mknod)
@@ -141,7 +141,7 @@
 __SYSCALL(117, sys_ni_syscall)			/* 117 was sys_ipc */
 __SYSCALL(118, sys_fsync)
 __SYSCALL(119, compat_sys_sigreturn_wrapper)
-__SYSCALL(120, compat_sys_clone_wrapper)
+__SYSCALL(120, sys_clone)
 __SYSCALL(121, sys_setdomainname)
 __SYSCALL(122, sys_newuname)
 __SYSCALL(123, sys_ni_syscall)			/* 123 was sys_modify_ldt */
@@ -211,7 +211,7 @@
 __SYSCALL(187, compat_sys_sendfile)
 __SYSCALL(188, sys_ni_syscall)			/* 188 reserved */
 __SYSCALL(189, sys_ni_syscall)			/* 189 reserved */
-__SYSCALL(190, compat_sys_vfork_wrapper)
+__SYSCALL(190, compat_sys_vfork)
 __SYSCALL(191, compat_sys_getrlimit)		/* SuS compliant getrlimit */
 __SYSCALL(192, sys_mmap_pgoff)
 __SYSCALL(193, compat_sys_truncate64_wrapper)
@@ -393,7 +393,7 @@
 __SYSCALL(369, sys_prlimit64)
 __SYSCALL(370, sys_name_to_handle_at)
 __SYSCALL(371, compat_sys_open_by_handle_at)
-__SYSCALL(372, sys_clock_adjtime)
+__SYSCALL(372, compat_sys_clock_adjtime)
 __SYSCALL(373, sys_syncfs)
 
 #define __NR_compat_syscalls		374
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
new file mode 100644
index 0000000..4398272
--- /dev/null
+++ b/arch/arm64/include/asm/virt.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM__VIRT_H
+#define __ASM__VIRT_H
+
+#define BOOT_CPU_MODE_EL2	(0x0e12b007)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * __boot_cpu_mode records what mode CPUs were booted in.
+ * A correctly-implemented bootloader must start all CPUs in the same mode:
+ * In this case, both 32bit halves of __boot_cpu_mode will contain the
+ * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ *
+ * Should the bootloader fail to do this, the two values will be different.
+ * This allows the kernel to flag an error when the secondaries have come up.
+ */
+extern u32 __boot_cpu_mode[2];
+
+void __hyp_set_vectors(phys_addr_t phys_vector_base);
+phys_addr_t __hyp_get_vectors(void);
+
+/* Reports the availability of HYP mode */
+static inline bool is_hyp_mode_available(void)
+{
+	return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
+		__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
+}
+
+/* Check if the bootloader has booted CPUs in different modes */
+static inline bool is_hyp_mode_mismatched(void)
+{
+	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index e2caff1..74239c3 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -8,7 +8,8 @@
 # Object file lists.
 arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
-			   sys.o stacktrace.o time.o traps.o io.o vdso.o
+			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
+			   hyp-stub.o
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 17988a6..6a27cd6 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -20,6 +20,7 @@
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/fpsimdmacros.h>
 
 /*
  * Save the FP registers.
@@ -27,26 +28,7 @@
  * x0 - pointer to struct fpsimd_state
  */
 ENTRY(fpsimd_save_state)
-	stp	q0, q1, [x0, #16 * 0]
-	stp	q2, q3, [x0, #16 * 2]
-	stp	q4, q5, [x0, #16 * 4]
-	stp	q6, q7, [x0, #16 * 6]
-	stp	q8, q9, [x0, #16 * 8]
-	stp	q10, q11, [x0, #16 * 10]
-	stp	q12, q13, [x0, #16 * 12]
-	stp	q14, q15, [x0, #16 * 14]
-	stp	q16, q17, [x0, #16 * 16]
-	stp	q18, q19, [x0, #16 * 18]
-	stp	q20, q21, [x0, #16 * 20]
-	stp	q22, q23, [x0, #16 * 22]
-	stp	q24, q25, [x0, #16 * 24]
-	stp	q26, q27, [x0, #16 * 26]
-	stp	q28, q29, [x0, #16 * 28]
-	stp	q30, q31, [x0, #16 * 30]!
-	mrs	x8, fpsr
-	str	w8, [x0, #16 * 2]
-	mrs	x8, fpcr
-	str	w8, [x0, #16 * 2 + 4]
+	fpsimd_save x0, 8
 	ret
 ENDPROC(fpsimd_save_state)
 
@@ -56,25 +38,6 @@
  * x0 - pointer to struct fpsimd_state
  */
 ENTRY(fpsimd_load_state)
-	ldp	q0, q1, [x0, #16 * 0]
-	ldp	q2, q3, [x0, #16 * 2]
-	ldp	q4, q5, [x0, #16 * 4]
-	ldp	q6, q7, [x0, #16 * 6]
-	ldp	q8, q9, [x0, #16 * 8]
-	ldp	q10, q11, [x0, #16 * 10]
-	ldp	q12, q13, [x0, #16 * 12]
-	ldp	q14, q15, [x0, #16 * 14]
-	ldp	q16, q17, [x0, #16 * 16]
-	ldp	q18, q19, [x0, #16 * 18]
-	ldp	q20, q21, [x0, #16 * 20]
-	ldp	q22, q23, [x0, #16 * 22]
-	ldp	q24, q25, [x0, #16 * 24]
-	ldp	q26, q27, [x0, #16 * 26]
-	ldp	q28, q29, [x0, #16 * 28]
-	ldp	q30, q31, [x0, #16 * 30]!
-	ldr	w8, [x0, #16 * 2]
-	ldr	w9, [x0, #16 * 2 + 4]
-	msr	fpsr, x8
-	msr	fpcr, x9
+	fpsimd_restore x0, 8
 	ret
 ENDPROC(fpsimd_load_state)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a6f3f7d..9c94f40 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -148,10 +148,6 @@
 /*
  * Exception vectors.
  */
-	.macro	ventry	label
-	.align	7
-	b	\label
-	.endm
 
 	.align	11
 ENTRY(vectors)
@@ -594,7 +590,7 @@
 /*
  * "slow" syscall return path.
  */
-ENTRY(ret_to_user)
+ret_to_user:
 	disable_irq				// disable interrupts
 	ldr	x1, [tsk, #TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
@@ -611,7 +607,10 @@
  */
 ENTRY(ret_from_fork)
 	bl	schedule_tail
-	get_thread_info tsk
+	cbz	x19, 1f				// not a kernel thread
+	mov	x0, x20
+	blr	x19
+1:	get_thread_info tsk
 	b	ret_to_user
 ENDPROC(ret_from_fork)
 
@@ -673,16 +672,6 @@
 /*
  * Special system call wrappers.
  */
-ENTRY(sys_execve_wrapper)
-	mov	x3, sp
-	b	sys_execve
-ENDPROC(sys_execve_wrapper)
-
-ENTRY(sys_clone_wrapper)
-	mov	x5, sp
-	b	sys_clone
-ENDPROC(sys_clone_wrapper)
-
 ENTRY(sys_rt_sigreturn_wrapper)
 	mov	x0, sp
 	b	sys_rt_sigreturn
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a2f02b6..368ad1f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -31,6 +31,7 @@
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/virt.h>
 
 /*
  * swapper_pg_dir is the virtual address of the initial page table. We place
@@ -115,13 +116,13 @@
 
 ENTRY(stext)
 	mov	x21, x0				// x21=FDT
+	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
 	bl	el2_setup			// Drop to EL1
 	mrs	x22, midr_el1			// x22=cpuid
 	mov	x0, x22
 	bl	lookup_processor_type
 	mov	x23, x0				// x23=current cpu_table
 	cbz	x23, __error_p			// invalid processor (x23=0)?
-	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
 	bl	__vet_fdt
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
 	/*
@@ -147,17 +148,23 @@
 	mrs	x0, CurrentEL
 	cmp	x0, #PSR_MODE_EL2t
 	ccmp	x0, #PSR_MODE_EL2h, #0x4, ne
+	ldr	x0, =__boot_cpu_mode		// Compute __boot_cpu_mode
+	add	x0, x0, x28
 	b.eq	1f
+	str	wzr, [x0]			// Remember we don't have EL2...
 	ret
 
 	/* Hyp configuration. */
-1:	mov	x0, #(1 << 31)			// 64-bit EL1
+1:	ldr	w1, =BOOT_CPU_MODE_EL2
+	str	w1, [x0, #4]			// This CPU has EL2
+	mov	x0, #(1 << 31)			// 64-bit EL1
 	msr	hcr_el2, x0
 
 	/* Generic timers. */
 	mrs	x0, cnthctl_el2
 	orr	x0, x0, #3			// Enable EL1 physical timers
 	msr	cnthctl_el2, x0
+	msr	cntvoff_el2, xzr		// Clear virtual offset
 
 	/* Populate ID registers. */
 	mrs	x0, midr_el1
@@ -178,6 +185,13 @@
 	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
 #endif
 
+	/* Stage-2 translation */
+	msr	vttbr_el2, xzr
+
+	/* Hypervisor stub */
+	adr	x0, __hyp_stub_vectors
+	msr	vbar_el2, x0
+
 	/* spsr */
 	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
 		      PSR_MODE_EL1h)
@@ -186,6 +200,19 @@
 	eret
 ENDPROC(el2_setup)
 
+/*
+ * We need to find out the CPU boot mode long after boot, so we need to
+ * store it in a writable variable.
+ *
+ * This is not in .bss, because we set it sufficiently early that the boot-time
+ * zeroing of .bss would clobber it.
+ */
+	.pushsection	.data
+ENTRY(__boot_cpu_mode)
+	.long	BOOT_CPU_MODE_EL2
+	.long	0
+	.popsection
+
 	.align	3
 2:	.quad	.
 	.quad	PAGE_OFFSET
@@ -201,6 +228,7 @@
 	 * cores are held until we're ready for them to initialise.
 	 */
 ENTRY(secondary_holding_pen)
+	bl	__calc_phys_offset		// x24=phys offset
 	bl	el2_setup			// Drop to EL1
 	mrs	x0, mpidr_el1
 	and	x0, x0, #15			// CPU number
@@ -226,7 +254,6 @@
 	mov	x23, x0				// x23=current cpu_table
 	cbz	x23, __error_p			// invalid processor (x23=0)?
 
-	bl	__calc_phys_offset		// x24=phys offset
 	pgtbl	x25, x26, x24			// x25=TTBR0, x26=TTBR1
 	ldr	x12, [x23, #CPU_INFO_SETUP]
 	add	x12, x12, x28			// __virt_to_phys
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
new file mode 100644
index 0000000..0959611
--- /dev/null
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -0,0 +1,109 @@
+/*
+ * Hypervisor stub
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author:	Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/virt.h>
+
+	.text
+	.align 11
+
+ENTRY(__hyp_stub_vectors)
+	ventry	el2_sync_invalid		// Synchronous EL2t
+	ventry	el2_irq_invalid			// IRQ EL2t
+	ventry	el2_fiq_invalid			// FIQ EL2t
+	ventry	el2_error_invalid		// Error EL2t
+
+	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	el2_irq_invalid			// IRQ EL2h
+	ventry	el2_fiq_invalid			// FIQ EL2h
+	ventry	el2_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq_invalid			// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync_invalid		// Synchronous 32-bit EL1
+	ventry	el1_irq_invalid			// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__hyp_stub_vectors)
+
+	.align 11
+
+el1_sync:
+	mrs	x1, esr_el2
+	lsr	x1, x1, #26
+	cmp	x1, #0x16
+	b.ne	2f				// Not an HVC trap
+	cbz	x0, 1f
+	msr	vbar_el2, x0			// Set vbar_el2
+	b	2f
+1:	mrs	x0, vbar_el2			// Return vbar_el2
+2:	eret
+ENDPROC(el1_sync)
+
+.macro invalid_vector	label
+\label:
+	b \label
+ENDPROC(\label)
+.endm
+
+	invalid_vector	el2_sync_invalid
+	invalid_vector	el2_irq_invalid
+	invalid_vector	el2_fiq_invalid
+	invalid_vector	el2_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+/*
+ * __hyp_set_vectors: Call this after boot to set the initial hypervisor
+ * vectors as part of hypervisor installation.  On an SMP system, this should
+ * be called on each CPU.
+ *
+ * x0 must be the physical address of the new vector table, and must be
+ * 2KB aligned.
+ *
+ * Before calling this, you must check that the stub hypervisor is installed
+ * everywhere, by waiting for any secondary CPUs to be brought up and then
+ * checking that is_hyp_mode_available() is true.
+ *
+ * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or
+ * something else went wrong... in such cases, trying to install a new
+ * hypervisor is unlikely to work as desired.
+ *
+ * When you call into your shiny new hypervisor, sp_el2 will contain junk,
+ * so you will need to set that to something sensible at the new hypervisor's
+ * initialisation entry point.
+ */
+
+ENTRY(__hyp_get_vectors)
+	mov	x0, xzr
+	// fall through
+ENTRY(__hyp_set_vectors)
+	hvc	#0
+	ret
+ENDPROC(__hyp_get_vectors)
+ENDPROC(__hyp_set_vectors)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index e04cebd..8a5f334 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -240,27 +240,41 @@
 	struct pt_regs *childregs = task_pt_regs(p);
 	unsigned long tls = p->thread.tp_value;
 
-	*childregs = *regs;
-	childregs->regs[0] = 0;
-
-	if (is_compat_thread(task_thread_info(p)))
-		childregs->compat_sp = stack_start;
-	else {
-		/*
-		 * Read the current TLS pointer from tpidr_el0 as it may be
-		 * out-of-sync with the saved value.
-		 */
-		asm("mrs %0, tpidr_el0" : "=r" (tls));
-		childregs->sp = stack_start;
-	}
-
 	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
-	p->thread.cpu_context.sp = (unsigned long)childregs;
-	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
 
-	/* If a TLS pointer was passed to clone, use that for the new thread. */
-	if (clone_flags & CLONE_SETTLS)
-		tls = regs->regs[3];
+	if (likely(regs)) {
+		*childregs = *regs;
+		childregs->regs[0] = 0;
+		if (is_compat_thread(task_thread_info(p))) {
+			if (stack_start)
+				childregs->compat_sp = stack_start;
+		} else {
+			/*
+			 * Read the current TLS pointer from tpidr_el0 as it may be
+			 * out-of-sync with the saved value.
+			 */
+			asm("mrs %0, tpidr_el0" : "=r" (tls));
+			if (stack_start) {
+				/* 16-byte aligned stack mandatory on AArch64 */
+				if (stack_start & 15)
+					return -EINVAL;
+				childregs->sp = stack_start;
+			}
+		}
+		/*
+		 * If a TLS pointer was passed to clone (4th argument), use it
+		 * for the new thread.
+		 */
+		if (clone_flags & CLONE_SETTLS)
+			tls = regs->regs[3];
+	} else {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->pstate = PSR_MODE_EL1h;
+		p->thread.cpu_context.x19 = stack_start;
+		p->thread.cpu_context.x20 = stk_sz;
+	}
+	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
+	p->thread.cpu_context.sp = (unsigned long)childregs;
 	p->thread.tp_value = tls;
 
 	ptrace_hw_copy_thread(p);
@@ -309,43 +323,6 @@
 	return last;
 }
 
-/*
- * Shuffle the argument into the correct register before calling the
- * thread function.  x1 is the thread argument, x2 is the pointer to
- * the thread function, and x3 points to the exit function.
- */
-extern void kernel_thread_helper(void);
-asm(	".section .text\n"
-"	.align\n"
-"	.type	kernel_thread_helper, #function\n"
-"kernel_thread_helper:\n"
-"	mov	x0, x1\n"
-"	mov	x30, x3\n"
-"	br	x2\n"
-"	.size	kernel_thread_helper, . - kernel_thread_helper\n"
-"	.previous");
-
-#define kernel_thread_exit	do_exit
-
-/*
- * Create a kernel thread.
- */
-pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.regs[1] = (unsigned long)arg;
-	regs.regs[2] = (unsigned long)fn;
-	regs.regs[3] = (unsigned long)kernel_thread_exit;
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.pstate = PSR_MODE_EL1h;
-
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	struct stackframe frame;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 8807ba2..abd7563 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -41,6 +41,8 @@
 struct rt_sigframe {
 	struct siginfo info;
 	struct ucontext uc;
+	u64 fp;
+	u64 lr;
 };
 
 static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
@@ -175,6 +177,10 @@
 	struct aux_context __user *aux =
 		(struct aux_context __user *)sf->uc.uc_mcontext.__reserved;
 
+	/* set up the stack frame for unwinding */
+	__put_user_error(regs->regs[29], &sf->fp, err);
+	__put_user_error(regs->regs[30], &sf->lr, err);
+
 	for (i = 0; i < 31; i++)
 		__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
 				 err);
@@ -196,11 +202,11 @@
 	return err;
 }
 
-static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
-				 int framesize)
+static struct rt_sigframe __user *get_sigframe(struct k_sigaction *ka,
+					       struct pt_regs *regs)
 {
 	unsigned long sp, sp_top;
-	void __user *frame;
+	struct rt_sigframe __user *frame;
 
 	sp = sp_top = regs->sp;
 
@@ -210,11 +216,8 @@
 	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp))
 		sp = sp_top = current->sas_ss_sp + current->sas_ss_size;
 
-	/* room for stack frame (FP, LR) */
-	sp -= 16;
-
-	sp = (sp - framesize) & ~15;
-	frame = (void __user *)sp;
+	sp = (sp - sizeof(struct rt_sigframe)) & ~15;
+	frame = (struct rt_sigframe __user *)sp;
 
 	/*
 	 * Check that we can actually write to the signal frame.
@@ -225,20 +228,14 @@
 	return frame;
 }
 
-static int setup_return(struct pt_regs *regs, struct k_sigaction *ka,
-			void __user *frame, int usig)
+static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+			 void __user *frame, int usig)
 {
-	int err = 0;
 	__sigrestore_t sigtramp;
-	unsigned long __user *sp = (unsigned long __user *)regs->sp;
-
-	/* set up the stack frame */
-	__put_user_error(regs->regs[29], sp - 2, err);
-	__put_user_error(regs->regs[30], sp - 1, err);
 
 	regs->regs[0] = usig;
-	regs->regs[29] = regs->sp - 16;
 	regs->sp = (unsigned long)frame;
+	regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
 	regs->pc = (unsigned long)ka->sa.sa_handler;
 
 	if (ka->sa.sa_flags & SA_RESTORER)
@@ -247,8 +244,6 @@
 		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp);
 
 	regs->regs[30] = (unsigned long)sigtramp;
-
-	return err;
 }
 
 static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
@@ -258,7 +253,7 @@
 	stack_t stack;
 	int err = 0;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs);
 	if (!frame)
 		return 1;
 
@@ -272,13 +267,13 @@
 	err |= __copy_to_user(&frame->uc.uc_stack, &stack, sizeof(stack));
 
 	err |= setup_sigframe(frame, regs, set);
-	if (err == 0)
-		err = setup_return(regs, ka, frame, usig);
-
-	if (err == 0 && ka->sa.sa_flags & SA_SIGINFO) {
-		err |= copy_siginfo_to_user(&frame->info, info);
-		regs->regs[1] = (unsigned long)&frame->info;
-		regs->regs[2] = (unsigned long)&frame->uc;
+	if (err == 0) {
+		setup_return(regs, ka, frame, usig);
+		if (ka->sa.sa_flags & SA_SIGINFO) {
+			err |= copy_siginfo_to_user(&frame->info, info);
+			regs->regs[1] = (unsigned long)&frame->info;
+			regs->regs[2] = (unsigned long)&frame->uc;
+		}
 	}
 
 	return err;
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 4654824..a4db3d2 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -578,9 +578,9 @@
 	return 0;
 }
 
-static inline void __user *compat_get_sigframe(struct k_sigaction *ka,
-					       struct pt_regs *regs,
-					       int framesize)
+static void __user *compat_get_sigframe(struct k_sigaction *ka,
+					struct pt_regs *regs,
+					int framesize)
 {
 	compat_ulong_t sp = regs->compat_sp;
 	void __user *frame;
@@ -605,9 +605,9 @@
 	return frame;
 }
 
-static int compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
-			       compat_ulong_t __user *rc, void __user *frame,
-			       int usig)
+static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+				compat_ulong_t __user *rc, void __user *frame,
+				int usig)
 {
 	compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler);
 	compat_ulong_t retcode;
@@ -643,8 +643,6 @@
 	regs->compat_lr	= retcode;
 	regs->pc	= handler;
 	regs->pstate	= spsr;
-
-	return 0;
 }
 
 static int compat_setup_sigframe(struct compat_sigframe __user *sf,
@@ -714,11 +712,9 @@
 	err |= __copy_to_user(&frame->sig.uc.uc_stack, &stack, sizeof(stack));
 
 	err |= compat_setup_sigframe(&frame->sig, regs, set);
-	if (err == 0)
-		err = compat_setup_return(regs, ka, frame->sig.retcode, frame,
-					  usig);
 
 	if (err == 0) {
+		compat_setup_return(regs, ka, frame->sig.retcode, frame, usig);
 		regs->regs[1] = (compat_ulong_t)(unsigned long)&frame->info;
 		regs->regs[2] = (compat_ulong_t)(unsigned long)&frame->sig.uc;
 	}
@@ -741,7 +737,7 @@
 
 	err |= compat_setup_sigframe(frame, regs, set);
 	if (err == 0)
-		err = compat_setup_return(regs, ka, frame->retcode, frame, usig);
+		compat_setup_return(regs, ka, frame->retcode, frame, usig);
 
 	return err;
 }
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index b120df3..4364df8 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -31,80 +31,12 @@
  */
 asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
 			  int __user *parent_tidptr, unsigned long tls_val,
-			  int __user *child_tidptr, struct pt_regs *regs)
+			  int __user *child_tidptr)
 {
-	if (!newsp)
-		newsp = regs->sp;
-	/* 16-byte aligned stack mandatory on AArch64 */
-	if (newsp & 15)
-		return -EINVAL;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
+	return do_fork(clone_flags, newsp, current_pt_regs(), 0,
+			parent_tidptr, child_tidptr);
 }
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage long sys_execve(const char __user *filenamei,
-			   const char __user *const __user *argv,
-			   const char __user *const __user *envp,
-			   struct pt_regs *regs)
-{
-	long error;
-	struct filename *filename;
-
-	filename = getname(filenamei);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	struct pt_regs regs;
-	int ret;
-
-	memset(&regs, 0, sizeof(struct pt_regs));
-	ret = do_execve(filename,
-			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp, &regs);
-	if (ret < 0)
-		goto out;
-
-	/*
-	 * Save argc to the register structure for userspace.
-	 */
-	regs.regs[0] = ret;
-
-	/*
-	 * We were successful.  We won't be returning to our caller, but
-	 * instead to user space by manipulating the kernel stack.
-	 */
-	asm(	"add	x0, %0, %1\n\t"
-		"mov	x1, %2\n\t"
-		"mov	x2, %3\n\t"
-		"bl	memmove\n\t"	/* copy regs to top of stack */
-		"mov	x27, #0\n\t"	/* not a syscall */
-		"mov	x28, %0\n\t"	/* thread structure */
-		"mov	sp, x0\n\t"	/* reposition stack pointer */
-		"b	ret_to_user"
-		:
-		: "r" (current_thread_info()),
-		  "Ir" (THREAD_START_SP - sizeof(regs)),
-		  "r" (&regs),
-		  "Ir" (sizeof(regs))
-		: "x0", "x1", "x2", "x27", "x28", "x30", "memory");
-
- out:
-	return ret;
-}
-EXPORT_SYMBOL(kernel_execve);
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long fd, off_t off)
@@ -118,8 +50,6 @@
 /*
  * Wrappers to pass the pt_regs argument.
  */
-#define sys_execve		sys_execve_wrapper
-#define sys_clone		sys_clone_wrapper
 #define sys_rt_sigreturn	sys_rt_sigreturn_wrapper
 #define sys_sigaltstack		sys_sigaltstack_wrapper
 
diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S
index 54c4aec..7ef59e9 100644
--- a/arch/arm64/kernel/sys32.S
+++ b/arch/arm64/kernel/sys32.S
@@ -26,25 +26,6 @@
 /*
  * System call wrappers for the AArch32 compatibility layer.
  */
-compat_sys_fork_wrapper:
-	mov	x0, sp
-	b	compat_sys_fork
-ENDPROC(compat_sys_fork_wrapper)
-
-compat_sys_vfork_wrapper:
-	mov	x0, sp
-	b	compat_sys_vfork
-ENDPROC(compat_sys_vfork_wrapper)
-
-compat_sys_execve_wrapper:
-	mov	x3, sp
-	b	compat_sys_execve
-ENDPROC(compat_sys_execve_wrapper)
-
-compat_sys_clone_wrapper:
-	mov	x5, sp
-	b	compat_sys_clone
-ENDPROC(compat_sys_clone_wrapper)
 
 compat_sys_sigreturn_wrapper:
 	mov	x0, sp
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 906e3bd..6fabc19 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,43 +28,15 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd32.h>
 
-asmlinkage int compat_sys_fork(struct pt_regs *regs)
+asmlinkage int compat_sys_fork(void)
 {
-	return do_fork(SIGCHLD, regs->compat_sp, regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, 0, current_pt_regs(), 0, NULL, NULL);
 }
 
-asmlinkage int compat_sys_clone(unsigned long clone_flags, unsigned long newsp,
-			  int __user *parent_tidptr, int tls_val,
-			  int __user *child_tidptr, struct pt_regs *regs)
+asmlinkage int compat_sys_vfork(void)
 {
-	if (!newsp)
-		newsp = regs->compat_sp;
-
-	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-asmlinkage int compat_sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->compat_sp,
-		       regs, 0, NULL, NULL);
-}
-
-asmlinkage int compat_sys_execve(const char __user *filenamei,
-				 compat_uptr_t argv, compat_uptr_t envp,
-				 struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(filenamei);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = compat_do_execve(filename->name, compat_ptr(argv),
-					compat_ptr(envp), regs);
-	putname(filename);
-out:
-	return error;
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+		       current_pt_regs(), 0, NULL, NULL);
 }
 
 asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index ba45794..c958cb8 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -239,7 +239,7 @@
 	if (!use_syscall) {
 		vdso_data->cs_cycle_last	= tk->clock->cycle_last;
 		vdso_data->xtime_clock_sec	= tk->xtime_sec;
-		vdso_data->xtime_clock_nsec	= tk->xtime_nsec >> tk->shift;
+		vdso_data->xtime_clock_nsec	= tk->xtime_nsec;
 		vdso_data->cs_mult		= tk->mult;
 		vdso_data->cs_shift		= tk->shift;
 		vdso_data->wtm_clock_sec	= tk->wall_to_monotonic.tv_sec;
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index dcb8c20..8bf658d 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -62,18 +62,19 @@
 	/* If tv is NULL, skip to the timezone code. */
 	cbz	x0, 2f
 	bl	__do_get_tspec
-	seqcnt_check w13, 1b
+	seqcnt_check w9, 1b
 
 	/* Convert ns to us. */
-	mov	x11, #1000
-	udiv	x10, x10, x11
-	stp	x9, x10, [x0, #TVAL_TV_SEC]
+	mov	x13, #1000
+	lsl	x13, x13, x12
+	udiv	x11, x11, x13
+	stp	x10, x11, [x0, #TVAL_TV_SEC]
 2:
 	/* If tz is NULL, return 0. */
 	cbz	x1, 3f
 	ldp	w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
-	seqcnt_read w13
-	seqcnt_check w13, 1b
+	seqcnt_read w9
+	seqcnt_check w9, 1b
 	stp	w4, w5, [x1, #TZ_MINWEST]
 3:
 	mov	x0, xzr
@@ -102,17 +103,17 @@
 	cbnz	use_syscall, 7f
 
 	bl	__do_get_tspec
-	seqcnt_check w13, 1b
+	seqcnt_check w9, 1b
 
 	cmp	w0, #CLOCK_MONOTONIC
 	b.ne	6f
 
 	/* Get wtm timespec. */
-	ldp	x14, x15, [vdso_data, #VDSO_WTM_CLK_SEC]
+	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
 
 	/* Check the sequence counter. */
-	seqcnt_read w13
-	seqcnt_check w13, 1b
+	seqcnt_read w9
+	seqcnt_check w9, 1b
 	b	4f
 2:
 	cmp	w0, #CLOCK_REALTIME_COARSE
@@ -122,37 +123,40 @@
 	/* Get coarse timespec. */
 	adr	vdso_data, _vdso_data
 3:	seqcnt_acquire
-	ldp	x9, x10, [vdso_data, #VDSO_XTIME_CRS_SEC]
+	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
+
+	/* Get wtm timespec. */
+	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
+
+	/* Check the sequence counter. */
+	seqcnt_read w9
+	seqcnt_check w9, 3b
 
 	cmp	w0, #CLOCK_MONOTONIC_COARSE
 	b.ne	6f
-
-	/* Get wtm timespec. */
-	ldp	x14, x15, [vdso_data, #VDSO_WTM_CLK_SEC]
-
-	/* Check the sequence counter. */
-	seqcnt_read w13
-	seqcnt_check w13, 3b
 4:
 	/* Add on wtm timespec. */
-	add	x9, x9, x14
-	add	x10, x10, x15
+	add	x10, x10, x13
+	lsl	x14, x14, x12
+	add	x11, x11, x14
 
 	/* Normalise the new timespec. */
-	mov	x14, #NSEC_PER_SEC_LO16
-	movk	x14, #NSEC_PER_SEC_HI16, lsl #16
-	cmp	x10, x14
+	mov	x15, #NSEC_PER_SEC_LO16
+	movk	x15, #NSEC_PER_SEC_HI16, lsl #16
+	lsl	x15, x15, x12
+	cmp	x11, x15
 	b.lt	5f
-	sub	x10, x10, x14
-	add	x9, x9, #1
+	sub	x11, x11, x15
+	add	x10, x10, #1
 5:
-	cmp	x10, #0
+	cmp	x11, #0
 	b.ge	6f
-	add	x10, x10, x14
-	sub	x9, x9, #1
+	add	x11, x11, x15
+	sub	x10, x10, #1
 
 6:	/* Store to the user timespec. */
-	stp	x9, x10, [x1, #TSPEC_TV_SEC]
+	lsr	x11, x11, x12
+	stp	x10, x11, [x1, #TSPEC_TV_SEC]
 	mov	x0, xzr
 	ret	x2
 7:
@@ -203,39 +207,39 @@
  * Expects vdso_data to be initialised.
  * Clobbers the temporary registers (x9 - x15).
  * Returns:
- *  - (x9, x10) = (ts->tv_sec, ts->tv_nsec)
- *  - (x11, x12) = (xtime->tv_sec, xtime->tv_nsec)
- *  - w13 = vDSO sequence counter
+ *  - w9		= vDSO sequence counter
+ *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec)
+ *  - w12		= cs_shift
  */
 ENTRY(__do_get_tspec)
 	.cfi_startproc
 
 	/* Read from the vDSO data page. */
 	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
-	ldp	x11, x12, [vdso_data, #VDSO_XTIME_CLK_SEC]
-	ldp	w14, w15, [vdso_data, #VDSO_CS_MULT]
-	seqcnt_read w13
+	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
+	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT]
+	seqcnt_read w9
 
-	/* Read the physical counter. */
+	/* Read the virtual counter. */
 	isb
-	mrs	x9, cntpct_el0
+	mrs	x15, cntvct_el0
 
 	/* Calculate cycle delta and convert to ns. */
-	sub	x10, x9, x10
+	sub	x10, x15, x10
 	/* We can only guarantee 56 bits of precision. */
-	movn	x9, #0xff0, lsl #48
-	and	x10, x9, x10
-	mul	x10, x10, x14
-	lsr	x10, x10, x15
+	movn	x15, #0xff00, lsl #48
+	and	x10, x15, x10
+	mul	x10, x10, x11
 
 	/* Use the kernel time to calculate the new timespec. */
-	add	x10, x12, x10
-	mov	x14, #NSEC_PER_SEC_LO16
-	movk	x14, #NSEC_PER_SEC_HI16, lsl #16
-	udiv	x15, x10, x14
-	add	x9, x15, x11
-	mul	x14, x14, x15
-	sub	x10, x10, x14
+	mov	x11, #NSEC_PER_SEC_LO16
+	movk	x11, #NSEC_PER_SEC_HI16, lsl #16
+	lsl	x11, x11, x12
+	add	x15, x10, x14
+	udiv	x14, x15, x11
+	add	x10, x13, x14
+	mul	x13, x14, x11
+	sub	x11, x15, x13
 
 	ret
 	.cfi_endproc
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1909a69..afadae6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -36,6 +36,8 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+static const char *fault_name(unsigned int esr);
+
 /*
  * Dump out the page tables associated with 'addr' in mm 'mm'.
  */
@@ -112,8 +114,9 @@
 	struct siginfo si;
 
 	if (show_unhandled_signals) {
-		pr_info("%s[%d]: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
-			tsk->comm, task_pid_nr(tsk), sig, addr, esr);
+		pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
+			tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
+			addr, esr);
 		show_pte(tsk->mm, addr);
 		show_regs(regs);
 	}
@@ -450,6 +453,12 @@
 	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
 };
 
+static const char *fault_name(unsigned int esr)
+{
+	const struct fault_info *inf = fault_info + (esr & 63);
+	return inf->name;
+}
+
 /*
  * Dispatch a data abort to the relevant handler.
  */
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index c144adb..88611c3 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -27,10 +27,6 @@
 
 #include "mm.h"
 
-void flush_cache_mm(struct mm_struct *mm)
-{
-}
-
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end)
 {
@@ -38,11 +34,6 @@
 		__flush_icache_all();
 }
 
-void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr,
-		      unsigned long pfn)
-{
-}
-
 static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 				unsigned long uaddr, void *kaddr,
 				unsigned long len)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 4cd2893..800aac3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -79,8 +79,8 @@
 
 #ifdef CONFIG_ZONE_DMA32
 	/* 4GB maximum for 32-bit only capable devices */
-	max_dma32 = min(max, MAX_DMA32_PFN);
-	zone_size[ZONE_DMA32] = max(min, max_dma32) - min;
+	max_dma32 = max(min, min(max, MAX_DMA32_PFN));
+	zone_size[ZONE_DMA32] = max_dma32 - min;
 #endif
 	zone_size[ZONE_NORMAL] = max - max_dma32;
 
diff --git a/drivers/clocksource/arm_generic.c b/drivers/clocksource/arm_generic.c
index c210f4f..8ae1a61 100644
--- a/drivers/clocksource/arm_generic.c
+++ b/drivers/clocksource/arm_generic.c
@@ -109,7 +109,7 @@
 
 	enable_percpu_irq(clk->irq, 0);
 
-	/* Ensure the physical counter is visible to userspace for the vDSO. */
+	/* Ensure the virtual counter is visible to userspace for the vDSO. */
 	arch_counter_enable_user_access();
 }