[PATCH] x86_64: avoid some atomic operations during address space destruction
Any architecture that has hardware updated A/D bits that require
synchronization against other processors during PTE operations can benefit
from doing non-atomic PTE updates during address space destruction.
Originally done on i386, now ported to x86_64.
Doing a read/write pair instead of an xchg() operation saves the implicit
lock, which turns out to be a big win on 32-bit (esp w PAE).
Signed-off-by: Zachary Amsden <zach@vmware.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index a1ada85..5e0f2fd 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -104,6 +104,19 @@
((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0))
+
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+ pte_t pte;
+ if (full) {
+ pte = *ptep;
+ *ptep = __pte(0);
+ } else {
+ pte = ptep_get_and_clear(mm, addr, ptep);
+ }
+ return pte;
+}
+
#define pte_same(a, b) ((a).pte == (b).pte)
#define PMD_SIZE (1UL << PMD_SHIFT)
@@ -434,6 +447,7 @@
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
#include <asm-generic/pgtable.h>