kmemcheck: add mm functions

With kmemcheck enabled, the slab allocator needs to do this:

1. Tell kmemcheck to allocate the shadow memory which stores the status of
   each byte in the allocation proper, e.g. whether it is initialized or
   uninitialized.
2. Tell kmemcheck which parts of memory that should be marked uninitialized.
   There are actually a few more states, such as "not yet allocated" and
   "recently freed".

If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
memory that can take page faults because of kmemcheck.

If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
request memory with the __GFP_NOTRACK flag. This does not prevent the page
faults from occuring, however, but marks the object in question as being
initialized so that no warnings will ever be produced for this object.

In addition to (and in contrast to) __GFP_NOTRACK, the
__GFP_NOTRACK_FALSE_POSITIVE flag indicates that the allocation should
not be tracked _because_ it would produce a false positive. Their values
are identical, but need not be so in the future (for example, we could now
enable/disable false positives with a config option).

Parts of this patch were contributed by Pekka Enberg but merged for
atomicity.

Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

[rebased for mainline inclusion]
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3bb2be1..994dd6a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -63,7 +63,7 @@
         task_xstate_cachep =
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
 /*
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0bbc15f..daeaa8f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -51,8 +51,15 @@
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
 #define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+#define __GFP_NOTRACK	((__force gfp_t)0x200000u)  /* Don't track with kmemcheck */
 
-#define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
+/*
+ * This may seem redundant, but it's a way of annotating false positives vs.
+ * allocations that simply cannot be supported (e.g. page tables).
+ */
+#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
+
+#define __GFP_BITS_SHIFT 22	/* Room for 22 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h
index 39480c9..5b65f4e 100644
--- a/include/linux/kmemcheck.h
+++ b/include/linux/kmemcheck.h
@@ -7,11 +7,58 @@
 #ifdef CONFIG_KMEMCHECK
 extern int kmemcheck_enabled;
 
+/* The slab-related functions. */
+void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+			    struct page *page, int order);
+void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order);
+void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+			  size_t size);
+void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size);
+
+void kmemcheck_show_pages(struct page *p, unsigned int n);
+void kmemcheck_hide_pages(struct page *p, unsigned int n);
+
+bool kmemcheck_page_is_tracked(struct page *p);
+
+void kmemcheck_mark_unallocated(void *address, unsigned int n);
+void kmemcheck_mark_uninitialized(void *address, unsigned int n);
+void kmemcheck_mark_initialized(void *address, unsigned int n);
+void kmemcheck_mark_freed(void *address, unsigned int n);
+
+void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n);
+void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n);
+
 int kmemcheck_show_addr(unsigned long address);
 int kmemcheck_hide_addr(unsigned long address);
 #else
 #define kmemcheck_enabled 0
 
+static inline void
+kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+		       struct page *page, int order)
+{
+}
+
+static inline void
+kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+{
+}
+
+static inline void
+kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+		     size_t size)
+{
+}
+
+static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object,
+				       size_t size)
+{
+}
+
+static inline bool kmemcheck_page_is_tracked(struct page *p)
+{
+	return false;
+}
 #endif /* CONFIG_KMEMCHECK */
 
 #endif /* LINUX_KMEMCHECK_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4880306..e339fcf 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -64,6 +64,13 @@
 
 #define SLAB_NOLEAKTRACE	0x00800000UL	/* Avoid kmemleak tracing */
 
+/* Don't track use of uninitialized memory */
+#ifdef CONFIG_KMEMCHECK
+# define SLAB_NOTRACK		0x01000000UL
+#else
+# define SLAB_NOTRACK		0x00000000UL
+#endif
+
 /* The following flags affect the page allocator grouping pages by mobility */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
 #define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
diff --git a/kernel/fork.c b/kernel/fork.c
index 4430eb1..be022c2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -178,7 +178,7 @@
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep =
 		kmem_cache_create("task_struct", sizeof(struct task_struct),
-			ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL);
+			ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
 #endif
 
 	/* do the arch specific task caches init */
@@ -1470,20 +1470,20 @@
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
-			sighand_ctor);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
+			SLAB_NOTRACK, sighand_ctor);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	files_cachep = kmem_cache_create("files_cache",
 			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	fs_cachep = kmem_cache_create("fs_cache",
 			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
 			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 	mmap_init();
 }
diff --git a/mm/Makefile b/mm/Makefile
index e89acb0..c379ce0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -27,6 +27,7 @@
 obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
 obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
new file mode 100644
index 0000000..eaa41b8
--- /dev/null
+++ b/mm/kmemcheck.c
@@ -0,0 +1,103 @@
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kmemcheck.h>
+
+void kmemcheck_alloc_shadow(struct kmem_cache *s, gfp_t flags, int node,
+			   struct page *page, int order)
+{
+	struct page *shadow;
+	int pages;
+	int i;
+
+	pages = 1 << order;
+
+	/*
+	 * With kmemcheck enabled, we need to allocate a memory area for the
+	 * shadow bits as well.
+	 */
+	shadow = alloc_pages_node(node, flags, order);
+	if (!shadow) {
+		if (printk_ratelimit())
+			printk(KERN_ERR "kmemcheck: failed to allocate "
+				"shadow bitmap\n");
+		return;
+	}
+
+	for(i = 0; i < pages; ++i)
+		page[i].shadow = page_address(&shadow[i]);
+
+	/*
+	 * Mark it as non-present for the MMU so that our accesses to
+	 * this memory will trigger a page fault and let us analyze
+	 * the memory accesses.
+	 */
+	kmemcheck_hide_pages(page, pages);
+
+	/*
+	 * Objects from caches that have a constructor don't get
+	 * cleared when they're allocated, so we need to do it here.
+	 */
+	if (s->ctor)
+		kmemcheck_mark_uninitialized_pages(page, pages);
+	else
+		kmemcheck_mark_unallocated_pages(page, pages);
+}
+
+void kmemcheck_free_shadow(struct kmem_cache *s, struct page *page, int order)
+{
+	struct page *shadow;
+	int pages;
+	int i;
+
+	pages = 1 << order;
+
+	kmemcheck_show_pages(page, pages);
+
+	shadow = virt_to_page(page[0].shadow);
+
+	for(i = 0; i < pages; ++i)
+		page[i].shadow = NULL;
+
+	__free_pages(shadow, order);
+}
+
+void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object,
+			  size_t size)
+{
+	/*
+	 * Has already been memset(), which initializes the shadow for us
+	 * as well.
+	 */
+	if (gfpflags & __GFP_ZERO)
+		return;
+
+	/* No need to initialize the shadow of a non-tracked slab. */
+	if (s->flags & SLAB_NOTRACK)
+		return;
+
+	if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) {
+		/*
+		 * Allow notracked objects to be allocated from
+		 * tracked caches. Note however that these objects
+		 * will still get page faults on access, they just
+		 * won't ever be flagged as uninitialized. If page
+		 * faults are not acceptable, the slab cache itself
+		 * should be marked NOTRACK.
+		 */
+		kmemcheck_mark_initialized(object, size);
+	} else if (!s->ctor) {
+		/*
+		 * New objects should be marked uninitialized before
+		 * they're returned to the called.
+		 */
+		kmemcheck_mark_uninitialized(object, size);
+	}
+}
+
+void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size)
+{
+	/* TODO: RCU freeing is unsupported for now; hide false positives. */
+	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU))
+		kmemcheck_mark_freed(object, size);
+}