Add MMF_DUMP_ELF_HEADERS

This adds the MMF_DUMP_ELF_HEADERS option to /proc/pid/coredump_filter.
This dumps the first page (only) of a private file mapping if it appears to
be a mapping of an ELF file.  Including these pages in the core dump may
give sufficient identifying information to associate the original DSO and
executable file images and their debugging information with a core file in
a generic way just from its contents (e.g.  when those binaries were built
with ld --build-id).  I expect this to become the default behavior
eventually.  Existing versions of gdb can be confused by the core dumps it
creates, so it won't enabled by default for some time to come.  Soon many
people will have systems with a gdb that handle these dumps, so they can
arrange to set the bit at boot and have it inherited system-wide.

This also cleans up the checking of the MMF_DUMP_* flag bits, which did not
need to be using atomic macros.

Signed-off-by: Roland McGrath <roland@redhat.com>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 66cd711..4263326 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1201,35 +1201,68 @@
 }
 
 /*
- * Decide whether a segment is worth dumping; default is yes to be
- * sure (missing info is worse than too much; etc).
- * Personally I'd include everything, and use the coredump limit...
- *
- * I think we should skip something. But I am not sure how. H.J.
+ * Decide what to dump of a segment, part, all or none.
  */
-static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
+static unsigned long vma_dump_size(struct vm_area_struct *vma,
+				   unsigned long mm_flags)
 {
 	/* The vma can be set up to tell us the answer directly.  */
 	if (vma->vm_flags & VM_ALWAYSDUMP)
-		return 1;
+		goto whole;
 
 	/* Do not dump I/O mapped devices or special mappings */
 	if (vma->vm_flags & (VM_IO | VM_RESERVED))
 		return 0;
 
+#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
+
 	/* By default, dump shared memory if mapped from an anonymous file. */
 	if (vma->vm_flags & VM_SHARED) {
-		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0)
-			return test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
-		else
-			return test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
+		if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
+		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
+			goto whole;
+		return 0;
 	}
 
-	/* By default, if it hasn't been written to, don't write it out. */
-	if (!vma->anon_vma)
-		return test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
+	/* Dump segments that have been written to.  */
+	if (vma->anon_vma && FILTER(ANON_PRIVATE))
+		goto whole;
+	if (vma->vm_file == NULL)
+		return 0;
 
-	return test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
+	if (FILTER(MAPPED_PRIVATE))
+		goto whole;
+
+	/*
+	 * If this looks like the beginning of a DSO or executable mapping,
+	 * check for an ELF header.  If we find one, dump the first page to
+	 * aid in determining what was mapped here.
+	 */
+	if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
+		u32 __user *header = (u32 __user *) vma->vm_start;
+		u32 word;
+		/*
+		 * Doing it this way gets the constant folded by GCC.
+		 */
+		union {
+			u32 cmp;
+			char elfmag[SELFMAG];
+		} magic;
+		BUILD_BUG_ON(SELFMAG != sizeof word);
+		magic.elfmag[EI_MAG0] = ELFMAG0;
+		magic.elfmag[EI_MAG1] = ELFMAG1;
+		magic.elfmag[EI_MAG2] = ELFMAG2;
+		magic.elfmag[EI_MAG3] = ELFMAG3;
+		if (get_user(word, header) == 0 && word == magic.cmp)
+			return PAGE_SIZE;
+	}
+
+#undef	FILTER
+
+	return 0;
+
+whole:
+	return vma->vm_end - vma->vm_start;
 }
 
 /* An ELF note in memory */
@@ -1669,16 +1702,13 @@
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		struct elf_phdr phdr;
-		size_t sz;
-
-		sz = vma->vm_end - vma->vm_start;
 
 		phdr.p_type = PT_LOAD;
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = maydump(vma, mm_flags) ? sz : 0;
-		phdr.p_memsz = sz;
+		phdr.p_filesz = vma_dump_size(vma, mm_flags);
+		phdr.p_memsz = vma->vm_end - vma->vm_start;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
 		if (vma->vm_flags & VM_WRITE)
@@ -1718,13 +1748,11 @@
 	for (vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
+		unsigned long end;
 
-		if (!maydump(vma, mm_flags))
-			continue;
+		end = vma->vm_start + vma_dump_size(vma, mm_flags);
 
-		for (addr = vma->vm_start;
-		     addr < vma->vm_end;
-		     addr += PAGE_SIZE) {
+		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
 			struct vm_area_struct *vma;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e643357..cb952bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -359,8 +359,9 @@
 #define MMF_DUMP_ANON_SHARED	3
 #define MMF_DUMP_MAPPED_PRIVATE	4
 #define MMF_DUMP_MAPPED_SHARED	5
+#define MMF_DUMP_ELF_HEADERS	6
 #define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
-#define MMF_DUMP_FILTER_BITS	4
+#define MMF_DUMP_FILTER_BITS	5
 #define MMF_DUMP_FILTER_MASK \
 	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
 #define MMF_DUMP_FILTER_DEFAULT \