[PATCH] mempool: NOMEMALLOC and NORETRY
Mempools have 2 problems.
The first is that mempool_alloc can possibly get stuck in __alloc_pages
when it should instead opt to fail and take an element from its reserved pool.
The second is that it will happily eat emergency PF_MEMALLOC reserves
instead of going to its own reserved pool.
Fix the first by passing __GFP_NORETRY in the allocation calls in
mempool_alloc. Fix the second by introducing a __GFP_NOMEMALLOC flag which
directs the page allocator not to allocate from the emergency reserves.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 848a1ba..af7407e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -38,14 +38,16 @@
#define __GFP_NO_GROW 0x2000u /* Slab internal usage */
#define __GFP_COMP 0x4000u /* Add compound page metadata */
#define __GFP_ZERO 0x8000u /* Return zeroed page on success */
+#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
- __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
+ __GFP_NOMEMALLOC)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
diff --git a/mm/mempool.c b/mm/mempool.c
index b014ffe..d691b5c 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -198,11 +198,16 @@
void *element;
unsigned long flags;
DEFINE_WAIT(wait);
- int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+ int gfp_nowait;
+
+ gfp_mask |= __GFP_NOMEMALLOC; /* don't allocate emergency reserves */
+ gfp_mask |= __GFP_NORETRY; /* don't loop in __alloc_pages */
+ gfp_mask |= __GFP_NOWARN; /* failures are OK */
+ gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
might_sleep_if(gfp_mask & __GFP_WAIT);
repeat_alloc:
- element = pool->alloc(gfp_nowait|__GFP_NOWARN, pool->pool_data);
+ element = pool->alloc(gfp_nowait, pool->pool_data);
if (likely(element != NULL))
return element;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 08e8627..04a35b3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -799,14 +799,18 @@
}
/* This allocation should allow future memory freeing. */
- if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) {
- /* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!cpuset_zone_allowed(z))
- continue;
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+
+ if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
+ && !in_interrupt()) {
+ if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; (z = zones[i]) != NULL; i++) {
+ if (!cpuset_zone_allowed(z))
+ continue;
+ page = buffered_rmqueue(z, order, gfp_mask);
+ if (page)
+ goto got_pg;
+ }
}
goto nopage;
}