vlib: mmap process stacks
Instead of allocating stack from the main heap, this patch mmaps stack
memory together with guard page.
This aproach reduces main heap usage, and stack memory is prefaulted
on demand, so bigger process stacks will have zero impact on memory
usage as long as stack memory is not needed for real.
In addition, it fixes issue with systems which have bigger default page
size (observed with 65536).
Type: improvement
Change-Id: I593365c603d4702e428967d80fd425fdee2c4a21
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/node.c b/src/vlib/node.c
index 2bb5bce..2e93b95 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -433,36 +433,37 @@
if (n->type == VLIB_NODE_TYPE_PROCESS)
{
vlib_process_t *p;
- uword log2_n_stack_bytes;
+ void *map;
+ uword log2_n_stack_bytes, stack_bytes;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
- log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15);
+ log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes,
+ VLIB_PROCESS_LOG2_STACK_SIZE);
+ log2_n_stack_bytes = clib_max (log2_n_stack_bytes,
+ min_log2 (page_size));
-#ifdef CLIB_UNIX
- /*
- * Bump the stack size if running over a kernel with a large page size,
- * and the stack isn't any too big to begin with. Otherwise, we'll
- * trip over the stack guard page for sure.
- */
- if ((page_size > (4 << 10)) && log2_n_stack_bytes < 19)
- {
- if ((1 << log2_n_stack_bytes) <= page_size)
- log2_n_stack_bytes = min_log2 (page_size) + 1;
- else
- log2_n_stack_bytes++;
- }
-#endif
-
- p = clib_mem_alloc_aligned_at_offset
- (sizeof (p[0]) + (1 << log2_n_stack_bytes),
- STACK_ALIGN, STRUCT_OFFSET_OF (vlib_process_t, stack),
- 0 /* no, don't call os_out_of_memory */ );
- if (p == 0)
- clib_panic ("failed to allocate process stack (%d bytes)",
- 1 << log2_n_stack_bytes);
-
+ p = clib_mem_alloc_aligned (sizeof (p[0]), CLIB_CACHE_LINE_BYTES);
clib_memset (p, 0, sizeof (p[0]));
p->log2_n_stack_bytes = log2_n_stack_bytes;
+ stack_bytes = 1 << log2_n_stack_bytes;
+ /* map stack size + 2 extra guard pages */
+ map = mmap (0, stack_bytes + page_size, PROT_READ | PROT_WRITE,
+ mmap_flags, -1, 0);
+
+ if (map == MAP_FAILED)
+ clib_panic ("failed to allocate process stack (%d bytes)",
+ stack_bytes);
+
+ /* skip the guard page */
+ p->stack = map + page_size;
+
+ mmap_flags |= MAP_FIXED;
+ map = mmap (map, page_size, PROT_NONE, mmap_flags, -1, 0);
+
+ if (map == MAP_FAILED)
+ clib_unix_warning ("failed to create stack guard page");
+
/* Process node's runtime index is really index into process
pointer vector. */
n->runtime_index = vec_len (nm->processes);
@@ -475,16 +476,6 @@
/* Node runtime is stored inside of process. */
rt = &p->node_runtime;
-
-#ifdef CLIB_UNIX
- /*
- * Disallow writes to the bottom page of the stack, to
- * catch stack overflows.
- */
- if (mprotect (p->stack, page_size, PROT_READ) < 0)
- clib_unix_warning ("process stack");
-#endif
-
}
else
{