Add config option to use dlmalloc instead of mheap

Configure w/ --enable-dlmalloc, see .../build-data/platforms/vpp.mk

src/vppinfra/dlmalloc.[ch] are slightly modified versions of the
well-known Doug Lea malloc. Main advantage: dlmalloc mspaces have no
inherent size limit.

Change-Id: I19b3f43f3c65bcfb82c1a265a97922d01912446e
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index 1692ad8..820fdef 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -707,7 +707,7 @@
 show_memory_usage (vlib_main_t * vm,
 		   unformat_input_t * input, vlib_cli_command_t * cmd)
 {
-  int verbose = 0, api_segment = 0;
+  int verbose __attribute__ ((unused)) = 0, api_segment = 0;
   clib_error_t *error;
   u32 index = 0;
 
@@ -742,19 +742,55 @@
       vec_free (s);
     }
 
+#if USE_DLMALLOC == 0
   /* *INDENT-OFF* */
   foreach_vlib_main (
   ({
       mheap_t *h = mheap_header (clib_per_cpu_mheaps[index]);
-      vlib_cli_output (vm, "%sThread %d %v\n", index ? "\n":"", index,
+      vlib_cli_output (vm, "%sThread %d %s\n", index ? "\n":"", index,
 		       vlib_worker_threads[index].name);
       vlib_cli_output (vm, "  %U\n", format_page_map, pointer_to_uword (h) -
 		       h->vm_alloc_offset_from_header,
 		       h->vm_alloc_size);
-      vlib_cli_output (vm, "  %U\n", format_mheap, clib_per_cpu_mheaps[index], verbose);
+      vlib_cli_output (vm, "  %U\n", format_mheap, clib_per_cpu_mheaps[index],
+                       verbose);
       index++;
   }));
   /* *INDENT-ON* */
+#else
+  {
+    uword clib_mem_trace_enable_disable (uword enable);
+    uword was_enabled;
+
+    /*
+     * Note: the foreach_vlib_main causes allocator traffic,
+     * so shut off tracing before we go there...
+     */
+    was_enabled = clib_mem_trace_enable_disable (0);
+
+    /* *INDENT-OFF* */
+    foreach_vlib_main (
+    ({
+      struct mallinfo mi;
+      void *mspace;
+      mspace = clib_per_cpu_mheaps[index];
+
+      mi = mspace_mallinfo (mspace);
+      vlib_cli_output (vm, "%sThread %d %s\n", index ? "\n":"", index,
+		       vlib_worker_threads[index].name);
+      vlib_cli_output (vm, "  %U\n", format_page_map,
+                       pointer_to_uword (mspace_least_addr(mspace)),
+                       mi.arena);
+      vlib_cli_output (vm, "  %U\n", format_mheap, clib_per_cpu_mheaps[index],
+                       verbose);
+      index++;
+    }));
+    /* *INDENT-ON* */
+
+    /* Restore the trace flag */
+    clib_mem_trace_enable_disable (was_enabled);
+  }
+#endif /* USE_DLMALLOC */
   return 0;
 }
 
@@ -850,6 +886,7 @@
 test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
 		    vlib_cli_command_t * cmd)
 {
+#if USE_DLMALLOC == 0
   clib_error_t *error = 0;
   void *heap;
   mheap_t *mheap;
@@ -897,6 +934,9 @@
     }
 
   return error;
+#else
+  return clib_error_return (0, "unimplemented...");
+#endif /* USE_DLMALLOC */
 }
 
 /* *INDENT-OFF* */
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
index a708aae..6c6b072 100755
--- a/src/vlib/linux/physmem.c
+++ b/src/vlib/linux/physmem.c
@@ -69,10 +69,18 @@
 
   while (1)
     {
+#if USE_DLMALLOC == 0
+
       mheap_get_aligned (pr->heap, n_bytes,
 			 /* align */ alignment,
 			 /* align offset */ 0,
 			 &lo_offset);
+#else
+      lo_offset = (uword) mspace_get_aligned (pr->heap, n_bytes,
+					      alignment, ~0ULL /* offset */ );
+      if (lo_offset == 0)
+	lo_offset = ~0ULL;
+#endif
 
       /* Allocation failed? */
       if (lo_offset == ~0)
@@ -94,7 +102,13 @@
     {
       uword i;
       for (i = 0; i < vec_len (to_free); i++)
-	mheap_put (pr->heap, to_free[i]);
+	{
+#if USE_DLMALLOC == 0
+	  mheap_put (pr->heap, to_free[i]);
+#else
+	  mspace_put_no_offset (pr->heap, (void *) to_free[i]);
+#endif
+	}
       vec_free (to_free);
     }
 
@@ -106,7 +120,11 @@
 {
   vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, idx);
   /* Return object to region's heap. */
+#if USE_DLMALLOC == 0
   mheap_put (pr->heap, x - pr->heap);
+#else
+  mspace_put_no_offset (pr->heap, x);
+#endif
 }
 
 static clib_error_t *
@@ -181,10 +199,15 @@
 
   if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)
     {
+#if USE_DLMALLOC == 0
       pr->heap = mheap_alloc_with_flags (pr->mem, pr->size,
 					 /* Don't want mheap mmap/munmap with IO memory. */
 					 MHEAP_FLAG_DISABLE_VM |
 					 MHEAP_FLAG_THREAD_SAFE);
+#else
+      pr->heap = create_mspace_with_base (pr->mem, pr->size, 1 /* locked */ );
+      mspace_disable_expand (pr->heap);
+#endif
     }
 
   *idx = pr->index;
diff --git a/src/vlib/main.h b/src/vlib/main.h
index 16e4120..8b6f947 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -100,8 +100,13 @@
   /* Name for e.g. syslog. */
   char *name;
 
-  /* Start and size of CLIB heap. */
+  /* Start of the heap. */
   void *heap_base;
+
+  /* Truncated version, to create frame indices */
+  void *heap_aligned_base;
+
+  /* Size of the heap */
   uword heap_size;
 
   /* Pool of buffer free lists. */
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index bb302f7..577e013 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -216,7 +216,7 @@
 vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
 {
   vlib_frame_t *f;
-  f = vm->heap_base + (frame_index * VLIB_FRAME_ALIGN);
+  f = vm->heap_aligned_base + (frame_index * VLIB_FRAME_ALIGN);
   return f;
 }
 
@@ -227,7 +227,7 @@
 
   ASSERT (((uword) f & (VLIB_FRAME_ALIGN - 1)) == 0);
 
-  i = ((u8 *) f - (u8 *) vm->heap_base);
+  i = ((u8 *) f - (u8 *) vm->heap_aligned_base);
   ASSERT ((i / VLIB_FRAME_ALIGN) <= 0xFFFFFFFFULL);
 
   return i / VLIB_FRAME_ALIGN;
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index a946326..c53c5d6 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -727,7 +727,6 @@
   u32 n_vlib_mains = tm->n_vlib_mains;
   u32 worker_thread_index;
   u8 *main_heap = clib_mem_get_per_cpu_heap ();
-  mheap_t *main_heap_header = mheap_header (main_heap);
 
   vec_reset_length (vlib_worker_threads);
 
@@ -742,12 +741,6 @@
       vlib_set_thread_name ((char *) w->name);
     }
 
-  /*
-   * Truth of the matter: we always use at least two
-   * threads. So, make the main heap thread-safe
-   * and make the event log thread-safe.
-   */
-  main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
   vm->elog_main.lock =
     clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
   vm->elog_main.lock[0] = 0;
@@ -801,9 +794,17 @@
 	      vlib_node_t *n;
 
 	      vec_add2 (vlib_worker_threads, w, 1);
+	      /* Currently unused, may not really work */
 	      if (tr->mheap_size)
-		w->thread_mheap =
-		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
+		{
+#if USE_DLMALLOC == 0
+		  w->thread_mheap =
+		    mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+		  w->thread_mheap = create_mspace (tr->mheap_size,
+						   0 /* unlocked */ );
+#endif
+		}
 	      else
 		w->thread_mheap = main_heap;
 
@@ -831,6 +832,8 @@
 
 	      vm_clone->thread_index = worker_thread_index;
 	      vm_clone->heap_base = w->thread_mheap;
+	      vm_clone->heap_aligned_base = (void *)
+		(((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
 	      vm_clone->init_functions_called =
 		hash_create (0, /* value bytes */ 0);
 	      vm_clone->pending_rpc_requests = 0;
@@ -959,8 +962,15 @@
 	    {
 	      vec_add2 (vlib_worker_threads, w, 1);
 	      if (tr->mheap_size)
-		w->thread_mheap =
-		  mheap_alloc (0 /* use VM */ , tr->mheap_size);
+		{
+#if USE_DLMALLOC == 0
+		  w->thread_mheap =
+		    mheap_alloc (0 /* use VM */ , tr->mheap_size);
+#else
+		  w->thread_mheap =
+		    create_mspace (tr->mheap_size, 0 /* unlocked */ );
+#endif
+		}
 	      else
 		w->thread_mheap = main_heap;
 	      w->thread_stack =
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 947c664..2420861 100755
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -638,6 +638,8 @@
   vm->argv = (u8 **) argv;
   vm->name = argv[0];
   vm->heap_base = clib_mem_get_heap ();
+  vm->heap_aligned_base = (void *)
+    (((uword) vm->heap_base) & ~(VLIB_FRAME_ALIGN - 1));
   ASSERT (vm->heap_base);
 
   unformat_init_command_line (&input, (char **) vm->argv);