bihash table size perf/scale improvements

Directly allocate and carve cache-line-aligned chunks of virtual
memory. To a first approximation, bihash wasn't using
clib_mem_free(...).

We eliminate mheap object header/trailers, which improves space
efficiency. We also eliminate the 4 GB bihash table size limit. An 8_8
bihash w/ 100 million random entries uses 3.8 GB.

Change-Id: Icf925fdf99bce7d6ac407ac4edd30560b8f04808
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index 4e5d995..81d9ffa 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -89,7 +89,14 @@
   u64 cache_misses;
 
     BVT (clib_bihash_value) ** freelists;
-  void *mheap;
+
+  /*
+   * Backing store allocation. Since bihash manages its own
+   * freelists, we simply dole out memory at alloc_arena_next.
+   */
+  uword alloc_arena;
+  uword alloc_arena_next;
+  uword alloc_arena_size;
 
   /**
     * A custom format function to print the Key and Value of bihash_key instead of default hexdump
@@ -224,7 +231,7 @@
 static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h,
 						uword offset)
 {
-  u8 *hp = h->mheap;
+  u8 *hp = (u8 *) h->alloc_arena;
   u8 *vp = hp + offset;
 
   return (void *) vp;
@@ -235,10 +242,9 @@
 {
   u8 *hp, *vp;
 
-  hp = (u8 *) h->mheap;
+  hp = (u8 *) h->alloc_arena;
   vp = (u8 *) v;
 
-  ASSERT ((vp - hp) < 0x100000000ULL);
   return vp - hp;
 }