Minimize bihash memory consumption

Reference-count the number of entries in each bucket. If the reference
count goes to zero, free the backing store.

Add long-term churn-testing to test_bihash_template.c, thanks to
Andrew Yourtchenko for the initial implementation.

Change-Id: I4fbd9229cacfaba8027a85cbf87b74afdead6e39
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index 3f25b02..2b40af3 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -290,6 +290,7 @@
       *v->kvp = *add_v;
       tmp_b.as_u64 = 0;
       tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
+      tmp_b.refcnt = 1;
 
       b->as_u64 = tmp_b.as_u64;
       goto unlock;
@@ -329,6 +330,7 @@
 	      clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
 	      CLIB_MEMORY_BARRIER ();
 	      b->as_u64 = h->saved_bucket.as_u64;
+	      b->refcnt++;
 	      goto unlock;
 	    }
 	}
@@ -342,8 +344,17 @@
 	    {
 	      memset (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
 	      CLIB_MEMORY_BARRIER ();
-	      b->as_u64 = h->saved_bucket.as_u64;
-	      goto unlock;
+	      if (PREDICT_TRUE (h->saved_bucket.refcnt > 1))
+		{
+		  h->saved_bucket.refcnt -= 1;
+		  b->as_u64 = h->saved_bucket.as_u64;
+		  goto unlock;
+		}
+	      else
+		{
+		  tmp_b.as_u64 = 0;
+		  goto free_old_bucket;
+		}
 	    }
 	}
       rv = -3;
@@ -411,18 +422,18 @@
     goto try_resplit;
 
 expand_ok:
-  /* Keep track of the number of linear-scan buckets */
-  if (tmp_b.linear_search ^ mark_bucket_linear)
-    h->linear_buckets += (mark_bucket_linear == 1) ? 1 : -1;
-
   tmp_b.log2_pages = new_log2_pages;
   tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
   tmp_b.linear_search = mark_bucket_linear;
+  tmp_b.refcnt = h->saved_bucket.refcnt + 1;
+
+free_old_bucket:
 
   CLIB_MEMORY_BARRIER ();
   b->as_u64 = tmp_b.as_u64;
   v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
-  BV (value_free) (h, v, old_log2_pages);
+
+  BV (value_free) (h, v, h->saved_bucket.log2_pages);
 
 unlock:
   BV (clib_bihash_reset_cache) (b);
@@ -541,6 +552,8 @@
   BVT (clib_bihash_value) * v;
   int i, j, k;
   u64 active_elements = 0;
+  u64 active_buckets = 0;
+  u64 linear_buckets = 0;
 
   s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");
 
@@ -554,6 +567,11 @@
 	  continue;
 	}
 
+      active_buckets++;
+
+      if (b->linear_search)
+	linear_buckets++;
+
       if (verbose)
 	{
 	  s = format (s, "[%d]: heap offset %d, len %d, linear %d\n", i,
@@ -593,11 +611,30 @@
 	}
     }
 
-  s = format (s, "    %lld active elements\n", active_elements);
+  s = format (s, "    %lld active elements %lld active buckets\n",
+	      active_elements, active_buckets);
   s = format (s, "    %d free lists\n", vec_len (h->freelists));
-  s = format (s, "    %d linear search buckets\n", h->linear_buckets);
+
+  for (i = 0; i < vec_len (h->freelists); i++)
+    {
+      u32 nfree = 0;
+      BVT (clib_bihash_value) * free_elt;
+
+      free_elt = h->freelists[i];
+      while (free_elt)
+	{
+	  nfree++;
+	  free_elt = free_elt->next_free;
+	}
+
+      s = format (s, "       [len %d] %u free elts\n", 1 << i, nfree);
+    }
+
+  s = format (s, "    %lld linear search buckets\n", linear_buckets);
   s = format (s, "    %lld cache hits, %lld cache misses\n",
 	      h->cache_hits, h->cache_misses);
+  if (h->mheap)
+    s = format (s, "    mheap: %U", format_mheap, h->mheap, 0 /* verbose */ );
   return s;
 }
 
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index 9df418b..4e5d995 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -61,11 +61,12 @@
       u32 offset;
       u8 linear_search;
       u8 log2_pages;
-      u16 cache_lru;
+      i16 refcnt;
     };
     u64 as_u64;
   };
 #if BIHASH_KVP_CACHE_SIZE > 0
+  u16 cache_lru;
     BVT (clib_bihash_kv) cache[BIHASH_KVP_CACHE_SIZE];
 #endif
 } BVT (clib_bihash_bucket);
@@ -82,7 +83,6 @@
 
   u32 nbuckets;
   u32 log2_nbuckets;
-  u32 linear_buckets;
   u8 *name;
 
   u64 cache_hits;
@@ -102,13 +102,11 @@
 static inline void
 BV (clib_bihash_update_lru) (BVT (clib_bihash_bucket) * b, u8 slot)
 {
+#if BIHASH_KVP_CACHE_SIZE > 1
   u16 value, tmp, mask;
   u8 found_lru_pos;
   u16 save_hi;
 
-  if (BIHASH_KVP_CACHE_SIZE < 2)
-    return;
-
   ASSERT (slot < BIHASH_KVP_CACHE_SIZE);
 
   /* First, find the slot in cache_lru */
@@ -154,6 +152,7 @@
   value = save_hi | (tmp << 3) | slot;
 
   b->cache_lru = value;
+#endif
 }
 
 void
@@ -197,28 +196,29 @@
 
 static inline int BV (clib_bihash_lock_bucket) (BVT (clib_bihash_bucket) * b)
 {
-  BVT (clib_bihash_bucket) tmp_b;
-  u64 rv;
+#if BIHASH_KVP_CACHE_SIZE > 0
+  u16 cache_lru_bit;
+  u16 rv;
 
-  tmp_b.as_u64 = 0;
-  tmp_b.cache_lru = 1 << 15;
+  cache_lru_bit = 1 << 15;
 
-  rv = __sync_fetch_and_or (&b->as_u64, tmp_b.as_u64);
-  tmp_b.as_u64 = rv;
+  rv = __sync_fetch_and_or (&b->cache_lru, cache_lru_bit);
   /* Was already locked? */
-  if (tmp_b.cache_lru & (1 << 15))
+  if (rv & (1 << 15))
     return 0;
+#endif
   return 1;
 }
 
 static inline void BV (clib_bihash_unlock_bucket)
   (BVT (clib_bihash_bucket) * b)
 {
-  BVT (clib_bihash_bucket) tmp_b;
+#if BIHASH_KVP_CACHE_SIZE > 0
+  u16 cache_lru;
 
-  tmp_b.as_u64 = b->as_u64;
-  tmp_b.cache_lru &= ~(1 << 15);
-  b->as_u64 = tmp_b.as_u64;
+  cache_lru = b->cache_lru & ~(1 << 15);
+  b->cache_lru = cache_lru;
+#endif
 }
 
 static inline void *BV (clib_bihash_get_value) (BVT (clib_bihash) * h,
diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c
index 589c815..2d4b553 100644
--- a/src/vppinfra/test_bihash_template.c
+++ b/src/vppinfra/test_bihash_template.c
@@ -15,6 +15,8 @@
 #include <vppinfra/time.h>
 #include <vppinfra/cache.h>
 #include <vppinfra/error.h>
+#include <sys/resource.h>
+#include <stdio.h>
 
 #include <vppinfra/bihash_8_8.h>
 #include <vppinfra/bihash_template.h>
@@ -26,6 +28,8 @@
   u64 seed;
   u32 nbuckets;
   u32 nitems;
+  u32 ncycles;
+  u32 report_every_n;
   u32 search_iter;
   int careful_delete_tests;
   int verbose;
@@ -93,144 +97,185 @@
   f64 before, delta;
   BVT (clib_bihash) * h;
   BVT (clib_bihash_kv) kv;
+  u32 acycle;
 
   h = &tm->hash;
 
   BV (clib_bihash_init) (h, "test", tm->nbuckets, 3ULL << 30);
 
-  fformat (stdout, "Pick %lld unique %s keys...\n",
-	   tm->nitems, tm->non_random_keys ? "non-random" : "random");
 
-  for (i = 0; i < tm->nitems; i++)
+  for (acycle = 0; acycle < tm->ncycles; acycle++)
     {
-      u64 rndkey;
-
-      if (tm->non_random_keys == 0)
+      if ((acycle % tm->report_every_n) == 0)
 	{
+	  fformat (stdout, "Cycle %lld out of %lld...\n",
+		   acycle, tm->ncycles);
 
-	again:
-	  rndkey = random_u64 (&tm->seed);
-
-	  p = hash_get (tm->key_hash, rndkey);
-	  if (p)
-	    goto again;
+	  fformat (stdout, "Pick %lld unique %s keys...\n",
+		   tm->nitems, tm->non_random_keys ? "non-random" : "random");
 	}
-      else
-	rndkey = (u64) (i + 1) << 16;
 
-      hash_set (tm->key_hash, rndkey, i + 1);
-      vec_add1 (tm->keys, rndkey);
-    }
-
-  fformat (stdout, "Add items...\n");
-  for (i = 0; i < tm->nitems; i++)
-    {
-      kv.key = tm->keys[i];
-      kv.value = i + 1;
-
-      BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */ );
-
-      if (tm->verbose > 1)
+      for (i = 0; i < tm->nitems; i++)
 	{
-	  fformat (stdout, "--------------------\n");
-	  fformat (stdout, "After adding key %llu value %lld...\n",
-		   tm->keys[i], (u64) (i + 1));
-	  fformat (stdout, "%U", BV (format_bihash), h,
-		   2 /* very verbose */ );
+	  u64 rndkey;
+
+	  if (tm->non_random_keys == 0)
+	    {
+
+	    again:
+	      rndkey = random_u64 (&tm->seed);
+
+	      p = hash_get (tm->key_hash, rndkey);
+	      if (p)
+		goto again;
+	    }
+	  else
+	    rndkey = (u64) (i + 1) << 16;
+	  rndkey += acycle;
+
+	  hash_set (tm->key_hash, rndkey, i + 1);
+	  vec_add1 (tm->keys, rndkey);
 	}
-    }
 
-  fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */ );
 
-  fformat (stdout, "Search for items %d times...\n", tm->search_iter);
+      if ((acycle % tm->report_every_n) == 0)
+	fformat (stdout, "Add items...\n");
 
-  before = clib_time_now (&tm->clib_time);
-
-  for (j = 0; j < tm->search_iter; j++)
-    {
       for (i = 0; i < tm->nitems; i++)
 	{
 	  kv.key = tm->keys[i];
-	  if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
-	    if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
-	      clib_warning ("[%d] search for key %lld failed unexpectedly\n",
-			    i, tm->keys[i]);
-	  if (kv.value != (u64) (i + 1))
-	    clib_warning
-	      ("[%d] search for key %lld returned %lld, not %lld\n", i,
-	       tm->keys, kv.value, (u64) (i + 1));
+	  kv.value = i + 1;
+
+	  BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */ );
+
+	  if (tm->verbose > 1)
+	    {
+	      fformat (stdout, "--------------------\n");
+	      fformat (stdout, "After adding key %llu value %lld...\n",
+		       tm->keys[i], (u64) (i + 1));
+	      fformat (stdout, "%U", BV (format_bihash), h,
+		       2 /* very verbose */ );
+	    }
 	}
-    }
 
-  delta = clib_time_now (&tm->clib_time) - before;
-  total_searches = (uword) tm->search_iter * (uword) tm->nitems;
+      if ((acycle % tm->report_every_n) == 0)
+	{
+	  fformat (stdout, "%U", BV (format_bihash), h,
+		   0 /* very verbose */ );
 
-  if (delta > 0)
-    fformat (stdout, "%.f searches per second\n",
-	     ((f64) total_searches) / delta);
+	  fformat (stdout, "Search for items %d times...\n", tm->search_iter);
+	}
 
-  fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta);
+      before = clib_time_now (&tm->clib_time);
 
-  fformat (stdout, "Standard E-hash search for items %d times...\n",
-	   tm->search_iter);
+      for (j = 0; j < tm->search_iter; j++)
+	{
+	  for (i = 0; i < tm->nitems; i++)
+	    {
+	      kv.key = tm->keys[i];
+	      if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+		if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+		  clib_warning
+		    ("[%d] search for key %lld failed unexpectedly\n", i,
+		     tm->keys[i]);
+	      if (kv.value != (u64) (i + 1))
+		clib_warning
+		  ("[%d] search for key %lld returned %lld, not %lld\n", i,
+		   tm->keys, kv.value, (u64) (i + 1));
+	    }
+	}
 
-  before = clib_time_now (&tm->clib_time);
+      if ((acycle % tm->report_every_n) == 0)
+	{
+	  delta = clib_time_now (&tm->clib_time) - before;
+	  total_searches = (uword) tm->search_iter * (uword) tm->nitems;
 
-  for (j = 0; j < tm->search_iter; j++)
-    {
+	  if (delta > 0)
+	    fformat (stdout, "%.f searches per second\n",
+		     ((f64) total_searches) / delta);
+
+	  fformat (stdout, "%lld searches in %.6f seconds\n", total_searches,
+		   delta);
+
+	  fformat (stdout, "Standard E-hash search for items %d times...\n",
+		   tm->search_iter);
+	}
+
+      before = clib_time_now (&tm->clib_time);
+
+      for (j = 0; j < tm->search_iter; j++)
+	{
+	  for (i = 0; i < tm->nitems; i++)
+	    {
+	      p = hash_get (tm->key_hash, tm->keys[i]);
+	      if (p == 0 || p[0] != (uword) (i + 1))
+		clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]);
+	    }
+	}
+
+      delta = clib_time_now (&tm->clib_time) - before;
+      total_searches = (uword) tm->search_iter * (uword) tm->nitems;
+
+      if ((acycle % tm->report_every_n) == 0)
+	{
+	  fformat (stdout, "%lld searches in %.6f seconds\n",
+		   total_searches, delta);
+
+	  if (delta > 0)
+	    fformat (stdout, "%.f searches per second\n",
+		     ((f64) total_searches) / delta);
+	  fformat (stdout, "Delete items...\n");
+	}
+
       for (i = 0; i < tm->nitems; i++)
 	{
-	  p = hash_get (tm->key_hash, tm->keys[i]);
-	  if (p == 0 || p[0] != (uword) (i + 1))
-	    clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]);
-	}
-    }
+	  int j;
+	  int rv;
 
-  delta = clib_time_now (&tm->clib_time) - before;
-  total_searches = (uword) tm->search_iter * (uword) tm->nitems;
+	  kv.key = tm->keys[i];
+	  kv.value = (u64) (i + 1);
+	  rv = BV (clib_bihash_add_del) (h, &kv, 0 /* is_add */ );
 
-  fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta);
+	  if (rv < 0)
+	    clib_warning ("delete key %lld not ok but should be",
+			  tm->keys[i]);
 
-  if (delta > 0)
-    fformat (stdout, "%.f searches per second\n",
-	     ((f64) total_searches) / delta);
-
-  fformat (stdout, "Delete items...\n");
-
-  for (i = 0; i < tm->nitems; i++)
-    {
-      int j;
-      int rv;
-
-      kv.key = tm->keys[i];
-      kv.value = (u64) (i + 1);
-      rv = BV (clib_bihash_add_del) (h, &kv, 0 /* is_add */ );
-
-      if (rv < 0)
-	clib_warning ("delete key %lld not ok but should be", tm->keys[i]);
-
-      if (tm->careful_delete_tests)
-	{
-	  for (j = 0; j < tm->nitems; j++)
+	  if (tm->careful_delete_tests)
 	    {
-	      kv.key = tm->keys[j];
-	      rv = BV (clib_bihash_search) (h, &kv, &kv);
-	      if (j <= i && rv >= 0)
+	      for (j = 0; j < tm->nitems; j++)
 		{
-		  clib_warning
-		    ("i %d j %d search ok but should not be, value %lld",
-		     i, j, kv.value);
-		}
-	      if (j > i && rv < 0)
-		{
-		  clib_warning ("i %d j %d search not ok but should be",
-				i, j);
+		  kv.key = tm->keys[j];
+		  rv = BV (clib_bihash_search) (h, &kv, &kv);
+		  if (j <= i && rv >= 0)
+		    {
+		      clib_warning
+			("i %d j %d search ok but should not be, value %lld",
+			 i, j, kv.value);
+		    }
+		  if (j > i && rv < 0)
+		    {
+		      clib_warning ("i %d j %d search not ok but should be",
+				    i, j);
+		    }
 		}
 	    }
 	}
+      if ((acycle % tm->report_every_n) == 0)
+	{
+	  struct rusage r_usage;
+	  getrusage (RUSAGE_SELF, &r_usage);
+	  fformat (stdout, "Kernel RSS: %ld bytes\n", r_usage.ru_maxrss);
+	  fformat (stdout, "%U\n", BV (format_bihash), h, 0 /* verbose */ );
+	}
+
+      /* Clean up side-bet hash table and random key vector */
+      for (i = 0; i < tm->nitems; i++)
+	hash_unset (tm->key_hash, tm->keys[i]);
+
+      vec_reset_length (tm->keys);
     }
 
-  fformat (stdout, "After deletions, should be empty...\n");
+  fformat (stdout, "End of run, should be empty...\n");
 
   fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */ );
   return 0;
@@ -276,6 +321,8 @@
   clib_error_t *error;
   int which = 0;
 
+  tm->report_every_n = 1;
+
   while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (i, "seed %u", &tm->seed))
@@ -287,12 +334,16 @@
 	tm->non_random_keys = 1;
       else if (unformat (i, "nitems %d", &tm->nitems))
 	;
+      else if (unformat (i, "ncycles %d", &tm->ncycles))
+	;
       else if (unformat (i, "careful %d", &tm->careful_delete_tests))
 	;
       else if (unformat (i, "verbose %d", &tm->verbose))
 	;
       else if (unformat (i, "search %d", &tm->search_iter))
 	;
+      else if (unformat (i, "report-every %d", &tm->report_every_n))
+	;
       else if (unformat (i, "vec64"))
 	which = 1;
       else if (unformat (i, "cache"))
@@ -341,6 +392,7 @@
 
   tm->nbuckets = 2;
   tm->nitems = 5;
+  tm->ncycles = 1;
   tm->verbose = 1;
   tm->search_iter = 1;
   tm->careful_delete_tests = 0;