VPP-847: improve bihash template memory allocator performance
Particularly in the DCLIB_VEC64=1 case, using vectors vs. raw
clib_mem_alloc'ed memory causes abysmal memory allocator performance.
Change-Id: I07a4dec0cd69ca357445385e2671cdf23c59b95d
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index 51fadeb..7e4216b 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -55,7 +55,8 @@
oldheap = clib_mem_set_heap (h->mheap);
vec_validate (h->freelists, log2_pages);
- vec_validate_aligned (rv, (1 << log2_pages) - 1, CLIB_CACHE_LINE_BYTES);
+ rv = clib_mem_alloc_aligned ((sizeof (*rv) * (1 << log2_pages)),
+ CLIB_CACHE_LINE_BYTES);
clib_mem_set_heap (oldheap);
goto initialize;
}
@@ -64,7 +65,6 @@
initialize:
ASSERT (rv);
- ASSERT (vec_len (rv) == (1 << log2_pages));
/*
* Latest gcc complains that the length arg is zero
* if we replace (1<<log2_pages) with vec_len(rv).
@@ -75,14 +75,11 @@
}
static void
-BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v)
+BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
+ u32 log2_pages)
{
- u32 log2_pages;
-
ASSERT (h->writer_lock[0]);
- log2_pages = min_log2 (vec_len (v));
-
ASSERT (vec_len (h->freelists) > log2_pages);
v->next_free = h->freelists[log2_pages];
@@ -97,11 +94,14 @@
void *oldheap;
BVT (clib_bihash_value) * working_copy;
u32 thread_index = os_get_thread_index ();
+ int log2_working_copy_length;
if (thread_index >= vec_len (h->working_copies))
{
oldheap = clib_mem_set_heap (h->mheap);
vec_validate (h->working_copies, thread_index);
+ vec_validate (h->working_copy_lengths, thread_index);
+ h->working_copy_lengths[thread_index] = -1;
clib_mem_set_heap (oldheap);
}
@@ -111,18 +111,23 @@
* lookup failures.
*/
working_copy = h->working_copies[thread_index];
+ log2_working_copy_length = h->working_copy_lengths[thread_index];
h->saved_bucket.as_u64 = b->as_u64;
oldheap = clib_mem_set_heap (h->mheap);
- if ((1 << b->log2_pages) > vec_len (working_copy))
+ if (b->log2_pages > log2_working_copy_length)
{
- vec_validate_aligned (working_copy, (1 << b->log2_pages) - 1,
- sizeof (u64));
+ if (working_copy)
+ clib_mem_free (working_copy);
+
+ working_copy = clib_mem_alloc_aligned
+ (sizeof (working_copy[0]) * (1 << b->log2_pages),
+ CLIB_CACHE_LINE_BYTES);
+ h->working_copy_lengths[thread_index] = b->log2_pages;
h->working_copies[thread_index] = working_copy;
}
- _vec_len (working_copy) = 1 << b->log2_pages;
clib_mem_set_heap (oldheap);
v = BV (clib_bihash_get_value) (h, b->offset);
@@ -139,15 +144,16 @@
BVT (clib_bihash_value) *
BV (split_and_rehash)
(BVT (clib_bihash) * h,
- BVT (clib_bihash_value) * old_values, u32 new_log2_pages)
+ BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
+ u32 new_log2_pages)
{
BVT (clib_bihash_value) * new_values, *new_v;
- int i, j, length;
+ int i, j, length_in_kvs;
new_values = BV (value_alloc) (h, new_log2_pages);
- length = vec_len (old_values) * BIHASH_KVP_PER_PAGE;
+ length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
- for (i = 0; i < length; i++)
+ for (i = 0; i < length_in_kvs; i++)
{
u64 new_hash;
@@ -173,10 +179,11 @@
}
}
/* Crap. Tell caller to try again */
- BV (value_free) (h, new_values);
+ BV (value_free) (h, new_values, new_log2_pages);
return 0;
doublebreak:;
}
+
return new_values;
}
@@ -184,17 +191,19 @@
BVT (clib_bihash_value) *
BV (split_and_rehash_linear)
(BVT (clib_bihash) * h,
- BVT (clib_bihash_value) * old_values, u32 new_log2_pages)
+ BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
+ u32 new_log2_pages)
{
BVT (clib_bihash_value) * new_values;
- int i, j, new_length;
+ int i, j, new_length, old_length;
new_values = BV (value_alloc) (h, new_log2_pages);
new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
+ old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
j = 0;
/* Across the old value array */
- for (i = 0; i < vec_len (old_values) * BIHASH_KVP_PER_PAGE; i++)
+ for (i = 0; i < old_length; i++)
{
/* Find a free slot in the new linear scan bucket */
for (; j < new_length; j++)
@@ -215,7 +224,7 @@
}
/* This should never happen... */
clib_warning ("BUG: linear rehash failed!");
- BV (value_free) (h, new_values);
+ BV (value_free) (h, new_values, new_log2_pages);
return 0;
doublebreak:;
@@ -232,7 +241,7 @@
int rv = 0;
int i, limit;
u64 hash, new_hash;
- u32 new_log2_pages;
+ u32 new_log2_pages, old_log2_pages;
u32 thread_index = os_get_thread_index ();
int mark_bucket_linear;
int resplit_once;
@@ -257,6 +266,7 @@
}
v = BV (value_alloc) (h, 0);
+
*v->kvp = *add_v;
tmp_b.as_u64 = 0;
tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
@@ -320,27 +330,31 @@
goto unlock;
}
- new_log2_pages = h->saved_bucket.log2_pages + 1;
+ old_log2_pages = h->saved_bucket.log2_pages;
+ new_log2_pages = old_log2_pages + 1;
mark_bucket_linear = 0;
working_copy = h->working_copies[thread_index];
resplit_once = 0;
- new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages);
+ new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
+ new_log2_pages);
if (new_v == 0)
{
try_resplit:
resplit_once = 1;
new_log2_pages++;
/* Try re-splitting. If that fails, fall back to linear search */
- new_v = BV (split_and_rehash) (h, working_copy, new_log2_pages);
+ new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
+ new_log2_pages);
if (new_v == 0)
{
mark_linear:
new_log2_pages--;
/* pinned collisions, use linear search */
new_v =
- BV (split_and_rehash_linear) (h, working_copy, new_log2_pages);
+ BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
+ new_log2_pages);
mark_bucket_linear = 1;
}
}
@@ -363,8 +377,9 @@
goto expand_ok;
}
}
+
/* Crap. Try again */
- BV (value_free) (h, save_new_v);
+ BV (value_free) (h, save_new_v, new_log2_pages);
/*
* If we've already doubled the size of the bucket once,
* fall back to linear search now.
@@ -382,10 +397,11 @@
tmp_b.log2_pages = new_log2_pages;
tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
tmp_b.linear_search = mark_bucket_linear;
+
CLIB_MEMORY_BARRIER ();
b->as_u64 = tmp_b.as_u64;
v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
- BV (value_free) (h, v);
+ BV (value_free) (h, v, old_log2_pages);
unlock:
CLIB_MEMORY_BARRIER ();
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index a0a7844..4ea14ff 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -77,6 +77,7 @@
volatile u32 *writer_lock;
BVT (clib_bihash_value) ** working_copies;
+ int *working_copy_lengths;
clib_bihash_bucket_t saved_bucket;
u32 nbuckets;
diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c
index d4010ce..5bbbc65 100644
--- a/src/vppinfra/mheap.c
+++ b/src/vppinfra/mheap.c
@@ -549,23 +549,17 @@
non_empty_bin_mask &= ~pow2_mask (bin % BITS (uword));
/* Search each occupied free bin which is large enough. */
- foreach_set_bit (bi, non_empty_bin_mask, (
- {
- uword r =
- mheap_get_search_free_bin (v,
- bi
- +
- i
- *
- BITS
- (uword),
- n_user_bytes_arg,
- align,
- align_offset);
- if (r !=
- MHEAP_GROUNDED) return
- r;}
- ));
+ /* *INDENT-OFF* */
+ foreach_set_bit (bi, non_empty_bin_mask,
+ ({
+ uword r =
+ mheap_get_search_free_bin (v, bi + i * BITS (uword),
+ n_user_bytes_arg,
+ align,
+ align_offset);
+ if (r != MHEAP_GROUNDED) return r;
+ }));
+ /* *INDENT-ON* */
}
return MHEAP_GROUNDED;
diff --git a/src/vppinfra/mheap_bootstrap.h b/src/vppinfra/mheap_bootstrap.h
index 4b21051..38f0ac8 100644
--- a/src/vppinfra/mheap_bootstrap.h
+++ b/src/vppinfra/mheap_bootstrap.h
@@ -160,11 +160,23 @@
uword *trace_index_by_offset;
} mheap_trace_main_t;
+/* Without vector instructions don't bother with small object cache. */
+#ifdef CLIB_HAVE_VEC128
+#define MHEAP_HAVE_SMALL_OBJECT_CACHE 1
+#else
+#define MHEAP_HAVE_SMALL_OBJECT_CACHE 0
+#endif
+
/* Small object bin i is for objects with
user_size > sizeof (mheap_elt_t) + sizeof (mheap_elt_t) * (i - 1)
user_size <= sizeof (mheap_elt_t) + sizeof (mheap_size_t) * i. */
+#if MHEAP_HAVE_SMALL_OBJECT_CACHE > 0
#define MHEAP_LOG2_N_SMALL_OBJECT_BINS 8
#define MHEAP_N_SMALL_OBJECT_BINS (1 << MHEAP_LOG2_N_SMALL_OBJECT_BINS)
+#else
+#define MHEAP_LOG2_N_SMALL_OBJECT_BINS 0
+#define MHEAP_N_SMALL_OBJECT_BINS 0
+#endif
#define MHEAP_N_BINS \
(MHEAP_N_SMALL_OBJECT_BINS \
@@ -188,18 +200,6 @@
u64 n_clocks_get, n_clocks_put;
} mheap_stats_t;
-/* Without vector instructions don't bother with small object cache. */
-#ifdef CLIB_HAVE_VEC128
-#define MHEAP_HAVE_SMALL_OBJECT_CACHE 1
-#else
-#define MHEAP_HAVE_SMALL_OBJECT_CACHE 0
-#endif
-
-#if CLIB_VEC64 > 0
-#undef MHEAP_HAVE_SMALL_OBJECT_CACHE
-#define MHEAP_HAVE_SMALL_OBJECT_CACHE 0
-#endif
-
/* For objects with align == 4 and align_offset == 0 (e.g. vector strings). */
typedef struct
{
diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c
index ef03f56..1e26243 100644
--- a/src/vppinfra/test_bihash_template.c
+++ b/src/vppinfra/test_bihash_template.c
@@ -48,6 +48,43 @@
}
static clib_error_t *
+test_bihash_vec64 (test_main_t * tm)
+{
+ u32 user_buckets = 1228800;
+ u32 user_memory_size = 209715200;
+ BVT (clib_bihash_kv) kv;
+ int i, j;
+ f64 before;
+ f64 *cum_times = 0;
+ BVT (clib_bihash) * h;
+
+ h = &tm->hash;
+
+ BV (clib_bihash_init) (h, "test", user_buckets, user_memory_size);
+
+ before = clib_time_now (&tm->clib_time);
+
+ for (j = 0; j < 10; j++)
+ {
+ for (i = 1; i <= j * 1000 + 1; i++)
+ {
+ kv.key = i;
+ kv.value = 1;
+
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */ );
+ }
+
+ vec_add1 (cum_times, clib_time_now (&tm->clib_time) - before);
+ }
+
+ for (j = 0; j < vec_len (cum_times); j++)
+ fformat (stdout, "Cum time for %d: %.4f (us)\n", (j + 1) * 1000,
+ cum_times[j] * 1e6);
+
+ return 0;
+}
+
+static clib_error_t *
test_bihash (test_main_t * tm)
{
int i, j;
@@ -204,6 +241,7 @@
{
unformat_input_t *i = tm->input;
clib_error_t *error;
+ int test_vec64 = 0;
while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
{
@@ -222,6 +260,8 @@
;
else if (unformat (i, "search %d", &tm->search_iter))
;
+ else if (unformat (i, "vec64"))
+ test_vec64 = 1;
else if (unformat (i, "verbose"))
tm->verbose = 1;
else
@@ -229,7 +269,10 @@
format_unformat_error, i);
}
- error = test_bihash (tm);
+ if (test_vec64)
+ error = test_bihash_vec64 (tm);
+ else
+ error = test_bihash (tm);
return error;
}