blob: 18d74d9ea2f41328ea2f2191566d1b8f82fdd14b [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
Chris Luke16bcf7d2016-09-01 14:31:46 -040016/** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */
Dave Barachdd3a57f2016-07-27 16:58:51 -040017
Dave Barach97f5af02018-02-22 09:48:45 -050018static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
19{
20 uword rv;
21
22 /* Round to an even number of cache lines */
23 nbytes += CLIB_CACHE_LINE_BYTES - 1;
24 nbytes &= ~(CLIB_CACHE_LINE_BYTES - 1);
25
26 rv = h->alloc_arena_next;
27 h->alloc_arena_next += nbytes;
28
29 if (rv >= (h->alloc_arena + h->alloc_arena_size))
30 os_out_of_memory ();
31
32 return (void *) rv;
33}
34
35
Dave Barachc3799992016-08-15 11:12:27 -040036void BV (clib_bihash_init)
37 (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size)
Ed Warnickecb9cada2015-12-08 15:45:58 -070038{
Dave Barach97f5af02018-02-22 09:48:45 -050039 uword bucket_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -070040
41 nbuckets = 1 << (max_log2 (nbuckets));
42
Dave Barachc3799992016-08-15 11:12:27 -040043 h->name = (u8 *) name;
Ed Warnickecb9cada2015-12-08 15:45:58 -070044 h->nbuckets = nbuckets;
45 h->log2_nbuckets = max_log2 (nbuckets);
Dave Barach908a5ea2017-07-14 12:42:21 -040046 h->cache_hits = 0;
47 h->cache_misses = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070048
Dave Barach508498f2018-07-19 12:11:16 -040049 /*
50 * Make sure the requested size is rational. The max table
51 * size without playing the alignment card is 64 Gbytes.
52 * If someone starts complaining that's not enough, we can shift
53 * the offset by CLIB_LOG2_CACHE_LINE_BYTES...
54 */
55 ASSERT (memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));
56
Dave Barach97f5af02018-02-22 09:48:45 -050057 h->alloc_arena = (uword) clib_mem_vm_alloc (memory_size);
58 h->alloc_arena_next = h->alloc_arena;
59 h->alloc_arena_size = memory_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -070060
Dave Barach97f5af02018-02-22 09:48:45 -050061 bucket_size = nbuckets * sizeof (h->buckets[0]);
62 h->buckets = BV (alloc_aligned) (h, bucket_size);
63
Dave Barach508498f2018-07-19 12:11:16 -040064 h->alloc_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES);
65 h->alloc_lock[0] = 0;
Dave Barach908a5ea2017-07-14 12:42:21 -040066
Vijayabhaskar Katamreddyfb8e61c2017-12-14 13:20:50 -080067 h->fmt_fn = NULL;
68}
69
70void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
71 format_function_t * fmt_fn)
72{
73 h->fmt_fn = fmt_fn;
Ed Warnickecb9cada2015-12-08 15:45:58 -070074}
75
Dave Barachc3799992016-08-15 11:12:27 -040076void BV (clib_bihash_free) (BVT (clib_bihash) * h)
Ed Warnickecb9cada2015-12-08 15:45:58 -070077{
Dave Barach97f5af02018-02-22 09:48:45 -050078 vec_free (h->working_copies);
79 vec_free (h->freelists);
80 clib_mem_vm_free ((void *) (h->alloc_arena), h->alloc_arena_size);
Dave Barachc3799992016-08-15 11:12:27 -040081 memset (h, 0, sizeof (*h));
Ed Warnickecb9cada2015-12-08 15:45:58 -070082}
83
Dave Barachc3799992016-08-15 11:12:27 -040084static
85BVT (clib_bihash_value) *
86BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
Ed Warnickecb9cada2015-12-08 15:45:58 -070087{
Dave Barachc3799992016-08-15 11:12:27 -040088 BVT (clib_bihash_value) * rv = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -070089
Dave Barach508498f2018-07-19 12:11:16 -040090 ASSERT (h->alloc_lock[0]);
Dave Barachc3799992016-08-15 11:12:27 -040091 if (log2_pages >= vec_len (h->freelists) || h->freelists[log2_pages] == 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -070092 {
Dave Barach97f5af02018-02-22 09:48:45 -050093 vec_validate_init_empty (h->freelists, log2_pages, 0);
94 rv = BV (alloc_aligned) (h, (sizeof (*rv) * (1 << log2_pages)));
Dave Barachc3799992016-08-15 11:12:27 -040095 goto initialize;
Ed Warnickecb9cada2015-12-08 15:45:58 -070096 }
Dave Barachc3799992016-08-15 11:12:27 -040097 rv = h->freelists[log2_pages];
98 h->freelists[log2_pages] = rv->next_free;
Ed Warnickecb9cada2015-12-08 15:45:58 -070099
Dave Barachc3799992016-08-15 11:12:27 -0400100initialize:
101 ASSERT (rv);
Dave Barachc3799992016-08-15 11:12:27 -0400102 /*
103 * Latest gcc complains that the length arg is zero
104 * if we replace (1<<log2_pages) with vec_len(rv).
105 * No clue.
106 */
107 memset (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
108 return rv;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700109}
110
111static void
Dave Barachba7ddfe2017-05-17 20:20:50 -0400112BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
113 u32 log2_pages)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700114{
Dave Barach508498f2018-07-19 12:11:16 -0400115 ASSERT (h->alloc_lock[0]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700116
Dave Barachc3799992016-08-15 11:12:27 -0400117 ASSERT (vec_len (h->freelists) > log2_pages);
118
Dave Barach508498f2018-07-19 12:11:16 -0400119 if (CLIB_DEBUG > 0)
120 memset (v, 0xFE, sizeof (*v) * (1 << log2_pages));
121
Dave Barachc3799992016-08-15 11:12:27 -0400122 v->next_free = h->freelists[log2_pages];
123 h->freelists[log2_pages] = v;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700124}
125
126static inline void
Dave Barach908a5ea2017-07-14 12:42:21 -0400127BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700128{
Dave Barachc3799992016-08-15 11:12:27 -0400129 BVT (clib_bihash_value) * v;
Dave Barach908a5ea2017-07-14 12:42:21 -0400130 BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
Dave Barachc3799992016-08-15 11:12:27 -0400131 BVT (clib_bihash_value) * working_copy;
Damjan Marionf55f9b82017-05-10 21:06:28 +0200132 u32 thread_index = os_get_thread_index ();
Dave Barachba7ddfe2017-05-17 20:20:50 -0400133 int log2_working_copy_length;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700134
Dave Barach508498f2018-07-19 12:11:16 -0400135 ASSERT (h->alloc_lock[0]);
136
Damjan Marionf55f9b82017-05-10 21:06:28 +0200137 if (thread_index >= vec_len (h->working_copies))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700138 {
Damjan Marionf55f9b82017-05-10 21:06:28 +0200139 vec_validate (h->working_copies, thread_index);
Steve Shin871cdec2017-06-02 10:09:02 -0700140 vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700141 }
142
Dave Barachc3799992016-08-15 11:12:27 -0400143 /*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700144 * working_copies are per-cpu so that near-simultaneous
145 * updates from multiple threads will not result in sporadic, spurious
Dave Barachc3799992016-08-15 11:12:27 -0400146 * lookup failures.
Ed Warnickecb9cada2015-12-08 15:45:58 -0700147 */
Damjan Marionf55f9b82017-05-10 21:06:28 +0200148 working_copy = h->working_copies[thread_index];
Dave Barachba7ddfe2017-05-17 20:20:50 -0400149 log2_working_copy_length = h->working_copy_lengths[thread_index];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700150
151 h->saved_bucket.as_u64 = b->as_u64;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700152
Dave Barachba7ddfe2017-05-17 20:20:50 -0400153 if (b->log2_pages > log2_working_copy_length)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700154 {
Dave Barach97f5af02018-02-22 09:48:45 -0500155 /*
156 * It's not worth the bookkeeping to free working copies
157 * if (working_copy)
158 * clib_mem_free (working_copy);
159 */
160 working_copy = BV (alloc_aligned)
161 (h, sizeof (working_copy[0]) * (1 << b->log2_pages));
Dave Barachba7ddfe2017-05-17 20:20:50 -0400162 h->working_copy_lengths[thread_index] = b->log2_pages;
Damjan Marionf55f9b82017-05-10 21:06:28 +0200163 h->working_copies[thread_index] = working_copy;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700164 }
165
Dave Barachc3799992016-08-15 11:12:27 -0400166 v = BV (clib_bihash_get_value) (h, b->offset);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700167
Dave Barachc3799992016-08-15 11:12:27 -0400168 clib_memcpy (working_copy, v, sizeof (*v) * (1 << b->log2_pages));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700169 working_bucket.as_u64 = b->as_u64;
Dave Barachc3799992016-08-15 11:12:27 -0400170 working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
171 CLIB_MEMORY_BARRIER ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700172 b->as_u64 = working_bucket.as_u64;
Damjan Marionf55f9b82017-05-10 21:06:28 +0200173 h->working_copies[thread_index] = working_copy;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700174}
175
Dave Barachc3799992016-08-15 11:12:27 -0400176static
177BVT (clib_bihash_value) *
178BV (split_and_rehash)
179 (BVT (clib_bihash) * h,
Dave Barachba7ddfe2017-05-17 20:20:50 -0400180 BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
181 u32 new_log2_pages)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700182{
Dave Barach5e6b9582016-12-12 15:37:29 -0500183 BVT (clib_bihash_value) * new_values, *new_v;
Dave Barachba7ddfe2017-05-17 20:20:50 -0400184 int i, j, length_in_kvs;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700185
Dave Barach508498f2018-07-19 12:11:16 -0400186 ASSERT (h->alloc_lock[0]);
187
Dave Barachc3799992016-08-15 11:12:27 -0400188 new_values = BV (value_alloc) (h, new_log2_pages);
Dave Barachba7ddfe2017-05-17 20:20:50 -0400189 length_in_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700190
Dave Barachba7ddfe2017-05-17 20:20:50 -0400191 for (i = 0; i < length_in_kvs; i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700192 {
193 u64 new_hash;
Dave Barachc3799992016-08-15 11:12:27 -0400194
Dave Barach5e6b9582016-12-12 15:37:29 -0500195 /* Entry not in use? Forget it */
196 if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
197 continue;
198
199 /* rehash the item onto its new home-page */
200 new_hash = BV (clib_bihash_hash) (&(old_values->kvp[i]));
201 new_hash >>= h->log2_nbuckets;
202 new_hash &= (1 << new_log2_pages) - 1;
203 new_v = &new_values[new_hash];
204
205 /* Across the new home-page */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700206 for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
Dave Barachc3799992016-08-15 11:12:27 -0400207 {
Dave Barach5e6b9582016-12-12 15:37:29 -0500208 /* Empty slot */
209 if (BV (clib_bihash_is_free) (&(new_v->kvp[j])))
Dave Barachc3799992016-08-15 11:12:27 -0400210 {
Dave Barach5e6b9582016-12-12 15:37:29 -0500211 clib_memcpy (&(new_v->kvp[j]), &(old_values->kvp[i]),
212 sizeof (new_v->kvp[j]));
213 goto doublebreak;
Dave Barachc3799992016-08-15 11:12:27 -0400214 }
Dave Barachc3799992016-08-15 11:12:27 -0400215 }
Dave Barach5e6b9582016-12-12 15:37:29 -0500216 /* Crap. Tell caller to try again */
Dave Barachba7ddfe2017-05-17 20:20:50 -0400217 BV (value_free) (h, new_values, new_log2_pages);
Dave Barach5e6b9582016-12-12 15:37:29 -0500218 return 0;
219 doublebreak:;
220 }
Dave Barachba7ddfe2017-05-17 20:20:50 -0400221
Dave Barach5e6b9582016-12-12 15:37:29 -0500222 return new_values;
223}
224
225static
226BVT (clib_bihash_value) *
227BV (split_and_rehash_linear)
228 (BVT (clib_bihash) * h,
Dave Barachba7ddfe2017-05-17 20:20:50 -0400229 BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
230 u32 new_log2_pages)
Dave Barach5e6b9582016-12-12 15:37:29 -0500231{
232 BVT (clib_bihash_value) * new_values;
Dave Barachba7ddfe2017-05-17 20:20:50 -0400233 int i, j, new_length, old_length;
Dave Barach5e6b9582016-12-12 15:37:29 -0500234
Dave Barach508498f2018-07-19 12:11:16 -0400235 ASSERT (h->alloc_lock[0]);
236
Dave Barach5e6b9582016-12-12 15:37:29 -0500237 new_values = BV (value_alloc) (h, new_log2_pages);
238 new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
Dave Barachba7ddfe2017-05-17 20:20:50 -0400239 old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;
Dave Barach5e6b9582016-12-12 15:37:29 -0500240
241 j = 0;
242 /* Across the old value array */
Dave Barachba7ddfe2017-05-17 20:20:50 -0400243 for (i = 0; i < old_length; i++)
Dave Barach5e6b9582016-12-12 15:37:29 -0500244 {
245 /* Find a free slot in the new linear scan bucket */
246 for (; j < new_length; j++)
247 {
Dave Barach8f544962017-01-18 10:23:22 -0500248 /* Old value not in use? Forget it. */
Dave Barach5e6b9582016-12-12 15:37:29 -0500249 if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
250 goto doublebreak;
251
252 /* New value should never be in use */
253 if (BV (clib_bihash_is_free) (&(new_values->kvp[j])))
254 {
255 /* Copy the old value and move along */
256 clib_memcpy (&(new_values->kvp[j]), &(old_values->kvp[i]),
257 sizeof (new_values->kvp[j]));
258 j++;
259 goto doublebreak;
260 }
Dave Barach5e6b9582016-12-12 15:37:29 -0500261 }
Dave Barach8f544962017-01-18 10:23:22 -0500262 /* This should never happen... */
263 clib_warning ("BUG: linear rehash failed!");
Dave Barachba7ddfe2017-05-17 20:20:50 -0400264 BV (value_free) (h, new_values, new_log2_pages);
Dave Barach8f544962017-01-18 10:23:22 -0500265 return 0;
266
Dave Barach5e6b9582016-12-12 15:37:29 -0500267 doublebreak:;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700268 }
269 return new_values;
270}
271
Matus Fabian828d27e2018-08-21 03:15:50 -0700272static inline int BV (clib_bihash_add_del_inline)
273 (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add,
274 int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700275{
276 u32 bucket_index;
Dave Barach908a5ea2017-07-14 12:42:21 -0400277 BVT (clib_bihash_bucket) * b, tmp_b;
Dave Barachc3799992016-08-15 11:12:27 -0400278 BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
Dave Barach5e6b9582016-12-12 15:37:29 -0500279 int i, limit;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700280 u64 hash, new_hash;
Dave Barachba7ddfe2017-05-17 20:20:50 -0400281 u32 new_log2_pages, old_log2_pages;
Damjan Marionf55f9b82017-05-10 21:06:28 +0200282 u32 thread_index = os_get_thread_index ();
Dave Barach5e6b9582016-12-12 15:37:29 -0500283 int mark_bucket_linear;
284 int resplit_once;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700285
Dave Barachc3799992016-08-15 11:12:27 -0400286 hash = BV (clib_bihash_hash) (add_v);
287
288 bucket_index = hash & (h->nbuckets - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700289 b = &h->buckets[bucket_index];
290
291 hash >>= h->log2_nbuckets;
292
Dave Barach508498f2018-07-19 12:11:16 -0400293 BV (clib_bihash_lock_bucket) (b);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700294
295 /* First elt in the bucket? */
Damjan Marion882fcfe2018-07-17 23:01:49 +0200296 if (BV (clib_bihash_bucket_is_empty) (b))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700297 {
298 if (is_add == 0)
Dave Barachc3799992016-08-15 11:12:27 -0400299 {
Dave Barach508498f2018-07-19 12:11:16 -0400300 BV (clib_bihash_unlock_bucket) (b);
301 return (-1);
Dave Barachc3799992016-08-15 11:12:27 -0400302 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700303
Dave Barach508498f2018-07-19 12:11:16 -0400304 BV (clib_bihash_alloc_lock) (h);
Dave Barachc3799992016-08-15 11:12:27 -0400305 v = BV (value_alloc) (h, 0);
Dave Barach508498f2018-07-19 12:11:16 -0400306 BV (clib_bihash_alloc_unlock) (h);
Dave Barachba7ddfe2017-05-17 20:20:50 -0400307
Dave Barachc3799992016-08-15 11:12:27 -0400308 *v->kvp = *add_v;
Dave Barach508498f2018-07-19 12:11:16 -0400309 tmp_b.as_u64 = 0; /* clears bucket lock */
Dave Barachc3799992016-08-15 11:12:27 -0400310 tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
Dave Barache7d212f2018-02-07 13:14:06 -0500311 tmp_b.refcnt = 1;
Dave Barach508498f2018-07-19 12:11:16 -0400312 CLIB_MEMORY_BARRIER ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700313
314 b->as_u64 = tmp_b.as_u64;
Dave Barach508498f2018-07-19 12:11:16 -0400315 BV (clib_bihash_unlock_bucket) (b);
316 return (0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700317 }
318
Dave Barach508498f2018-07-19 12:11:16 -0400319 /* WARNING: we're still looking at the live copy... */
Dave Barach5e6b9582016-12-12 15:37:29 -0500320 limit = BIHASH_KVP_PER_PAGE;
Dave Barach508498f2018-07-19 12:11:16 -0400321 v = BV (clib_bihash_get_value) (h, b->offset);
322
Dave Barach5e6b9582016-12-12 15:37:29 -0500323 v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
324 if (b->linear_search)
325 limit <<= b->log2_pages;
Dave Barachc3799992016-08-15 11:12:27 -0400326
Ed Warnickecb9cada2015-12-08 15:45:58 -0700327 if (is_add)
328 {
Dave Barachc3799992016-08-15 11:12:27 -0400329 /*
Dave Barach508498f2018-07-19 12:11:16 -0400330 * Because reader threads are looking at live data,
331 * we have to be extra careful. Readers do NOT hold the
332 * bucket lock. We need to be SLOWER than a search, past the
333 * point where readers CHECK the bucket lock.
334 */
335
336 /*
Ed Warnickecb9cada2015-12-08 15:45:58 -0700337 * For obvious (in hindsight) reasons, see if we're supposed to
338 * replace an existing key, then look for an empty slot.
339 */
Dave Barach5e6b9582016-12-12 15:37:29 -0500340 for (i = 0; i < limit; i++)
Dave Barachc3799992016-08-15 11:12:27 -0400341 {
342 if (!memcmp (&(v->kvp[i]), &add_v->key, sizeof (add_v->key)))
343 {
Dave Barach508498f2018-07-19 12:11:16 -0400344 CLIB_MEMORY_BARRIER (); /* Add a delay */
Dave Barachc3799992016-08-15 11:12:27 -0400345 clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
Dave Barach508498f2018-07-19 12:11:16 -0400346 BV (clib_bihash_unlock_bucket) (b);
347 return (0);
Dave Barachc3799992016-08-15 11:12:27 -0400348 }
349 }
Dave Barach508498f2018-07-19 12:11:16 -0400350 /*
351 * Look for an empty slot. If found, use it
352 */
Dave Barach5e6b9582016-12-12 15:37:29 -0500353 for (i = 0; i < limit; i++)
Dave Barachc3799992016-08-15 11:12:27 -0400354 {
355 if (BV (clib_bihash_is_free) (&(v->kvp[i])))
356 {
Dave Barach508498f2018-07-19 12:11:16 -0400357 /*
358 * Copy the value first, so that if a reader manages
359 * to match the new key, the value will be right...
360 */
361 clib_memcpy (&(v->kvp[i].value),
362 &add_v->value, sizeof (add_v->value));
363 CLIB_MEMORY_BARRIER (); /* Make sure the value has settled */
364 clib_memcpy (&(v->kvp[i]), &add_v->key, sizeof (add_v->key));
Dave Barache7d212f2018-02-07 13:14:06 -0500365 b->refcnt++;
Dave Barach508498f2018-07-19 12:11:16 -0400366 BV (clib_bihash_unlock_bucket) (b);
367 return (0);
Dave Barachc3799992016-08-15 11:12:27 -0400368 }
369 }
Matus Fabian828d27e2018-08-21 03:15:50 -0700370 /* look for stale data to overwrite */
371 if (is_stale_cb)
372 {
373 for (i = 0; i < limit; i++)
374 {
375 if (is_stale_cb (&(v->kvp[i]), arg))
376 {
377 CLIB_MEMORY_BARRIER ();
378 clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
379 BV (clib_bihash_unlock_bucket) (b);
380 return (0);
381 }
382 }
383 }
Dave Barach508498f2018-07-19 12:11:16 -0400384 /* Out of space in this bucket, split the bucket... */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700385 }
Dave Barach508498f2018-07-19 12:11:16 -0400386 else /* delete case */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700387 {
Dave Barach5e6b9582016-12-12 15:37:29 -0500388 for (i = 0; i < limit; i++)
Dave Barachc3799992016-08-15 11:12:27 -0400389 {
Dave Barach508498f2018-07-19 12:11:16 -0400390 /* Found the key? Kill it... */
Dave Barachc3799992016-08-15 11:12:27 -0400391 if (!memcmp (&(v->kvp[i]), &add_v->key, sizeof (add_v->key)))
392 {
393 memset (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
Dave Barach508498f2018-07-19 12:11:16 -0400394 /* Is the bucket empty? */
395 if (PREDICT_TRUE (b->refcnt > 1))
Dave Barache7d212f2018-02-07 13:14:06 -0500396 {
Dave Barach508498f2018-07-19 12:11:16 -0400397 b->refcnt--;
398 BV (clib_bihash_unlock_bucket) (b);
399 return (0);
Dave Barache7d212f2018-02-07 13:14:06 -0500400 }
Dave Barach508498f2018-07-19 12:11:16 -0400401 else /* yes, free it */
Dave Barache7d212f2018-02-07 13:14:06 -0500402 {
Dave Barach508498f2018-07-19 12:11:16 -0400403 /* Save old bucket value, need log2_pages to free it */
404 tmp_b.as_u64 = b->as_u64;
405 CLIB_MEMORY_BARRIER ();
406
407 /* Kill and unlock the bucket */
408 b->as_u64 = 0;
409
410 /* And free the backing storage */
411 BV (clib_bihash_alloc_lock) (h);
412 /* Note: v currently points into the middle of the bucket */
413 v = BV (clib_bihash_get_value) (h, tmp_b.offset);
414 BV (value_free) (h, v, tmp_b.log2_pages);
415 BV (clib_bihash_alloc_unlock) (h);
416 return (0);
Dave Barache7d212f2018-02-07 13:14:06 -0500417 }
Dave Barachc3799992016-08-15 11:12:27 -0400418 }
419 }
Dave Barach508498f2018-07-19 12:11:16 -0400420 /* Not found... */
421 BV (clib_bihash_unlock_bucket) (b);
422 return (-3);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700423 }
424
Dave Barach508498f2018-07-19 12:11:16 -0400425 /* Move readers to a (locked) temp copy of the bucket */
426 BV (clib_bihash_alloc_lock) (h);
427 BV (make_working_copy) (h, b);
428
429 v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
430
Dave Barachba7ddfe2017-05-17 20:20:50 -0400431 old_log2_pages = h->saved_bucket.log2_pages;
432 new_log2_pages = old_log2_pages + 1;
Dave Barach5e6b9582016-12-12 15:37:29 -0500433 mark_bucket_linear = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700434
Damjan Marionf55f9b82017-05-10 21:06:28 +0200435 working_copy = h->working_copies[thread_index];
Dave Barach5e6b9582016-12-12 15:37:29 -0500436 resplit_once = 0;
437
Dave Barachba7ddfe2017-05-17 20:20:50 -0400438 new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
439 new_log2_pages);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700440 if (new_v == 0)
441 {
Dave Barach5e6b9582016-12-12 15:37:29 -0500442 try_resplit:
443 resplit_once = 1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700444 new_log2_pages++;
Dave Barach5e6b9582016-12-12 15:37:29 -0500445 /* Try re-splitting. If that fails, fall back to linear search */
Dave Barachba7ddfe2017-05-17 20:20:50 -0400446 new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
447 new_log2_pages);
Dave Barach5e6b9582016-12-12 15:37:29 -0500448 if (new_v == 0)
449 {
450 mark_linear:
451 new_log2_pages--;
452 /* pinned collisions, use linear search */
453 new_v =
Dave Barachba7ddfe2017-05-17 20:20:50 -0400454 BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
455 new_log2_pages);
Dave Barach5e6b9582016-12-12 15:37:29 -0500456 mark_bucket_linear = 1;
457 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700458 }
459
460 /* Try to add the new entry */
461 save_new_v = new_v;
Dave Barachc3799992016-08-15 11:12:27 -0400462 new_hash = BV (clib_bihash_hash) (add_v);
Dave Barach5e6b9582016-12-12 15:37:29 -0500463 limit = BIHASH_KVP_PER_PAGE;
464 if (mark_bucket_linear)
465 limit <<= new_log2_pages;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700466 new_hash >>= h->log2_nbuckets;
Dave Barach5e6b9582016-12-12 15:37:29 -0500467 new_hash &= (1 << new_log2_pages) - 1;
468 new_v += mark_bucket_linear ? 0 : new_hash;
Dave Barachc3799992016-08-15 11:12:27 -0400469
Dave Barach5e6b9582016-12-12 15:37:29 -0500470 for (i = 0; i < limit; i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700471 {
Dave Barachc3799992016-08-15 11:12:27 -0400472 if (BV (clib_bihash_is_free) (&(new_v->kvp[i])))
473 {
474 clib_memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v));
475 goto expand_ok;
476 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700477 }
Dave Barachba7ddfe2017-05-17 20:20:50 -0400478
Ed Warnickecb9cada2015-12-08 15:45:58 -0700479 /* Crap. Try again */
Dave Barachba7ddfe2017-05-17 20:20:50 -0400480 BV (value_free) (h, save_new_v, new_log2_pages);
Dave Barach5e6b9582016-12-12 15:37:29 -0500481 /*
482 * If we've already doubled the size of the bucket once,
483 * fall back to linear search now.
484 */
485 if (resplit_once)
486 goto mark_linear;
487 else
488 goto try_resplit;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700489
Dave Barachc3799992016-08-15 11:12:27 -0400490expand_ok:
Dave Barach5e6b9582016-12-12 15:37:29 -0500491 tmp_b.log2_pages = new_log2_pages;
Dave Barachc3799992016-08-15 11:12:27 -0400492 tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
Dave Barach5e6b9582016-12-12 15:37:29 -0500493 tmp_b.linear_search = mark_bucket_linear;
Dave Barache7d212f2018-02-07 13:14:06 -0500494 tmp_b.refcnt = h->saved_bucket.refcnt + 1;
Dave Barach508498f2018-07-19 12:11:16 -0400495 tmp_b.lock = 0;
Dave Barachc3799992016-08-15 11:12:27 -0400496 CLIB_MEMORY_BARRIER ();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700497 b->as_u64 = tmp_b.as_u64;
Dave Barach508498f2018-07-19 12:11:16 -0400498 BV (clib_bihash_alloc_unlock) (h);
499 return (0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700500}
501
Matus Fabian828d27e2018-08-21 03:15:50 -0700502int BV (clib_bihash_add_del)
503 (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add)
504{
505 return BV (clib_bihash_add_del_inline) (h, add_v, is_add, 0, 0);
506}
507
508int BV (clib_bihash_add_or_overwrite_stale)
509 (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
510 int (*stale_callback) (BVT (clib_bihash_kv) *, void *), void *arg)
511{
512 return BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
513}
514
Dave Barachc3799992016-08-15 11:12:27 -0400515int BV (clib_bihash_search)
Dave Barach908a5ea2017-07-14 12:42:21 -0400516 (BVT (clib_bihash) * h,
Dave Barachc3799992016-08-15 11:12:27 -0400517 BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700518{
519 u64 hash;
520 u32 bucket_index;
Dave Barachc3799992016-08-15 11:12:27 -0400521 BVT (clib_bihash_value) * v;
Dave Barach908a5ea2017-07-14 12:42:21 -0400522 BVT (clib_bihash_bucket) * b;
Dave Barach5e6b9582016-12-12 15:37:29 -0500523 int i, limit;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700524
Dave Barachc3799992016-08-15 11:12:27 -0400525 ASSERT (valuep);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700526
Dave Barachc3799992016-08-15 11:12:27 -0400527 hash = BV (clib_bihash_hash) (search_key);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700528
Dave Barachc3799992016-08-15 11:12:27 -0400529 bucket_index = hash & (h->nbuckets - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700530 b = &h->buckets[bucket_index];
531
Damjan Marion882fcfe2018-07-17 23:01:49 +0200532 if (BV (clib_bihash_bucket_is_empty) (b))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700533 return -1;
534
Dave Barach508498f2018-07-19 12:11:16 -0400535 if (PREDICT_FALSE (b->lock))
Dave Barach908a5ea2017-07-14 12:42:21 -0400536 {
Dave Barach508498f2018-07-19 12:11:16 -0400537 volatile BVT (clib_bihash_bucket) * bv = b;
538 while (bv->lock)
Damjan Marion2a03efe2018-07-20 21:48:59 +0200539 CLIB_PAUSE ();
Dave Barach908a5ea2017-07-14 12:42:21 -0400540 }
541
Ed Warnickecb9cada2015-12-08 15:45:58 -0700542 hash >>= h->log2_nbuckets;
543
Dave Barachc3799992016-08-15 11:12:27 -0400544 v = BV (clib_bihash_get_value) (h, b->offset);
Dave Barach5e6b9582016-12-12 15:37:29 -0500545 limit = BIHASH_KVP_PER_PAGE;
546 v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
547 if (PREDICT_FALSE (b->linear_search))
548 limit <<= b->log2_pages;
Dave Barachc3799992016-08-15 11:12:27 -0400549
Dave Barach5e6b9582016-12-12 15:37:29 -0500550 for (i = 0; i < limit; i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700551 {
Dave Barachc3799992016-08-15 11:12:27 -0400552 if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
553 {
554 *valuep = v->kvp[i];
555 return 0;
556 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700557 }
558 return -1;
559}
560
Dave Barachc3799992016-08-15 11:12:27 -0400561u8 *BV (format_bihash) (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700562{
Dave Barachc3799992016-08-15 11:12:27 -0400563 BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700564 int verbose = va_arg (*args, int);
Dave Barach908a5ea2017-07-14 12:42:21 -0400565 BVT (clib_bihash_bucket) * b;
Dave Barachc3799992016-08-15 11:12:27 -0400566 BVT (clib_bihash_value) * v;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700567 int i, j, k;
568 u64 active_elements = 0;
Dave Barache7d212f2018-02-07 13:14:06 -0500569 u64 active_buckets = 0;
570 u64 linear_buckets = 0;
Dave Barach97f5af02018-02-22 09:48:45 -0500571 u64 used_bytes;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700572
573 s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");
Dave Barachc3799992016-08-15 11:12:27 -0400574
Ed Warnickecb9cada2015-12-08 15:45:58 -0700575 for (i = 0; i < h->nbuckets; i++)
576 {
Dave Barachc3799992016-08-15 11:12:27 -0400577 b = &h->buckets[i];
Damjan Marion882fcfe2018-07-17 23:01:49 +0200578 if (BV (clib_bihash_bucket_is_empty) (b))
Dave Barachc3799992016-08-15 11:12:27 -0400579 {
580 if (verbose > 1)
581 s = format (s, "[%d]: empty\n", i);
582 continue;
583 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700584
Dave Barache7d212f2018-02-07 13:14:06 -0500585 active_buckets++;
586
587 if (b->linear_search)
588 linear_buckets++;
589
Ed Warnickecb9cada2015-12-08 15:45:58 -0700590 if (verbose)
Dave Barachc3799992016-08-15 11:12:27 -0400591 {
Dave Barach5e6b9582016-12-12 15:37:29 -0500592 s = format (s, "[%d]: heap offset %d, len %d, linear %d\n", i,
593 b->offset, (1 << b->log2_pages), b->linear_search);
Dave Barachc3799992016-08-15 11:12:27 -0400594 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700595
Dave Barachc3799992016-08-15 11:12:27 -0400596 v = BV (clib_bihash_get_value) (h, b->offset);
597 for (j = 0; j < (1 << b->log2_pages); j++)
598 {
599 for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
600 {
601 if (BV (clib_bihash_is_free) (&v->kvp[k]))
602 {
603 if (verbose > 1)
604 s = format (s, " %d: empty\n",
605 j * BIHASH_KVP_PER_PAGE + k);
606 continue;
607 }
608 if (verbose)
609 {
Vijayabhaskar Katamreddyfb8e61c2017-12-14 13:20:50 -0800610 if (h->fmt_fn)
611 {
612 s = format (s, " %d: %U\n",
613 j * BIHASH_KVP_PER_PAGE + k,
614 h->fmt_fn, &(v->kvp[k]));
615 }
616 else
617 {
618 s = format (s, " %d: %U\n",
619 j * BIHASH_KVP_PER_PAGE + k,
620 BV (format_bihash_kvp), &(v->kvp[k]));
621 }
Dave Barachc3799992016-08-15 11:12:27 -0400622 }
623 active_elements++;
624 }
625 v++;
626 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700627 }
628
Dave Barache7d212f2018-02-07 13:14:06 -0500629 s = format (s, " %lld active elements %lld active buckets\n",
630 active_elements, active_buckets);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700631 s = format (s, " %d free lists\n", vec_len (h->freelists));
Dave Barache7d212f2018-02-07 13:14:06 -0500632
633 for (i = 0; i < vec_len (h->freelists); i++)
634 {
635 u32 nfree = 0;
636 BVT (clib_bihash_value) * free_elt;
637
638 free_elt = h->freelists[i];
639 while (free_elt)
640 {
641 nfree++;
642 free_elt = free_elt->next_free;
643 }
644
645 s = format (s, " [len %d] %u free elts\n", 1 << i, nfree);
646 }
647
648 s = format (s, " %lld linear search buckets\n", linear_buckets);
Dave Barach908a5ea2017-07-14 12:42:21 -0400649 s = format (s, " %lld cache hits, %lld cache misses\n",
650 h->cache_hits, h->cache_misses);
Dave Barach97f5af02018-02-22 09:48:45 -0500651 used_bytes = h->alloc_arena_next - h->alloc_arena;
652 s = format (s,
653 " arena: base %llx, next %llx\n"
654 " used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n",
655 h->alloc_arena, h->alloc_arena_next,
656 used_bytes, used_bytes >> 20,
657 h->alloc_arena_size, h->alloc_arena_size >> 20);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700658 return s;
659}
660
Dave Barachc3799992016-08-15 11:12:27 -0400661void BV (clib_bihash_foreach_key_value_pair)
662 (BVT (clib_bihash) * h, void *callback, void *arg)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700663{
664 int i, j, k;
Dave Barach908a5ea2017-07-14 12:42:21 -0400665 BVT (clib_bihash_bucket) * b;
Dave Barachc3799992016-08-15 11:12:27 -0400666 BVT (clib_bihash_value) * v;
667 void (*fp) (BVT (clib_bihash_kv) *, void *) = callback;
668
Ed Warnickecb9cada2015-12-08 15:45:58 -0700669 for (i = 0; i < h->nbuckets; i++)
670 {
Dave Barachc3799992016-08-15 11:12:27 -0400671 b = &h->buckets[i];
Damjan Marion882fcfe2018-07-17 23:01:49 +0200672 if (BV (clib_bihash_bucket_is_empty) (b))
Dave Barachc3799992016-08-15 11:12:27 -0400673 continue;
674
675 v = BV (clib_bihash_get_value) (h, b->offset);
676 for (j = 0; j < (1 << b->log2_pages); j++)
677 {
678 for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
679 {
680 if (BV (clib_bihash_is_free) (&v->kvp[k]))
681 continue;
682
683 (*fp) (&v->kvp[k], arg);
Dave Barachca45ee72018-08-06 08:43:47 -0400684 /*
685 * In case the callback deletes the last entry in the bucket...
686 */
687 if (BV (clib_bihash_bucket_is_empty) (b))
688 goto doublebreak;
Dave Barachc3799992016-08-15 11:12:27 -0400689 }
690 v++;
691 }
Dave Barachca45ee72018-08-06 08:43:47 -0400692 doublebreak:
693 ;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700694 }
695}
Dave Barachdd3a57f2016-07-27 16:58:51 -0400696
Chris Luke16bcf7d2016-09-01 14:31:46 -0400697/** @endcond */
Dave Barachc3799992016-08-15 11:12:27 -0400698
699/*
700 * fd.io coding-style-patch-verification: ON
701 *
702 * Local Variables:
703 * eval: (c-set-style "gnu")
704 * End:
705 */