blob: 85dfb788308b5445c2a96750164516a3b1aee6d1 [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 Copyright (c) 2010 Eliot Dresselhaus
17
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
25
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
28
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36*/
37
38#ifndef included_clib_vhash_h
39#define included_clib_vhash_h
40
41#include <vppinfra/vector.h>
42
43#ifdef CLIB_HAVE_VEC128
44
45#include <vppinfra/cache.h>
46#include <vppinfra/hash.h>
47#include <vppinfra/pipeline.h>
48
/* Gathers 32 bits worth of key with given index.
   state is caller-supplied opaque context; vector_index selects the
   key (element index), key_word_index selects which 32-bit word of
   that key. */
typedef u32 (vhash_key_function_t) (void *state, u32 vector_index,
				    u32 key_word_index);
/* As above but gathers one key word for 4 keys at once. */
typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index,
				       u32 key_word_index);
/* Sets/gets result of hash lookup.  result is the user result
   (internal bucket results are user result + 1; 0 marks empty). */
typedef u32 (vhash_result_function_t) (void *state, u32 vector_index,
				       u32 result, u32 n_key_u32);
/* As above but delivers 4 results at once. */
typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index,
					  u32x4 results, u32 n_key_u32);
Ed Warnickecb9cada2015-12-08 15:45:58 -070059
Dave Barachc3799992016-08-15 11:12:27 -040060typedef struct
61{
Ed Warnickecb9cada2015-12-08 15:45:58 -070062 u32x4_union_t hashed_key[3];
63} vhash_hashed_key_t;
64
/* Search buckets are really this structure. */
typedef struct
{
  /* 4 results for this bucket.
     Zero is used to mark empty results. This means user can't use the result ~0
     since user results differ from internal results stored in buckets by 1.
     e.g. internal result = user result + 1. */
  u32x4_union_t result;

  /* n_key_u32s u32x4s of key data follow.  key[w].as_u32[i] is word w
     of the key stored in result slot i (flexible array member). */
  u32x4_union_t key[0];
} vhash_search_bucket_t;
77
Dave Barachc3799992016-08-15 11:12:27 -040078typedef struct
79{
80 u32x4_union_t *search_buckets;
Ed Warnickecb9cada2015-12-08 15:45:58 -070081
82 /* Vector of bucket free indices. */
Dave Barachc3799992016-08-15 11:12:27 -040083 u32 *free_indices;
Ed Warnickecb9cada2015-12-08 15:45:58 -070084
85 /* Number of entries in this overflow bucket. */
86 u32 n_overflow;
87} vhash_overflow_buckets_t;
88
Dave Barachc3799992016-08-15 11:12:27 -040089typedef struct
90{
Ed Warnickecb9cada2015-12-08 15:45:58 -070091 /* 2^log2_n_keys keys grouped in groups of 4.
92 Each bucket contains 4 results plus 4 keys for a
93 total of (1 + n_key_u32) u32x4s. */
Dave Barachc3799992016-08-15 11:12:27 -040094 u32x4_union_t *search_buckets;
Ed Warnickecb9cada2015-12-08 15:45:58 -070095
96 /* When a bucket of 4 results/keys are full we search
97 the overflow. hash_key is used to select which overflow
98 bucket. */
99 vhash_overflow_buckets_t overflow_buckets[16];
100
101 /* Total count of occupied elements in hash table. */
102 u32 n_elts;
103
104 u32 log2_n_keys;
105
106 /* Number of 32 bit words in a hash key. */
107 u32 n_key_u32;
108
109 u32x4_union_t bucket_mask;
110
111 /* table[i] = min_log2 (first_set (~i)). */
112 u8 find_first_zero_table[16];
113
114 /* Hash seeds for Jenkins hash. */
115 u32x4_union_t hash_seeds[3];
116
117 /* Key work space is a vector of length
118 n_key_u32s << log2_n_key_word_len_u32x. */
119 u32 log2_n_key_word_len_u32x;
120
121 /* Work space to store keys between pipeline stages. */
Dave Barachc3799992016-08-15 11:12:27 -0400122 u32x4_union_t *key_work_space;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700123
124 /* Hash work space to store Jenkins hash values between
125 pipeline stages. */
Dave Barachc3799992016-08-15 11:12:27 -0400126 vhash_hashed_key_t *hash_work_space;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700127} vhash_t;
128
129always_inline vhash_overflow_buckets_t *
130vhash_get_overflow_buckets (vhash_t * h, u32 key)
131{
132 u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
133 ASSERT (i < ARRAY_LEN (h->overflow_buckets));
134 return h->overflow_buckets + i;
135}
136
137always_inline uword
138vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
139{
140 u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
141 ASSERT (i < ARRAY_LEN (h->overflow_buckets));
142 return h->overflow_buckets[i].n_overflow > 0;
143}
144
145always_inline void
146vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
147{
148 vec_free (obs->search_buckets);
149 vec_free (obs->free_indices);
150}
151
152always_inline void
153vhash_free (vhash_t * h)
154{
155 uword i;
156 for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
157 vhash_free_overflow_buckets (&h->overflow_buckets[i]);
158 vec_free (h->search_buckets);
159 vec_free (h->key_work_space);
160 vec_free (h->hash_work_space);
161}
162
163always_inline void
164vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
165{
166 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
167 u32 i1 = vi % 4;
168 vec_elt (h->key_work_space, i0).as_u32[i1] = value;
169}
170
171always_inline void
172vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
173{
174 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
175 vec_elt (h->key_work_space, i0).as_u32x4 = value;
176}
177
178always_inline u32
179vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
180{
181 u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
182 u32 i1 = vi % 4;
183 return vec_elt (h->key_work_space, i0).as_u32[i1];
184}
185
/* Fetch key word wi for 4 elements at once.
   NOTE: unlike vhash_set_key_word_u32x, vi here is already a vector
   index (callers pass vector_index, not vector_index * 4), so no
   division by 4 is performed. */
always_inline u32x
vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
  return vec_elt (h->key_work_space, i0).as_u32x4;
}
192
/* Ensure key and hash work spaces can hold n_vectors in-flight keys of
   n_key_u32 32-bit words each.  Must be called before pipeline stages. */
always_inline void
vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
{
  u32 n, l;

  /* Number of u32x4 slots: 4 keys per slot, rounded up to a power
     of 2 and at least 8. */
  n = max_pow2 (n_vectors) / 4;
  n = clib_max (n, 8);

  h->log2_n_key_word_len_u32x = l = min_log2 (n);
  vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1,
			CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
}
206
207always_inline void
208vhash_gather_key_stage (vhash_t * h,
209 u32 vector_index,
210 u32 n_vectors,
211 vhash_key_function_t key_function,
Dave Barachc3799992016-08-15 11:12:27 -0400212 void *state, u32 n_key_u32s)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700213{
214 u32 i, j, vi;
215
216 /* Gather keys for 4 packets (for 128 bit vector length e.g. u32x4). */
217 for (i = 0; i < n_vectors; i++)
218 {
219 vi = vector_index * 4 + i;
220 for (j = 0; j < n_key_u32s; j++)
Dave Barachc3799992016-08-15 11:12:27 -0400221 vhash_set_key_word (h, j, vi, key_function (state, vi, j));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700222 }
223}
224
225always_inline void
226vhash_gather_4key_stage (vhash_t * h,
227 u32 vector_index,
228 vhash_4key_function_t key_function,
Dave Barachc3799992016-08-15 11:12:27 -0400229 void *state, u32 n_key_u32s)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700230{
231 u32 j, vi;
232 vi = vector_index * 4;
233 for (j = 0; j < n_key_u32s; j++)
234 vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
235}
236
/* Pipeline stage: fold key words beyond the first 3 into the Jenkins
   hash state for 4 keys at once.  Words are consumed from the last
   key word towards the front, 3 per mix round; the resulting (a,b,c)
   state is saved for vhash_finalize_stage. */
always_inline void
vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 i, n_left;
  u32x a, b, c;

  /* Only need to do this for keys longer than 12 bytes. */
  ASSERT (n_key_u32s > 3);

  a = h->hash_seeds[0].as_u32x4;
  b = h->hash_seeds[1].as_u32x4;
  c = h->hash_seeds[2].as_u32x4;
  for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
    {
      /* Add up to 3 remaining key words (highest index first). */
      a +=
	vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
      if (n_left > 1)
	b +=
	  vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
      if (n_left > 2)
	c +=
	  vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);

      hash_v3_mix_u32x (a, b, c);
    }

  /* Save away a, b, c for later finalize. */
  {
    vhash_hashed_key_t *hk =
      vec_elt_at_index (h->hash_work_space, vector_index);
    hk->hashed_key[0].as_u32x4 = a;
    hk->hashed_key[1].as_u32x4 = b;
    hk->hashed_key[2].as_u32x4 = c;
  }
}
272
273always_inline vhash_search_bucket_t *
274vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
275{
276 return ((vhash_search_bucket_t *)
277 vec_elt_at_index (h->search_buckets,
Dave Barachc3799992016-08-15 11:12:27 -0400278 (i / 4) *
279 ((sizeof (vhash_search_bucket_t) /
280 sizeof (u32x4)) + n_key_u32s)));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700281}
282
283always_inline vhash_search_bucket_t *
284vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
285{
286 u32 i = key_hash & h->bucket_mask.as_u32[0];
287 return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
288}
289
/* For 4 key hashes at once, compute each one's search bucket offset in
   BYTES from h->search_buckets.  Since SSE has no 32-bit vector
   multiply here, the multiply by bucket size is decomposed into shifts
   and adds; only the bucket sizes produced by supported n_key_u32s
   values are handled. */
always_inline u32x4
vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash,
					u32 n_key_u32s)
{
  vhash_search_bucket_t *b;
  u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
  u32x4 r = key_hash & h->bucket_mask.as_u32x4;

  /* Multiply with shifts and adds to get bucket byte offset.
     r counts keys (4 per bucket), hence the "- 2" in the shift. */
#define _(x) u32x4_ishift_left (r, (x) - 2)
  if (n_bytes_per_bucket == (1 << 5))
    r = _(5);
  else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
    r = _(5) + _(4);
  else if (n_bytes_per_bucket == (1 << 6))
    r = _(6);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
    r = _(6) + _(4);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
    r = _(6) + _(5);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
    r = _(6) + _(5) + _(4);
  else
    ASSERT (0);
#undef _
  return r;
}
317
/* Pipeline stage: finish the Jenkins hash for 4 keys, cache the bucket
   byte offsets, and prefetch the 4 search buckets for the next stage.
   For short keys (<= 3 words) hashing starts here from the seeds;
   longer keys continue from the state saved by vhash_mix_stage. */
always_inline void
vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 n_left;
  u32x a, b, c;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);

  if (n_key_u32s <= 3)
    {
      a = h->hash_seeds[0].as_u32x4;
      b = h->hash_seeds[1].as_u32x4;
      c = h->hash_seeds[2].as_u32x4;
      n_left = n_key_u32s;
    }
  else
    {
      /* Resume from state left by vhash_mix_stage; first 3 words
         are still unconsumed. */
      a = hk->hashed_key[0].as_u32x4;
      b = hk->hashed_key[1].as_u32x4;
      c = hk->hashed_key[2].as_u32x4;
      n_left = 3;
    }

  if (n_left > 0)
    a += vhash_get_key_word_u32x (h, 0, vector_index);
  if (n_left > 1)
    b += vhash_get_key_word_u32x (h, 1, vector_index);
  if (n_left > 2)
    c += vhash_get_key_word_u32x (h, 2, vector_index);

  hash_v3_finalize_u32x (a, b, c);

  /* Only save away last 32 bits of hash code. */
  hk->hashed_key[2].as_u32x4 = c;

  /* Prefetch buckets. This costs a bit for small tables but saves
     big for large ones. */
  {
    vhash_search_bucket_t *b0, *b1, *b2, *b3;
    u32x4_union_t kh;

    kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
    /* Cache bucket byte offsets for get/set/unset stages. */
    hk->hashed_key[1].as_u32x4 = kh.as_u32x4;

    b0 = (void *) h->search_buckets + kh.as_u32[0];
    b1 = (void *) h->search_buckets + kh.as_u32[1];
    b2 = (void *) h->search_buckets + kh.as_u32[2];
    b3 = (void *) h->search_buckets + kh.as_u32[3];

    CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]),
		   READ);
    CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]),
		   READ);
    CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]),
		   READ);
    CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]),
		   READ);
  }
}
Dave Barachc3799992016-08-15 11:12:27 -0400377
Ed Warnickecb9cada2015-12-08 15:45:58 -0700378always_inline u32
379vhash_merge_results (u32x4 r)
380{
381 r = r | u32x4_word_shift_right (r, 2);
382 r = r | u32x4_word_shift_right (r, 1);
383 return u32x4_get0 (r);
384}
385
386/* Bucket is full if none of its 4 results are 0. */
387always_inline u32
388vhash_search_bucket_is_full (u32x4 r)
Dave Barachc3799992016-08-15 11:12:27 -0400389{
390 return u32x4_zero_byte_mask (r) == 0;
391}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700392
/* Index (0..3) of a non-empty (non-zero) result lane in x.
   The byte mask has 4 bits per u32 lane, hence the divide by 4.
   At least one lane must be non-zero. */
always_inline u32
vhash_non_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0xffff);
  return min_log2 (0xffff & ~empty_mask) / 4;
}
400
/* Index (0..3) of an empty (zero) result lane in x.
   At least one lane must be zero. */
always_inline u32
vhash_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0);
  return min_log2 (0xffff & empty_mask) / 4;
}
408
409always_inline u32x4
410vhash_bucket_compare (vhash_t * h,
Dave Barachc3799992016-08-15 11:12:27 -0400411 u32x4_union_t * bucket, u32 key_word_index, u32 vi)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700412{
413 u32 k = vhash_get_key_word (h, key_word_index, vi);
Dave Barachc3799992016-08-15 11:12:27 -0400414 u32x4 x = { k, k, k, k };
Damjan Mariona52e1662018-05-19 00:04:23 +0200415 return (bucket[key_word_index].as_u32x4 == x);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700416}
417
/* Compare key word wi of 4 distinct keys (vector index vi) against the
   stored key words of 4 search buckets b0..b3.  Each key's word is
   broadcast to all 4 lanes and compared with its bucket's stored
   words; cmp0..cmp3 receive all-ones lanes where slots match. */
#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
do {									\
  u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi));		\
  u32x4 _k0 = u32x4_splat_word (_k4, 0);				\
  u32x4 _k1 = u32x4_splat_word (_k4, 1);				\
  u32x4 _k2 = u32x4_splat_word (_k4, 2);				\
  u32x4 _k3 = u32x4_splat_word (_k4, 3);				\
									\
  cmp0 = (b0->key[wi].as_u32x4 == _k0);					\
  cmp1 = (b1->key[wi].as_u32x4 == _k1);					\
  cmp2 = (b2->key[wi].as_u32x4 == _k2);					\
  cmp3 = (b3->key[wi].as_u32x4 == _k3);					\
} while (0)
431
Dave Barachc3799992016-08-15 11:12:27 -0400432u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700433
/* Pipeline stage: look up n_vectors keys (scalar loop, one at a time)
   using hashes computed by vhash_finalize_stage.  Delivers each user
   result (or ~0 on miss, since internal results are user + 1) via
   result_function. */
always_inline void
vhash_get_stage (vhash_t * h,
		 u32 vector_index,
		 u32 n_vectors,
		 vhash_result_function_t result_function,
		 void *state, u32 n_key_u32s)
{
  u32 i, j;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 result;
      u32x4 r, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      /* AND together per-word compares; surviving lanes match all words. */
      r = r0 = b->result.as_u32x4;
      for (j = 0; j < n_key_u32s; j++)
	r &= vhash_bucket_compare (h, &b->key[0], j, vi);

      /* At this point only one of 4 results should be non-zero.
         So we can or all 4 together and get the valid result (if there is one). */
      result = vhash_merge_results (r);

      /* Full bucket with no match: key may live in overflow. */
      if (!result && vhash_search_bucket_is_full (r0))
	result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      result_function (state, vi, result - 1, n_key_u32s);
    }
}
469
/* Pipeline stage: look up 4 keys at once using the bucket byte offsets
   cached by vhash_finalize_stage.  Buckets are compared vectorized,
   results transposed so each lane holds one key's outcome, overflow is
   searched only for keys whose (full) bucket had no match, and all 4
   user results (internal - 1; ~0 on miss) are delivered together. */
always_inline void
vhash_get_4_stage (vhash_t * h,
		   u32 vector_index,
		   vhash_4result_function_t result_function,
		   void *state, u32 n_key_u32s)
{
  u32 i, vi;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b0, *b1, *b2, *b3;
  u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
  u32x4_union_t kh;

  /* Bucket byte offsets precomputed in finalize stage. */
  kh.as_u32x4 = hk->hashed_key[1].as_u32x4;

  b0 = (void *) h->search_buckets + kh.as_u32[0];
  b1 = (void *) h->search_buckets + kh.as_u32[1];
  b2 = (void *) h->search_buckets + kh.as_u32[2];
  b3 = (void *) h->search_buckets + kh.as_u32[3];

  r0 = r0_before = b0->result.as_u32x4;
  r1 = r1_before = b1->result.as_u32x4;
  r2 = r2_before = b2->result.as_u32x4;
  r3 = r3_before = b3->result.as_u32x4;

  vi = vector_index * 4;

  /* Mask out results whose stored key words do not all match. */
  for (i = 0; i < n_key_u32s; i++)
    {
      u32x4 c0, c1, c2, c3;
      vhash_bucket_compare_4 (h, i, vector_index,
			      b0, b1, b2, b3, c0, c1, c2, c3);
      r0 &= c0;
      r1 &= c1;
      r2 &= c2;
      r3 &= c3;
    }

  /* After transpose, each rI holds slot I's result for all 4 keys. */
  u32x4_transpose (r0, r1, r2, r3);

  /* Gather together 4 results. */
  {
    u32x4_union_t r;
    u32x4 ones = { 1, 1, 1, 1 };
    u32 not_found_mask;

    /* At most one slot per key matched, so OR merges them. */
    r.as_u32x4 = r0 | r1 | r2 | r3;
    not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
    /* Only consult overflow for keys whose bucket was full. */
    not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
		       | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
		       | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
		       | (vhash_search_bucket_is_full (r3_before) <<
			  (4 * 3)));
    if (not_found_mask)
      {
	u32x4_union_t key_hash;

	key_hash.as_u32x4 =
	  hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;

	/* Slow path: one of the buckets may have been full and we need to search overflow. */
	if (not_found_mask & (1 << (4 * 0)))
	  r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
					    vi + 0, n_key_u32s);
	if (not_found_mask & (1 << (4 * 1)))
	  r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
					    vi + 1, n_key_u32s);
	if (not_found_mask & (1 << (4 * 2)))
	  r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
					    vi + 2, n_key_u32s);
	if (not_found_mask & (1 << (4 * 3)))
	  r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
					    vi + 3, n_key_u32s);
      }

    result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
  }
}
548
549u32
550vhash_set_overflow (vhash_t * h,
Dave Barachc3799992016-08-15 11:12:27 -0400551 u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700552
/* Pipeline stage: insert or overwrite n_vectors keys.  For each key,
   result_function receives the old user result (~0 if absent) and
   returns the new user result to store.  New entries go into the first
   empty bucket slot at or after (key_hash & 3), or into overflow if
   the bucket is full.  Updates h->n_elts by the number of new inserts. */
always_inline void
vhash_set_stage (vhash_t * h,
		 u32 vector_index,
		 u32 n_vectors,
		 vhash_result_function_t result_function,
		 void *state, u32 n_key_u32s)
{
  u32 i, j, n_new_elts = 0;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result, new_result;
      u32 i_set;
      u32x4 r, r0, cmp;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      /* AND together per-word compares to find a matching slot. */
      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
	cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;
      r = r0 & cmp;

      /* At this point only one of 4 results should be non-zero.
         So we can or all 4 together and get the valid result (if there is one). */
      old_result = vhash_merge_results (r);

      /* Full bucket with no match: key may already be in overflow. */
      if (!old_result && vhash_search_bucket_is_full (r0))
	old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      /* Get new result; possibly do something with old result. */
      new_result = result_function (state, vi, old_result - 1, n_key_u32s);

      /* User cannot use ~0 as a hash result since a result of 0 is
         used to mark unused bucket entries. */
      ASSERT (new_result + 1 != 0);
      new_result += 1;

      /* Set over-writes existing result. */
      if (old_result)
	{
	  i_set = vhash_non_empty_result_index (r);
	  b->result.as_u32[i_set] = new_result;
	}
      else
	{
	  /* Set allocates new result. */
	  u32 valid_mask;

	  valid_mask = (((b->result.as_u32[0] != 0) << 0)
			| ((b->result.as_u32[1] != 0) << 1)
			| ((b->result.as_u32[2] != 0) << 2)
			| ((b->result.as_u32[3] != 0) << 3));

	  /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */
	  i_set = key_hash & 3;
	  valid_mask =
	    ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;

	  /* Insert into first empty position in bucket after key_hash. */
	  i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;

	  if (valid_mask != 0xf)
	    {
	      n_new_elts += 1;

	      b->result.as_u32[i_set] = new_result;

	      /* Insert new key into search bucket. */
	      for (j = 0; j < n_key_u32s; j++)
		b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
	    }
	  else
	    vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
	}
    }

  h->n_elts += n_new_elts;
}
638
Dave Barachc3799992016-08-15 11:12:27 -0400639u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700640
641void
642vhash_unset_refill_from_overflow (vhash_t * h,
643 vhash_search_bucket_t * b,
Dave Barachc3799992016-08-15 11:12:27 -0400644 u32 key_hash, u32 n_key_u32s);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700645
/* Note: Eliot tried doing 4 unsets at once and could not get a speed up
   and abandoned vhash_unset_4_stage. */
/* Pipeline stage: delete n_vectors keys.  Matching bucket slots are
   zeroed; if the key lived in overflow it is removed there, and a full
   bucket that loses an entry is refilled from overflow.  The old user
   result (~0 if the key was absent) is passed to result_function.
   Decrements h->n_elts by the number of entries actually removed. */
always_inline void
vhash_unset_stage (vhash_t * h,
		   u32 vector_index,
		   u32 n_vectors,
		   vhash_result_function_t result_function,
		   void *state, u32 n_key_u32s)
{
  u32 i, j, n_elts_unset = 0;
  vhash_hashed_key_t *hk =
    vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result;
      u32x4 cmp, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      /* AND together per-word compares to find a matching slot. */
      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
	cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;

      /* At this point cmp is all ones where key matches and zero otherwise.
         So, this will invalidate results for matching key and do nothing otherwise. */
      b->result.as_u32x4 = r0 & ~cmp;

      old_result = vhash_merge_results (r0 & cmp);

      n_elts_unset += old_result != 0;

      if (vhash_search_bucket_is_full (r0))
	{
	  if (old_result)
	    vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
	  else
	    old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
	}

      result_function (state, vi, old_result - 1, n_key_u32s);
    }
  ASSERT (h->n_elts >= n_elts_unset);
  h->n_elts -= n_elts_unset;
}
696
697void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
698 u32 * hash_seeds);
699
700void vhash_resize (vhash_t * old, u32 log2_n_keys);
701
Dave Barachc3799992016-08-15 11:12:27 -0400702typedef struct
703{
704 vhash_t *vhash;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700705
Dave Barachc3799992016-08-15 11:12:27 -0400706 union
707 {
708 struct
709 {
710 u32 *keys;
711 u32 *results;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700712 };
713
714 /* Vector layout for get keys. */
Dave Barachc3799992016-08-15 11:12:27 -0400715 struct
716 {
717 u32x4_union_t *get_keys;
718 u32x4_union_t *get_results;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700719 };
720 };
721
722 u32 n_vectors_div_4;
723 u32 n_vectors_mod_4;
724
725 u32 n_key_u32;
726
727 u32 n_keys;
728} vhash_main_t;
729
730always_inline u32
731vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
732{
733 u32 i, n;
734
735 i = vm->n_keys;
736 vm->n_keys = i + n_keys;
737
738 n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;
739
740 vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
741 vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));
742
743 return i;
744}
745
746always_inline void
747vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
748 u32 value)
749{
Dave Barachc3799992016-08-15 11:12:27 -0400750 u32x4_union_t *k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700751 ASSERT (wi < n_key_u32);
752 k[wi].as_u32[vi % 4] = value;
753}
754
755always_inline u32
756vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
757{
Dave Barachc3799992016-08-15 11:12:27 -0400758 u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700759 return r->as_u32[vi % 4];
760}
761
762void vhash_main_get (vhash_main_t * vm);
763
764always_inline u32
765vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
766{
767 u32 i;
768
769 i = vm->n_keys;
770 vm->n_keys = i + n_keys;
771
772 vec_resize (vm->keys, n_keys * n_key_u32);
773 vec_resize (vm->results, n_keys);
774
775 return i;
776}
777
778always_inline void
779vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
780 u32 value)
781{
Dave Barachc3799992016-08-15 11:12:27 -0400782 u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700783 ASSERT (wi < n_key_u32);
784 k[wi] = value;
785}
786
787always_inline void
788vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
789{
Dave Barachc3799992016-08-15 11:12:27 -0400790 u32 *r = vec_elt_at_index (vm->results, vi);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700791 r[0] = result;
792}
793
794always_inline u32
795vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
796{
Dave Barachc3799992016-08-15 11:12:27 -0400797 u32 *r = vec_elt_at_index (vm->results, vi);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700798 return r[0];
799}
800
801void vhash_main_set (vhash_main_t * vm);
802
/* Unset shares the scalar key/result layout with set; forward. */
always_inline u32
vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700808
/* Store word wi of unset-key vi (same buffer layout as set). */
always_inline void
vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
			  u32 value)
{
  vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700815
/* Store the result slot for unset-key vi (same buffer layout as set). */
always_inline void
vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  vhash_set_set_result (vm, vi, result);
}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700821
/* Fetch the removed entry's old result after vhash_main_unset. */
always_inline u32
vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  return vhash_set_fetch_old_result (vm, vi);
}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700827
828void vhash_main_unset (vhash_main_t * vm);
829
Dave Barachc3799992016-08-15 11:12:27 -0400830typedef struct
831{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700832 vhash_main_t new;
833
Dave Barachc3799992016-08-15 11:12:27 -0400834 vhash_t *old;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700835} vhash_resize_t;
836
Dave Barachc3799992016-08-15 11:12:27 -0400837u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
838 u32 n_vectors);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700839
840#endif /* CLIB_HAVE_VEC128 */
841
842#endif /* included_clib_vhash_h */
Dave Barachc3799992016-08-15 11:12:27 -0400843
844/*
845 * fd.io coding-style-patch-verification: ON
846 *
847 * Local Variables:
848 * eval: (c-set-style "gnu")
849 * End:
850 */