/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2010 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef included_clib_vhash_h
#define included_clib_vhash_h

#include <vppinfra/vector.h>

#ifdef CLIB_HAVE_VEC128

#include <vppinfra/cache.h>
#include <vppinfra/hash.h>
#include <vppinfra/pipeline.h>

/* Gathers 32 bits of key data for the given key word index. */
typedef u32 (vhash_key_function_t) (void *state, u32 vector_index,
                                    u32 key_word_index);
typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index,
                                       u32 key_word_index);
/* Sets/gets result of hash lookup. */
typedef u32 (vhash_result_function_t) (void *state, u32 vector_index,
                                       u32 result, u32 n_key_u32);
typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index,
                                          u32x4 results, u32 n_key_u32);

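/* Illustrative sketch (not part of the vhash API): one possible shape for
   the callbacks above, assuming key words live in a flat array with
   n_key_u32 words per lookup.  All example_* names are hypothetical.

     typedef struct
     {
       u32 *key_words;
       u32 *results;
       u32 n_key_u32;
     } example_lookup_state_t;

     static u32
     example_gather_key_word (void *state, u32 vector_index, u32 key_word_index)
     {
       example_lookup_state_t *s = state;
       return s->key_words[vector_index * s->n_key_u32 + key_word_index];
     }

     static u32
     example_get_result (void *state, u32 vector_index, u32 result, u32 n_key_u32)
     {
       example_lookup_state_t *s = state;
       s->results[vector_index] = result;
       return result;
     }

   vhash_get_stage () converts internal results back to user results before
   invoking the callback, so a stored value of ~0 means "not found"; the
   callback's return value is ignored on the get path. */
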
typedef struct
{
  u32x4_union_t hashed_key[3];
} vhash_hashed_key_t;

/* Search buckets are really this structure. */
typedef struct
{
  /* 4 results for this bucket.
     Zero marks an empty result.  Internal results are therefore stored
     offset by 1 from user results (internal result = user result + 1),
     which means the user cannot use ~0 as a result value. */
  u32x4_union_t result;

  /* n_key_u32s u32x4s of key data follow. */
  u32x4_union_t key[0];
} vhash_search_bucket_t;

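/* Layout note (worked example only): with n_key_u32 = 2 a search bucket
   occupies sizeof (vhash_search_bucket_t) + 2 * sizeof (u32x4) = 16 + 32 =
   48 bytes: one u32x4 of results followed by one u32x4 per key word, where
   lane i of every u32x4 belongs to bucket entry i. */
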
typedef struct
{
  u32x4_union_t *search_buckets;

  /* Vector of bucket free indices. */
  u32 *free_indices;

  /* Number of entries in this overflow bucket. */
  u32 n_overflow;
} vhash_overflow_buckets_t;

typedef struct
{
  /* 2^log2_n_keys keys grouped into buckets of 4.
     Each bucket contains 4 results plus 4 keys for a
     total of (1 + n_key_u32) u32x4s. */
  u32x4_union_t *search_buckets;

  /* When a bucket of 4 results/keys is full we search
     the overflow.  The hash key selects which overflow
     bucket to use. */
  vhash_overflow_buckets_t overflow_buckets[16];

  /* Total count of occupied elements in hash table. */
  u32 n_elts;

  u32 log2_n_keys;

  /* Number of 32 bit words in a hash key. */
  u32 n_key_u32;

  u32x4_union_t bucket_mask;

  /* table[i] = min_log2 (first_set (~i)). */
  u8 find_first_zero_table[16];

  /* Hash seeds for Jenkins hash. */
  u32x4_union_t hash_seeds[3];

  /* Key work space is a vector of length
     n_key_u32s << log2_n_key_word_len_u32x. */
  u32 log2_n_key_word_len_u32x;

  /* Work space to store keys between pipeline stages. */
  u32x4_union_t *key_work_space;

  /* Hash work space to store Jenkins hash values between
     pipeline stages. */
  vhash_hashed_key_t *hash_work_space;
} vhash_t;

always_inline vhash_overflow_buckets_t *
vhash_get_overflow_buckets (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets + i;
}

always_inline uword
vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key)
{
  u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf);
  ASSERT (i < ARRAY_LEN (h->overflow_buckets));
  return h->overflow_buckets[i].n_overflow > 0;
}

always_inline void
vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs)
{
  vec_free (obs->search_buckets);
  vec_free (obs->free_indices);
}

always_inline void
vhash_free (vhash_t * h)
{
  uword i;
  for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++)
    vhash_free_overflow_buckets (&h->overflow_buckets[i]);
  vec_free (h->search_buckets);
  vec_free (h->key_work_space);
  vec_free (h->hash_work_space);
}

always_inline void
vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  vec_elt (h->key_work_space, i0).as_u32[i1] = value;
}

always_inline void
vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  vec_elt (h->key_work_space, i0).as_u32x4 = value;
}

always_inline u32
vhash_get_key_word (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4);
  u32 i1 = vi % 4;
  return vec_elt (h->key_work_space, i0).as_u32[i1];
}

always_inline u32x
vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi)
{
  u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi;
  return vec_elt (h->key_work_space, i0).as_u32x4;
}

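/* Worked example (illustrative only): with log2_n_key_word_len_u32x = 3 the
   key work space holds 8 u32x4s per key word.  Key word wi = 1 of scalar
   element vi = 6 then lives at i0 = (1 << 3) + (6 / 4) = 9, lane
   i1 = 6 % 4 = 2, i.e. key_work_space[9].as_u32[2].  The u32x variants read
   or write a whole u32x4 of key words at once. */
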
always_inline void
vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors)
{
  u32 n, l;

  n = max_pow2 (n_vectors) / 4;
  n = clib_max (n, 8);

  h->log2_n_key_word_len_u32x = l = min_log2 (n);
  vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1,
                        CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES);
}

always_inline void
vhash_gather_key_stage (vhash_t * h,
                        u32 vector_index,
                        u32 n_vectors,
                        vhash_key_function_t key_function,
                        void *state, u32 n_key_u32s)
{
  u32 i, j, vi;

  /* Gather keys for 4 packets (for 128 bit vector length, i.e. u32x4). */
  for (i = 0; i < n_vectors; i++)
    {
      vi = vector_index * 4 + i;
      for (j = 0; j < n_key_u32s; j++)
        vhash_set_key_word (h, j, vi, key_function (state, vi, j));
    }
}

always_inline void
vhash_gather_4key_stage (vhash_t * h,
                         u32 vector_index,
                         vhash_4key_function_t key_function,
                         void *state, u32 n_key_u32s)
{
  u32 j, vi;
  vi = vector_index * 4;
  for (j = 0; j < n_key_u32s; j++)
    vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j));
}

always_inline void
vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 i, n_left;
  u32x a, b, c;

  /* Only need to do this for keys longer than 12 bytes. */
  ASSERT (n_key_u32s > 3);

  a = h->hash_seeds[0].as_u32x4;
  b = h->hash_seeds[1].as_u32x4;
  c = h->hash_seeds[2].as_u32x4;
  for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3)
    {
      a += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index);
      if (n_left > 1)
        b += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index);
      if (n_left > 2)
        c += vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index);

      hash_v3_mix_u32x (a, b, c);
    }

  /* Save away a, b, c for later finalize. */
  {
    vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);
    hk->hashed_key[0].as_u32x4 = a;
    hk->hashed_key[1].as_u32x4 = b;
    hk->hashed_key[2].as_u32x4 = c;
  }
}

always_inline vhash_search_bucket_t *
vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s)
{
  return ((vhash_search_bucket_t *)
          vec_elt_at_index (h->search_buckets,
                            (i / 4) *
                            ((sizeof (vhash_search_bucket_t) /
                              sizeof (u32x4)) + n_key_u32s)));
}

always_inline vhash_search_bucket_t *
vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s)
{
  u32 i = key_hash & h->bucket_mask.as_u32[0];
  return vhash_get_search_bucket_with_index (h, i, n_key_u32s);
}

always_inline u32x4
vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash,
                                        u32 n_key_u32s)
{
  vhash_search_bucket_t *b;
  u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]);
  u32x4 r = key_hash & h->bucket_mask.as_u32x4;

  /* Multiply with shifts and adds to get bucket byte offset. */
#define _(x) u32x4_ishift_left (r, (x) - 2)
  if (n_bytes_per_bucket == (1 << 5))
    r = _(5);
  else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4)))
    r = _(5) + _(4);
  else if (n_bytes_per_bucket == (1 << 6))
    r = _(6);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4)))
    r = _(6) + _(4);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5)))
    r = _(6) + _(5);
  else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4)))
    r = _(6) + _(5) + _(4);
  else
    ASSERT (0);
#undef _
  return r;
}

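/* Worked example (illustrative only): for n_key_u32s = 2 each bucket is
   16 + 2 * 16 = 48 = (1 << 5) + (1 << 4) bytes, so the function above
   computes r = _(5) + _(4) = (r << 3) + (r << 2) = 12 * r.  Assuming
   bucket_mask leaves the low two bits clear (so r is a multiple of 4),
   12 * r equals (r / 4) * 48, the same bucket byte offset that
   vhash_get_search_bucket_with_index () computes, without a vector multiply. */
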
always_inline void
vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s)
{
  i32 n_left;
  u32x a, b, c;
  vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);

  if (n_key_u32s <= 3)
    {
      a = h->hash_seeds[0].as_u32x4;
      b = h->hash_seeds[1].as_u32x4;
      c = h->hash_seeds[2].as_u32x4;
      n_left = n_key_u32s;
    }
  else
    {
      a = hk->hashed_key[0].as_u32x4;
      b = hk->hashed_key[1].as_u32x4;
      c = hk->hashed_key[2].as_u32x4;
      n_left = 3;
    }

  if (n_left > 0)
    a += vhash_get_key_word_u32x (h, 0, vector_index);
  if (n_left > 1)
    b += vhash_get_key_word_u32x (h, 1, vector_index);
  if (n_left > 2)
    c += vhash_get_key_word_u32x (h, 2, vector_index);

  hash_v3_finalize_u32x (a, b, c);

  /* Only save away last 32 bits of hash code. */
  hk->hashed_key[2].as_u32x4 = c;

  /* Prefetch buckets.  This costs a bit for small tables but saves
     big for large ones. */
  {
    vhash_search_bucket_t *b0, *b1, *b2, *b3;
    u32x4_union_t kh;

    kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s);
    hk->hashed_key[1].as_u32x4 = kh.as_u32x4;

    b0 = (void *) h->search_buckets + kh.as_u32[0];
    b1 = (void *) h->search_buckets + kh.as_u32[1];
    b2 = (void *) h->search_buckets + kh.as_u32[2];
    b3 = (void *) h->search_buckets + kh.as_u32[3];

    CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]), READ);
    CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]), READ);
    CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]), READ);
    CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]), READ);
  }
}

always_inline u32
vhash_merge_results (u32x4 r)
{
  r = r | u32x4_word_shift_right (r, 2);
  r = r | u32x4_word_shift_right (r, 1);
  return u32x4_get0 (r);
}

/* Bucket is full if none of its 4 results are 0. */
always_inline u32
vhash_search_bucket_is_full (u32x4 r)
{
  return u32x4_zero_byte_mask (r) == 0;
}

always_inline u32
vhash_non_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0xffff);
  return min_log2 (0xffff & ~empty_mask) / 4;
}

always_inline u32
vhash_empty_result_index (u32x4 x)
{
  u32 empty_mask = u32x4_zero_byte_mask (x);
  ASSERT (empty_mask != 0);
  return min_log2 (0xffff & empty_mask) / 4;
}

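/* Worked example (illustrative only): for r = {0, 0, 7, 0},
   u32x4_zero_byte_mask (r) = 0xfeff, so vhash_search_bucket_is_full ()
   returns 0, vhash_merge_results () ORs the lanes together and returns 7,
   vhash_non_empty_result_index () returns 8 / 4 = 2, and
   vhash_empty_result_index () returns the index of an empty lane. */
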
always_inline u32x4
vhash_bucket_compare (vhash_t * h,
                      u32x4_union_t * bucket, u32 key_word_index, u32 vi)
{
  u32 k = vhash_get_key_word (h, key_word_index, vi);
  u32x4 x = { k, k, k, k };
  return (bucket[key_word_index].as_u32x4 == x);
}

#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \
do {                                                                    \
  u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi));                \
  u32x4 _k0 = u32x4_splat_word (_k4, 0);                                \
  u32x4 _k1 = u32x4_splat_word (_k4, 1);                                \
  u32x4 _k2 = u32x4_splat_word (_k4, 2);                                \
  u32x4 _k3 = u32x4_splat_word (_k4, 3);                                \
                                                                        \
  cmp0 = (b0->key[wi].as_u32x4 == _k0);                                 \
  cmp1 = (b1->key[wi].as_u32x4 == _k1);                                 \
  cmp2 = (b2->key[wi].as_u32x4 == _k2);                                 \
  cmp3 = (b3->key[wi].as_u32x4 == _k3);                                 \
} while (0)

u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);

always_inline void
vhash_get_stage (vhash_t * h,
                 u32 vector_index,
                 u32 n_vectors,
                 vhash_result_function_t result_function,
                 void *state, u32 n_key_u32s)
{
  u32 i, j;
  vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 result;
      u32x4 r, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      r = r0 = b->result.as_u32x4;
      for (j = 0; j < n_key_u32s; j++)
        r &= vhash_bucket_compare (h, &b->key[0], j, vi);

      /* At this point only one of the 4 results should be non-zero, so we
         can OR all 4 together and get the valid result (if there is one). */
      result = vhash_merge_results (r);

      if (!result && vhash_search_bucket_is_full (r0))
        result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      result_function (state, vi, result - 1, n_key_u32s);
    }
}

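/* Minimal non-pipelined sketch (illustrative only) of a batched lookup of 4
   keys of up to 3 u32s each, using the hypothetical example_* callbacks
   sketched earlier; production code would typically overlap these stages
   with the helpers from <vppinfra/pipeline.h>:

     static void
     example_lookup_4 (vhash_t * h, example_lookup_state_t * s, u32 n_key_u32)
     {
       u32 vector_index = 0;
       vhash_validate_sizes (h, n_key_u32, 4);
       vhash_gather_key_stage (h, vector_index, 4, example_gather_key_word,
                               s, n_key_u32);
       vhash_finalize_stage (h, vector_index, n_key_u32);
       vhash_get_stage (h, vector_index, 4, example_get_result, s, n_key_u32);
     }
*/
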
always_inline void
vhash_get_4_stage (vhash_t * h,
                   u32 vector_index,
                   vhash_4result_function_t result_function,
                   void *state, u32 n_key_u32s)
{
  u32 i, vi;
  vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b0, *b1, *b2, *b3;
  u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before;
  u32x4_union_t kh;

  kh.as_u32x4 = hk->hashed_key[1].as_u32x4;

  b0 = (void *) h->search_buckets + kh.as_u32[0];
  b1 = (void *) h->search_buckets + kh.as_u32[1];
  b2 = (void *) h->search_buckets + kh.as_u32[2];
  b3 = (void *) h->search_buckets + kh.as_u32[3];

  r0 = r0_before = b0->result.as_u32x4;
  r1 = r1_before = b1->result.as_u32x4;
  r2 = r2_before = b2->result.as_u32x4;
  r3 = r3_before = b3->result.as_u32x4;

  vi = vector_index * 4;

  for (i = 0; i < n_key_u32s; i++)
    {
      u32x4 c0, c1, c2, c3;
      vhash_bucket_compare_4 (h, i, vector_index,
                              b0, b1, b2, b3, c0, c1, c2, c3);
      r0 &= c0;
      r1 &= c1;
      r2 &= c2;
      r3 &= c3;
    }

  u32x4_transpose (r0, r1, r2, r3);

  /* Gather together 4 results. */
  {
    u32x4_union_t r;
    u32x4 ones = { 1, 1, 1, 1 };
    u32 not_found_mask;

    r.as_u32x4 = r0 | r1 | r2 | r3;
    not_found_mask = u32x4_zero_byte_mask (r.as_u32x4);
    not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0))
                       | (vhash_search_bucket_is_full (r1_before) << (4 * 1))
                       | (vhash_search_bucket_is_full (r2_before) << (4 * 2))
                       | (vhash_search_bucket_is_full (r3_before) << (4 * 3)));
    if (not_found_mask)
      {
        u32x4_union_t key_hash;

        key_hash.as_u32x4 = hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4;

        /* Slow path: one of the buckets may have been full and we need to search overflow. */
        if (not_found_mask & (1 << (4 * 0)))
          r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0],
                                            vi + 0, n_key_u32s);
        if (not_found_mask & (1 << (4 * 1)))
          r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1],
                                            vi + 1, n_key_u32s);
        if (not_found_mask & (1 << (4 * 2)))
          r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2],
                                            vi + 2, n_key_u32s);
        if (not_found_mask & (1 << (4 * 3)))
          r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3],
                                            vi + 3, n_key_u32s);
      }

    result_function (state, vi, r.as_u32x4 - ones, n_key_u32s);
  }
}

u32
vhash_set_overflow (vhash_t * h,
                    u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s);

always_inline void
vhash_set_stage (vhash_t * h,
                 u32 vector_index,
                 u32 n_vectors,
                 vhash_result_function_t result_function,
                 void *state, u32 n_key_u32s)
{
  u32 i, j, n_new_elts = 0;
  vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result, new_result;
      u32 i_set;
      u32x4 r, r0, cmp;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
        cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;
      r = r0 & cmp;

      /* At this point only one of the 4 results should be non-zero, so we
         can OR all 4 together and get the valid result (if there is one). */
      old_result = vhash_merge_results (r);

      if (!old_result && vhash_search_bucket_is_full (r0))
        old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s);

      /* Get new result; possibly do something with old result. */
      new_result = result_function (state, vi, old_result - 1, n_key_u32s);

      /* User cannot use ~0 as a hash result since a result of 0 is
         used to mark unused bucket entries. */
      ASSERT (new_result + 1 != 0);
      new_result += 1;

      /* Set over-writes existing result. */
      if (old_result)
        {
          i_set = vhash_non_empty_result_index (r);
          b->result.as_u32[i_set] = new_result;
        }
      else
        {
          /* Set allocates new result. */
          u32 valid_mask;

          valid_mask = (((b->result.as_u32[0] != 0) << 0)
                        | ((b->result.as_u32[1] != 0) << 1)
                        | ((b->result.as_u32[2] != 0) << 2)
                        | ((b->result.as_u32[3] != 0) << 3));

          /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */
          i_set = key_hash & 3;
          valid_mask = ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf;

          /* Insert into first empty position in bucket after key_hash. */
          i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3;

          if (valid_mask != 0xf)
            {
              n_new_elts += 1;

              b->result.as_u32[i_set] = new_result;

              /* Insert new key into search bucket. */
              for (j = 0; j < n_key_u32s; j++)
                b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi);
            }
          else
            vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s);
        }
    }

  h->n_elts += n_new_elts;
}

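/* Illustrative sketch only: a vhash_result_function_t suitable for
   vhash_set_stage ().  It returns the caller-supplied new result from the
   hypothetical example_lookup_state_t above and ignores the previous value;
   old_result arrives as ~0 when the key was not already present, and the
   stage asserts that the returned value is not ~0.

     static u32
     example_set_result (void *state, u32 vector_index, u32 old_result,
                         u32 n_key_u32)
     {
       example_lookup_state_t *s = state;
       return s->results[vector_index];
     }
*/
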
u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s);

void
vhash_unset_refill_from_overflow (vhash_t * h,
                                  vhash_search_bucket_t * b,
                                  u32 key_hash, u32 n_key_u32s);

/* Note: Eliot tried doing 4 unsets at once and could not get a speedup,
   so vhash_unset_4_stage was abandoned. */
always_inline void
vhash_unset_stage (vhash_t * h,
                   u32 vector_index,
                   u32 n_vectors,
                   vhash_result_function_t result_function,
                   void *state, u32 n_key_u32s)
{
  u32 i, j, n_elts_unset = 0;
  vhash_hashed_key_t *hk = vec_elt_at_index (h->hash_work_space, vector_index);
  vhash_search_bucket_t *b;

  for (i = 0; i < n_vectors; i++)
    {
      u32 vi = vector_index * 4 + i;
      u32 key_hash = hk->hashed_key[2].as_u32[i];
      u32 old_result;
      u32x4 cmp, r0;

      b = vhash_get_search_bucket (h, key_hash, n_key_u32s);

      cmp = vhash_bucket_compare (h, &b->key[0], 0, vi);
      for (j = 1; j < n_key_u32s; j++)
        cmp &= vhash_bucket_compare (h, &b->key[0], j, vi);

      r0 = b->result.as_u32x4;

      /* At this point cmp is all ones where the key matches and zero
         otherwise, so this invalidates results for the matching key and
         does nothing otherwise. */
      b->result.as_u32x4 = r0 & ~cmp;

      old_result = vhash_merge_results (r0 & cmp);

      n_elts_unset += old_result != 0;

      if (vhash_search_bucket_is_full (r0))
        {
          if (old_result)
            vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s);
          else
            old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s);
        }

      result_function (state, vi, old_result - 1, n_key_u32s);
    }
  ASSERT (h->n_elts >= n_elts_unset);
  h->n_elts -= n_elts_unset;
}

void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32,
                 u32 * hash_seeds);

void vhash_resize (vhash_t * old, u32 log2_n_keys);

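/* Minimal initialization sketch (illustrative only): a table with 2^10 key
   slots and 2-u32 (64-bit) keys, assuming vhash_init () consumes three seed
   words to fill hash_seeds[].  The seed values are arbitrary.

     u32 seeds[3] = { 0x12345678, 0x9abcdef0, 0x0badf00d };
     vhash_t h;

     memset (&h, 0, sizeof (h));
     vhash_init (&h, 10, 2, seeds);
*/
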
typedef struct
{
  vhash_t *vhash;

  union
  {
    struct
    {
      u32 *keys;
      u32 *results;
    };

    /* Vector layout for get keys. */
    struct
    {
      u32x4_union_t *get_keys;
      u32x4_union_t *get_results;
    };
  };

  u32 n_vectors_div_4;
  u32 n_vectors_mod_4;

  u32 n_key_u32;

  u32 n_keys;
} vhash_main_t;

always_inline u32
vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i, n;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32;

  vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0]));
  vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0]));

  return i;
}

always_inline void
vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                        u32 value)
{
  u32x4_union_t *k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi].as_u32[vi % 4] = value;
}

always_inline u32
vhash_get_fetch_result (vhash_main_t * vm, u32 vi)
{
  u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4);
  return r->as_u32[vi % 4];
}

void vhash_main_get (vhash_main_t * vm);

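/* Batched lookup sketch using the vhash_main_t helpers (illustrative only):
   it assumes vm->vhash and vm->n_key_u32 are already set up; key0 and key1
   are hypothetical caller-side arrays of key words.

     u32 i, lookup0, result0, result1;

     lookup0 = vhash_get_alloc_keys (vm, 2, vm->n_key_u32);
     for (i = 0; i < vm->n_key_u32; i++)
       {
         vhash_get_set_key_word (vm, lookup0 + 0, i, vm->n_key_u32, key0[i]);
         vhash_get_set_key_word (vm, lookup0 + 1, i, vm->n_key_u32, key1[i]);
       }
     vhash_main_get (vm);
     result0 = vhash_get_fetch_result (vm, lookup0 + 0);
     result1 = vhash_get_fetch_result (vm, lookup0 + 1);
*/
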
always_inline u32
vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  u32 i;

  i = vm->n_keys;
  vm->n_keys = i + n_keys;

  vec_resize (vm->keys, n_keys * n_key_u32);
  vec_resize (vm->results, n_keys);

  return i;
}

always_inline void
vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                        u32 value)
{
  u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32);
  ASSERT (wi < n_key_u32);
  k[wi] = value;
}

always_inline void
vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  u32 *r = vec_elt_at_index (vm->results, vi);
  r[0] = result;
}

always_inline u32
vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  u32 *r = vec_elt_at_index (vm->results, vi);
  return r[0];
}

void vhash_main_set (vhash_main_t * vm);

always_inline u32
vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32)
{
  return vhash_set_alloc_keys (vm, n_keys, n_key_u32);
}

always_inline void
vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32,
                          u32 value)
{
  vhash_set_set_key_word (vm, vi, wi, n_key_u32, value);
}

always_inline void
vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result)
{
  vhash_set_set_result (vm, vi, result);
}

always_inline u32
vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi)
{
  return vhash_set_fetch_old_result (vm, vi);
}

void vhash_main_unset (vhash_main_t * vm);

typedef struct
{
  vhash_main_t new;

  vhash_t *old;
} vhash_resize_t;

u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index,
                              u32 n_vectors);

#endif /* CLIB_HAVE_VEC128 */

#endif /* included_clib_vhash_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */