blob: 7bfcf986e6f0b3d89de1c5b20a2c42f000918f3a [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * ip/ip4_fib.h: ip4 mtrie fib
17 *
18 * Copyright (c) 2012 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vnet/ip/ip.h>
Neale Rannsa3af3372017-03-28 03:49:52 -070041#include <vnet/ip/ip4_mtrie.h>
42#include <vnet/fib/ip4_fib.h>
43
44
45/**
46 * Global pool of IPv4 8bit PLYs
47 */
48ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049
Neale Ranns04a75e32017-03-23 06:46:01 -070050always_inline u32
Neale Rannsa3af3372017-03-28 03:49:52 -070051ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
Ed Warnickecb9cada2015-12-08 15:45:58 -070052{
Neale Ranns04a75e32017-03-23 06:46:01 -070053 /*
54 * It's 'non-empty' if the length of the leaf stored is greater than the
55 * length of a leaf in the covering ply. i.e. the leaf is more specific
56 * than it's would be cover in the covering ply
57 */
58 if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59 return (1);
60 return (0);
61}
62
63always_inline ip4_fib_mtrie_leaf_t
64ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
65{
66 ip4_fib_mtrie_leaf_t l;
67 l = 1 + 2 * adj_index;
68 ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
69 return l;
70}
71
72always_inline u32
73ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
74{
75 return (n & 1) == 0;
76}
77
78always_inline u32
79ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
80{
81 ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
82 return n >> 1;
83}
84
85always_inline ip4_fib_mtrie_leaf_t
86ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
87{
88 ip4_fib_mtrie_leaf_t l;
89 l = 0 + 2 * i;
90 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
91 return l;
92}
93
/*
 * PLY_X4_SPLAT_INIT (init_x4, init)
 *
 * Store the value 'init' into each of the four 32-bit lanes of the u32x4
 * lvalue 'init_x4'.  When __ALTIVEC__ is defined the lanes are filled one by
 * one through a u32x4_union_t; otherwise u32x4_splat() is used.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else), and 'init' is
 * evaluated exactly once.
 */
#ifndef __ALTIVEC__
#define PLY_X4_SPLAT_INIT(init_x4, init)        \
do {                                            \
  (init_x4) = u32x4_splat (init);               \
} while (0)
#else
#define PLY_X4_SPLAT_INIT(init_x4, init)        \
do {                                            \
  u32x4_union_t _splat;                         \
  u32 _splat_value = (init);                    \
  _splat.as_u32[0] = _splat_value;              \
  _splat.as_u32[1] = _splat_value;              \
  _splat.as_u32[2] = _splat_value;              \
  _splat.as_u32[3] = _splat_value;              \
  (init_x4) = _splat.as_u32x4;                  \
} while (0)
#endif
108
/*
 * PLY_INIT_LEAVES (p)
 *
 * Set every leaf of the ply pointed to by 'p' to the value 'init'.
 *
 * NOTE: 'init' is not a macro parameter; an identifier with that name must
 * be in scope wherever the macro is expanded.
 *
 * With CLIB_HAVE_VEC128 the leaves are written through the ply's
 * leaves_as_u32x4 view, otherwise through the scalar leaves array.  Either
 * way the loop writes four elements per iteration, so the array length is
 * expected to be a multiple of four.
 *
 * The body is wrapped in do { } while (0) so the macro is a single
 * statement, 'p' is parenthesised so any pointer expression may be passed,
 * and the locals are underscore-prefixed to avoid capturing caller names.
 */
#ifdef CLIB_HAVE_VEC128
#define PLY_INIT_LEAVES(p)                                              \
do {                                                                    \
  u32x4 *_l, _init_x4;                                                  \
                                                                        \
  PLY_X4_SPLAT_INIT (_init_x4, init);                                   \
  for (_l = (p)->leaves_as_u32x4;                                       \
       _l < (p)->leaves_as_u32x4 + ARRAY_LEN ((p)->leaves_as_u32x4);    \
       _l += 4)                                                         \
    {                                                                   \
      _l[0] = _init_x4;                                                 \
      _l[1] = _init_x4;                                                 \
      _l[2] = _init_x4;                                                 \
      _l[3] = _init_x4;                                                 \
    }                                                                   \
} while (0)
#else
#define PLY_INIT_LEAVES(p)                                              \
do {                                                                    \
  u32 *_l;                                                              \
                                                                        \
  for (_l = (p)->leaves;                                                \
       _l < (p)->leaves + ARRAY_LEN ((p)->leaves);                      \
       _l += 4)                                                         \
    {                                                                   \
      _l[0] = init;                                                     \
      _l[1] = init;                                                     \
      _l[2] = init;                                                     \
      _l[3] = init;                                                     \
    }                                                                   \
} while (0)
#endif
139
/*
 * PLY_INIT (p, init, prefix_len, ply_base_len)
 *
 * Initialise the ply pointed to by 'p':
 *  - n_non_empty_leafs is set to the number of leaves when prefix_len is
 *    greater than ply_base_len, and to 0 otherwise;
 *  - every entry of dst_address_bits_of_leaves is set to prefix_len;
 *  - dst_address_bits_base is set to ply_base_len;
 *  - all leaves are filled via PLY_INIT_LEAVES.
 *
 * NOTE: PLY_INIT_LEAVES reads an identifier literally named 'init' from the
 * expansion site, so the 'init' argument passed here must be a variable of
 * that name for the intended value to be used.
 *
 * Wrapped in do { } while (0) so the macro is a single statement; all
 * arguments are parenthesised.
 */
#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
do {                                                                    \
  /*                                                                    \
   * A leaf is 'empty' if it represents a leaf from the covering PLY    \
   * i.e. if the prefix length of the leaf is less than or equal to     \
   * the prefix length of the PLY                                       \
   */                                                                   \
  (p)->n_non_empty_leafs = ((prefix_len) > (ply_base_len) ?             \
                            ARRAY_LEN ((p)->leaves) : 0);               \
  clib_memset ((p)->dst_address_bits_of_leaves, (prefix_len),           \
               sizeof ((p)->dst_address_bits_of_leaves));               \
  (p)->dst_address_bits_base = (ply_base_len);                          \
                                                                        \
  /* Initialize leaves. */                                              \
  PLY_INIT_LEAVES (p);                                                  \
} while (0)
156
Neale Ranns04a75e32017-03-23 06:46:01 -0700157static void
Neale Rannsa3af3372017-03-28 03:49:52 -0700158ply_8_init (ip4_fib_mtrie_8_ply_t * p,
159 ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
Neale Ranns04a75e32017-03-23 06:46:01 -0700160{
Neale Rannsa3af3372017-03-28 03:49:52 -0700161 PLY_INIT (p, init, prefix_len, ply_base_len);
162}
163
164static void
165ply_16_init (ip4_fib_mtrie_16_ply_t * p,
166 ip4_fib_mtrie_leaf_t init, uword prefix_len)
167{
Dave Barachb7b92992018-10-17 10:38:51 -0400168 clib_memset (p->dst_address_bits_of_leaves, prefix_len,
169 sizeof (p->dst_address_bits_of_leaves));
Neale Rannsa3af3372017-03-28 03:49:52 -0700170 PLY_INIT_LEAVES (p);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171}
172
173static ip4_fib_mtrie_leaf_t
Neale Ranns04a75e32017-03-23 06:46:01 -0700174ply_create (ip4_fib_mtrie_t * m,
175 ip4_fib_mtrie_leaf_t init_leaf,
176 u32 leaf_prefix_len, u32 ply_base_len)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177{
Neale Rannsa3af3372017-03-28 03:49:52 -0700178 ip4_fib_mtrie_8_ply_t *p;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700179 /* Get cache aligned ply. */
Neale Ranns1ec36522017-11-29 05:20:37 -0800180
Neale Rannsa3af3372017-03-28 03:49:52 -0700181 pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700182
Neale Rannsa3af3372017-03-28 03:49:52 -0700183 ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
184 return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700185}
186
Neale Rannsa3af3372017-03-28 03:49:52 -0700187always_inline ip4_fib_mtrie_8_ply_t *
Ed Warnickecb9cada2015-12-08 15:45:58 -0700188get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
189{
190 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700191
Neale Rannsa3af3372017-03-28 03:49:52 -0700192 return pool_elt_at_index (ip4_ply_pool, n);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700193}
194
Dave Barachd7cb1b52016-12-09 09:52:16 -0500195void
Neale Rannsa3af3372017-03-28 03:49:52 -0700196ip4_mtrie_free (ip4_fib_mtrie_t * m)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700197{
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800198 /* the root ply is embedded so there is nothing to do,
Neale Rannsa3af3372017-03-28 03:49:52 -0700199 * the assumption being that the IP4 FIB table has emptied the trie
200 * before deletion.
201 */
202#if CLIB_DEBUG > 0
203 int i;
204 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
205 {
206 ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
207 }
208#endif
209}
210
211void
212ip4_mtrie_init (ip4_fib_mtrie_t * m)
213{
214 ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700215}
216
Dave Barachd7cb1b52016-12-09 09:52:16 -0500217typedef struct
218{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700219 ip4_address_t dst_address;
220 u32 dst_address_length;
221 u32 adj_index;
Neale Ranns04a75e32017-03-23 06:46:01 -0700222 u32 cover_address_length;
223 u32 cover_adj_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700224} ip4_fib_mtrie_set_unset_leaf_args_t;
225
226static void
227set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700228 ip4_fib_mtrie_8_ply_t * ply,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229 ip4_fib_mtrie_leaf_t new_leaf,
230 uword new_leaf_dst_address_bits)
231{
232 ip4_fib_mtrie_leaf_t old_leaf;
233 uword i;
234
235 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700236
237 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
238 {
239 old_leaf = ply->leaves[i];
240
241 /* Recurse into sub plies. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500242 if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700244 ip4_fib_mtrie_8_ply_t *sub_ply =
245 get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500246 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
247 new_leaf_dst_address_bits);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700248 }
249
250 /* Replace less specific terminal leaves with new leaf. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500251 else if (new_leaf_dst_address_bits >=
252 ply->dst_address_bits_of_leaves[i])
Ed Warnickecb9cada2015-12-08 15:45:58 -0700253 {
jaszha03ee743762019-09-27 12:52:18 -0500254 clib_atomic_store_rel_n (&ply->leaves[i], new_leaf);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700255 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
Neale Ranns04a75e32017-03-23 06:46:01 -0700256 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700257 }
258 }
259}
260
261static void
262set_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700263 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500264 u32 old_ply_index, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700265{
266 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
267 i32 n_dst_bits_next_plies;
268 u8 dst_byte;
Neale Rannsa3af3372017-03-28 03:49:52 -0700269 ip4_fib_mtrie_8_ply_t *old_ply;
270
271 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700272
Neale Rannsf0609302017-04-11 09:13:39 -0700273 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700274 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
275
Neale Rannsa3af3372017-03-28 03:49:52 -0700276 /* how many bits of the destination address are in the next PLY */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500277 n_dst_bits_next_plies =
278 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700279
280 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
281
282 /* Number of bits next plies <= 0 => insert leaves this ply. */
283 if (n_dst_bits_next_plies <= 0)
284 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700285 /* The mask length of the address to insert maps to this ply */
Neale Ranns6ff05492017-06-06 06:52:14 -0700286 uword old_leaf_is_terminal;
287 u32 i, n_dst_bits_this_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700288
Neale Rannsa3af3372017-03-28 03:49:52 -0700289 /* The number of bits, and hence slots/buckets, we will fill */
Neale Ranns04a75e32017-03-23 06:46:01 -0700290 n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500291 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
292 pow2_mask (n_dst_bits_this_ply)) == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700293
Neale Rannsa3af3372017-03-28 03:49:52 -0700294 /* Starting at the value of the byte at this section of the v4 address
295 * fill the buckets/slots of the ply */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700296 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
297 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700298 ip4_fib_mtrie_8_ply_t *new_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700299
300 old_leaf = old_ply->leaves[i];
301 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
302
Ed Warnickecb9cada2015-12-08 15:45:58 -0700303 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
304 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700305 /* The new leaf is more or equally specific than the one currently
306 * occupying the slot */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
308
309 if (old_leaf_is_terminal)
310 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700311 /* The current leaf is terminal, we can replace it with
312 * the new one */
Neale Ranns04a75e32017-03-23 06:46:01 -0700313 old_ply->n_non_empty_leafs -=
314 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Neale Rannsa3af3372017-03-28 03:49:52 -0700315
Dave Barachd7cb1b52016-12-09 09:52:16 -0500316 old_ply->dst_address_bits_of_leaves[i] =
317 a->dst_address_length;
jaszha03ee743762019-09-27 12:52:18 -0500318 clib_atomic_store_rel_n (&old_ply->leaves[i], new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700319
Dave Barachd7cb1b52016-12-09 09:52:16 -0500320 old_ply->n_non_empty_leafs +=
Neale Ranns04a75e32017-03-23 06:46:01 -0700321 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500322 ASSERT (old_ply->n_non_empty_leafs <=
323 ARRAY_LEN (old_ply->leaves));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700324 }
325 else
326 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700327 /* Existing leaf points to another ply. We need to place
328 * new_leaf into all more specific slots. */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700329 new_ply = get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500330 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
331 a->dst_address_length);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700332 }
333 }
Dave Barachd7cb1b52016-12-09 09:52:16 -0500334 else if (!old_leaf_is_terminal)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700336 /* The current leaf is less specific and not termial (i.e. a ply),
337 * recurse on down the trie */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700338 new_ply = get_next_ply_for_leaf (m, old_leaf);
Neale Rannsa3af3372017-03-28 03:49:52 -0700339 set_leaf (m, a, new_ply - ip4_ply_pool,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500340 dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700341 }
Neale Rannsa3af3372017-03-28 03:49:52 -0700342 /*
343 * else
344 * the route we are adding is less specific than the leaf currently
345 * occupying this slot. leave it there
346 */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700347 }
348 }
349 else
350 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700351 /* The address to insert requires us to move down at a lower level of
352 * the trie - recurse on down */
353 ip4_fib_mtrie_8_ply_t *new_ply;
Neale Ranns04a75e32017-03-23 06:46:01 -0700354 u8 ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700355
Neale Ranns04a75e32017-03-23 06:46:01 -0700356 ply_base_len = 8 * (dst_address_byte_index + 1);
Neale Rannsa3af3372017-03-28 03:49:52 -0700357
Ed Warnickecb9cada2015-12-08 15:45:58 -0700358 old_leaf = old_ply->leaves[dst_byte];
Neale Rannsa3af3372017-03-28 03:49:52 -0700359
Ed Warnickecb9cada2015-12-08 15:45:58 -0700360 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
361 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700362 /* There is a leaf occupying the slot. Replace it with a new ply */
Neale Ranns04a75e32017-03-23 06:46:01 -0700363 old_ply->n_non_empty_leafs -=
364 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
365
mu.duojiao59a82952018-10-11 14:27:30 +0800366 new_leaf =
367 ply_create (m, old_leaf,
368 old_ply->dst_address_bits_of_leaves[dst_byte],
369 ply_base_len);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700370 new_ply = get_next_ply_for_leaf (m, new_leaf);
371
372 /* Refetch since ply_create may move pool. */
Neale Rannsa3af3372017-03-28 03:49:52 -0700373 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700374
jaszha03ee743762019-09-27 12:52:18 -0500375 clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700376 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700377
Neale Rannsa3af3372017-03-28 03:49:52 -0700378 old_ply->n_non_empty_leafs +=
379 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
Neale Ranns04a75e32017-03-23 06:46:01 -0700380 ASSERT (old_ply->n_non_empty_leafs >= 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700381 }
382 else
383 new_ply = get_next_ply_for_leaf (m, old_leaf);
384
Neale Rannsa3af3372017-03-28 03:49:52 -0700385 set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
386 }
387}
388
389static void
390set_root_leaf (ip4_fib_mtrie_t * m,
391 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
392{
393 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
394 ip4_fib_mtrie_16_ply_t *old_ply;
395 i32 n_dst_bits_next_plies;
396 u16 dst_byte;
397
398 old_ply = &m->root_ply;
399
Neale Rannsf0609302017-04-11 09:13:39 -0700400 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700401
402 /* how many bits of the destination address are in the next PLY */
403 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
404
405 dst_byte = a->dst_address.as_u16[0];
406
407 /* Number of bits next plies <= 0 => insert leaves this ply. */
408 if (n_dst_bits_next_plies <= 0)
409 {
410 /* The mask length of the address to insert maps to this ply */
Neale Ranns6ff05492017-06-06 06:52:14 -0700411 uword old_leaf_is_terminal;
412 u32 i, n_dst_bits_this_ply;
Neale Rannsa3af3372017-03-28 03:49:52 -0700413
414 /* The number of bits, and hence slots/buckets, we will fill */
415 n_dst_bits_this_ply = 16 - a->dst_address_length;
416 ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
417 pow2_mask (n_dst_bits_this_ply)) == 0);
418
419 /* Starting at the value of the byte at this section of the v4 address
420 * fill the buckets/slots of the ply */
421 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
422 {
423 ip4_fib_mtrie_8_ply_t *new_ply;
424 u16 slot;
425
426 slot = clib_net_to_host_u16 (dst_byte);
427 slot += i;
428 slot = clib_host_to_net_u16 (slot);
429
430 old_leaf = old_ply->leaves[slot];
431 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
432
433 if (a->dst_address_length >=
434 old_ply->dst_address_bits_of_leaves[slot])
435 {
436 /* The new leaf is more or equally specific than the one currently
437 * occupying the slot */
438 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
439
440 if (old_leaf_is_terminal)
441 {
442 /* The current leaf is terminal, we can replace it with
443 * the new one */
444 old_ply->dst_address_bits_of_leaves[slot] =
445 a->dst_address_length;
jaszha03ee743762019-09-27 12:52:18 -0500446 clib_atomic_store_rel_n (&old_ply->leaves[slot], new_leaf);
Neale Rannsa3af3372017-03-28 03:49:52 -0700447 }
448 else
449 {
450 /* Existing leaf points to another ply. We need to place
451 * new_leaf into all more specific slots. */
452 new_ply = get_next_ply_for_leaf (m, old_leaf);
453 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
454 a->dst_address_length);
455 }
456 }
457 else if (!old_leaf_is_terminal)
458 {
459 /* The current leaf is less specific and not termial (i.e. a ply),
460 * recurse on down the trie */
461 new_ply = get_next_ply_for_leaf (m, old_leaf);
462 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
463 }
464 /*
465 * else
466 * the route we are adding is less specific than the leaf currently
467 * occupying this slot. leave it there
468 */
469 }
470 }
471 else
472 {
473 /* The address to insert requires us to move down at a lower level of
474 * the trie - recurse on down */
475 ip4_fib_mtrie_8_ply_t *new_ply;
476 u8 ply_base_len;
477
478 ply_base_len = 16;
479
480 old_leaf = old_ply->leaves[dst_byte];
481
482 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
483 {
484 /* There is a leaf occupying the slot. Replace it with a new ply */
mu.duojiao59a82952018-10-11 14:27:30 +0800485 new_leaf =
486 ply_create (m, old_leaf,
487 old_ply->dst_address_bits_of_leaves[dst_byte],
488 ply_base_len);
Neale Rannsa3af3372017-03-28 03:49:52 -0700489 new_ply = get_next_ply_for_leaf (m, new_leaf);
490
jaszha03ee743762019-09-27 12:52:18 -0500491 clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
Neale Rannsa3af3372017-03-28 03:49:52 -0700492 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
493 }
494 else
495 new_ply = get_next_ply_for_leaf (m, old_leaf);
496
497 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700498 }
499}
500
501static uword
502unset_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700503 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
504 ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700505{
506 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
507 i32 n_dst_bits_next_plies;
Dave Barach6f6f34f2016-08-08 13:05:31 -0400508 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700509 u8 dst_byte;
510
Neale Rannsf0609302017-04-11 09:13:39 -0700511 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700512 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
513
Dave Barachd7cb1b52016-12-09 09:52:16 -0500514 n_dst_bits_next_plies =
515 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700516
517 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
518 if (n_dst_bits_next_plies < 0)
519 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
520
Dave Barachd7cb1b52016-12-09 09:52:16 -0500521 n_dst_bits_this_ply =
522 n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700523 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
524
525 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
526
527 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
528 {
529 old_leaf = old_ply->leaves[i];
530 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
531
532 if (old_leaf == del_leaf
Dave Barachd7cb1b52016-12-09 09:52:16 -0500533 || (!old_leaf_is_terminal
534 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
535 dst_address_byte_index + 1)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700536 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700537 old_ply->n_non_empty_leafs -=
538 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700539
jaszha0304c01302019-09-27 15:42:02 -0500540 clib_atomic_store_rel_n (&old_ply->leaves[i],
541 ip4_fib_mtrie_leaf_set_adj_index
542 (a->cover_adj_index));
mu.duojiao9744e6d2018-10-17 10:59:09 +0800543 old_ply->dst_address_bits_of_leaves[i] = a->cover_address_length;
Neale Ranns04a75e32017-03-23 06:46:01 -0700544
545 old_ply->n_non_empty_leafs +=
546 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700547
548 ASSERT (old_ply->n_non_empty_leafs >= 0);
549 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
550 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700551 pool_put (ip4_ply_pool, old_ply);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700552 /* Old ply was deleted. */
553 return 1;
554 }
Neale Ranns04a75e32017-03-23 06:46:01 -0700555#if CLIB_DEBUG > 0
556 else if (dst_address_byte_index)
557 {
558 int ii, count = 0;
559 for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
560 {
561 count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
562 }
563 ASSERT (count);
564 }
565#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700566 }
567 }
568
569 /* Old ply was not deleted. */
570 return 0;
571}
572
Neale Rannsa3af3372017-03-28 03:49:52 -0700573static void
574unset_root_leaf (ip4_fib_mtrie_t * m,
575 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700576{
Neale Rannsa3af3372017-03-28 03:49:52 -0700577 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
578 i32 n_dst_bits_next_plies;
579 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
580 u16 dst_byte;
581 ip4_fib_mtrie_16_ply_t *old_ply;
582
Neale Rannsf0609302017-04-11 09:13:39 -0700583 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700584
585 old_ply = &m->root_ply;
586 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
587
588 dst_byte = a->dst_address.as_u16[0];
589
590 n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
591 (16 - a->dst_address_length) : 0);
592
593 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
594
595 /* Starting at the value of the byte at this section of the v4 address
596 * fill the buckets/slots of the ply */
597 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
598 {
599 u16 slot;
600
601 slot = clib_net_to_host_u16 (dst_byte);
602 slot += i;
603 slot = clib_host_to_net_u16 (slot);
604
605 old_leaf = old_ply->leaves[slot];
606 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
607
608 if (old_leaf == del_leaf
609 || (!old_leaf_is_terminal
610 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
611 {
jaszha0304c01302019-09-27 15:42:02 -0500612 clib_atomic_store_rel_n (&old_ply->leaves[slot],
613 ip4_fib_mtrie_leaf_set_adj_index
614 (a->cover_adj_index));
Neale Rannsa3af3372017-03-28 03:49:52 -0700615 old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
616 }
617 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700618}
619
620void
Neale Rannsa3af3372017-03-28 03:49:52 -0700621ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
622 const ip4_address_t * dst_address,
623 u32 dst_address_length, u32 adj_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700624{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700625 ip4_fib_mtrie_set_unset_leaf_args_t a;
Dave Barachd7cb1b52016-12-09 09:52:16 -0500626 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700627
Ed Warnickecb9cada2015-12-08 15:45:58 -0700628 /* Honor dst_address_length. Fib masks are in network byte order */
Neale Rannsa3af3372017-03-28 03:49:52 -0700629 a.dst_address.as_u32 = (dst_address->as_u32 &
630 im->fib_masks[dst_address_length]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700631 a.dst_address_length = dst_address_length;
632 a.adj_index = adj_index;
633
Neale Rannsa3af3372017-03-28 03:49:52 -0700634 set_root_leaf (m, &a);
635}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700636
Neale Rannsa3af3372017-03-28 03:49:52 -0700637void
638ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
639 const ip4_address_t * dst_address,
640 u32 dst_address_length,
641 u32 adj_index,
642 u32 cover_address_length, u32 cover_adj_index)
643{
644 ip4_fib_mtrie_set_unset_leaf_args_t a;
645 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700646
Neale Rannsa3af3372017-03-28 03:49:52 -0700647 /* Honor dst_address_length. Fib masks are in network byte order */
648 a.dst_address.as_u32 = (dst_address->as_u32 &
649 im->fib_masks[dst_address_length]);
650 a.dst_address_length = dst_address_length;
651 a.adj_index = adj_index;
652 a.cover_adj_index = cover_adj_index;
653 a.cover_address_length = cover_address_length;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700654
Neale Rannsa3af3372017-03-28 03:49:52 -0700655 /* the top level ply is never removed */
656 unset_root_leaf (m, &a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700657}
658
Ed Warnickecb9cada2015-12-08 15:45:58 -0700659/* Returns number of bytes of memory used by mtrie. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500660static uword
Neale Rannsa3af3372017-03-28 03:49:52 -0700661mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700662{
663 uword bytes, i;
664
Ed Warnickecb9cada2015-12-08 15:45:58 -0700665 bytes = sizeof (p[0]);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500666 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700667 {
668 ip4_fib_mtrie_leaf_t l = p->leaves[i];
669 if (ip4_fib_mtrie_leaf_is_next_ply (l))
Neale Rannsa3af3372017-03-28 03:49:52 -0700670 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
671 }
672
673 return bytes;
674}
675
676/* Returns number of bytes of memory used by mtrie. */
Neale Rannsc87aafa2017-11-29 00:59:31 -0800677uword
678ip4_fib_mtrie_memory_usage (ip4_fib_mtrie_t * m)
Neale Rannsa3af3372017-03-28 03:49:52 -0700679{
680 uword bytes, i;
681
682 bytes = sizeof (*m);
683 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
684 {
685 ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
686 if (ip4_fib_mtrie_leaf_is_next_ply (l))
687 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700688 }
689
690 return bytes;
691}
692
Dave Barachd7cb1b52016-12-09 09:52:16 -0500693static u8 *
694format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700695{
696 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
697
Neale Ranns04a75e32017-03-23 06:46:01 -0700698 if (ip4_fib_mtrie_leaf_is_terminal (l))
699 s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700700 else
701 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
702 return s;
703}
704
/*
 * FORMAT_PLY (s, _p, _a, _i, _base_address, _ply_max_len, _indent)
 *
 * Append a description of slot '_i' of ply '_p' to the format vector 's'
 * and evaluate to the updated vector.  The address printed is
 * '_base_address' plus '_a' shifted up so that the ply's slot bits end at
 * bit position '_ply_max_len' of the 32-bit address.  If the slot refers to
 * a sub-ply, that ply is printed as well, indented by a further 8 columns.
 *
 * NOTE: the recursive call passes an identifier named 'm' (the mtrie) that
 * must be in scope at the expansion site.
 *
 * The slot is read through the '_p' argument (not through whatever 'p'
 * happens to be in the caller's scope), and '_a' is converted to u32 before
 * shifting so that large slot numbers of the 16-bit root ply cannot
 * overflow a signed int.
 */
#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \
({                                                                      \
  u32 a, ia_length;                                                     \
  ip4_address_t ia;                                                     \
  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)];                         \
                                                                        \
  a = (_base_address) + (((u32) (_a)) << (32 - (_ply_max_len)));        \
  ia.as_u32 = clib_host_to_net_u32 (a);                                 \
  ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
  s = format (s, "\n%U%U %U",                                           \
              format_white_space, (_indent) + 4,                        \
              format_ip4_address_and_length, &ia, ia_length,            \
              format_ip4_fib_mtrie_leaf, _l);                           \
                                                                        \
  if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
    s = format (s, "\n%U",                                              \
                format_ip4_fib_mtrie_ply, m, a, (_indent) + 8,          \
                ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
  s;                                                                    \
})
725
Dave Barachd7cb1b52016-12-09 09:52:16 -0500726static u8 *
727format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700728{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500729 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700730 u32 base_address = va_arg (*va, u32);
mu.duojiao59a82952018-10-11 14:27:30 +0800731 u32 indent = va_arg (*va, u32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700732 u32 ply_index = va_arg (*va, u32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700733 ip4_fib_mtrie_8_ply_t *p;
Neale Rannsa3af3372017-03-28 03:49:52 -0700734 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700735
Neale Rannsa3af3372017-03-28 03:49:52 -0700736 p = pool_elt_at_index (ip4_ply_pool, ply_index);
mu.duojiao59a82952018-10-11 14:27:30 +0800737 s = format (s, "%Uply index %d, %d non-empty leaves",
738 format_white_space, indent, ply_index, p->n_non_empty_leafs);
Neale Rannsa3af3372017-03-28 03:49:52 -0700739
Ed Warnickecb9cada2015-12-08 15:45:58 -0700740 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
741 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700742 if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700743 {
mu.duojiao59a82952018-10-11 14:27:30 +0800744 s = FORMAT_PLY (s, p, i, i, base_address,
Neale Ranns756cd942018-04-06 09:18:11 -0700745 p->dst_address_bits_base + 8, indent);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700746 }
747 }
748
749 return s;
750}
751
Dave Barachd7cb1b52016-12-09 09:52:16 -0500752u8 *
753format_ip4_fib_mtrie (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700754{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500755 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Neale Ranns39194252017-11-27 01:03:25 -0800756 int verbose = va_arg (*va, int);
Neale Rannsa3af3372017-03-28 03:49:52 -0700757 ip4_fib_mtrie_16_ply_t *p;
758 u32 base_address = 0;
759 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700760
Neale Rannsa3af3372017-03-28 03:49:52 -0700761 s = format (s, "%d plies, memory usage %U\n",
762 pool_elts (ip4_ply_pool),
Neale Rannsc87aafa2017-11-29 00:59:31 -0800763 format_memory_size, ip4_fib_mtrie_memory_usage (m));
764 s = format (s, "root-ply");
765 p = &m->root_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700766
Neale Ranns39194252017-11-27 01:03:25 -0800767 if (verbose)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700768 {
Neale Ranns39194252017-11-27 01:03:25 -0800769 s = format (s, "root-ply");
770 p = &m->root_ply;
Neale Rannsa3af3372017-03-28 03:49:52 -0700771
Neale Ranns39194252017-11-27 01:03:25 -0800772 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Neale Rannsa3af3372017-03-28 03:49:52 -0700773 {
Neale Ranns39194252017-11-27 01:03:25 -0800774 u16 slot;
775
776 slot = clib_host_to_net_u16 (i);
777
778 if (p->dst_address_bits_of_leaves[slot] > 0)
779 {
mu.duojiao59a82952018-10-11 14:27:30 +0800780 s = FORMAT_PLY (s, p, i, slot, base_address, 16, 0);
Neale Ranns39194252017-11-27 01:03:25 -0800781 }
Neale Rannsa3af3372017-03-28 03:49:52 -0700782 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700783 }
784
785 return s;
786}
Dave Barachd7cb1b52016-12-09 09:52:16 -0500787
/** Default heap size for the IPv4 mtries (32 << 20, i.e. 32 * 2^20) */
#define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32 << 20)

/* Fallback for build environments whose headers do not define it */
#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif
Neale Ranns1ec36522017-11-29 05:20:37 -0800793
Neale Rannsa3af3372017-03-28 03:49:52 -0700794static clib_error_t *
795ip4_mtrie_module_init (vlib_main_t * vm)
796{
Neale Rannsa3af3372017-03-28 03:49:52 -0700797 CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
Neale Ranns1ec36522017-11-29 05:20:37 -0800798 clib_error_t *error = NULL;
Neale Ranns1ec36522017-11-29 05:20:37 -0800799
800 /* Burn one ply so index 0 is taken */
Neale Rannsa3af3372017-03-28 03:49:52 -0700801 pool_get (ip4_ply_pool, p);
802
Neale Ranns1ec36522017-11-29 05:20:37 -0800803 return (error);
Neale Rannsa3af3372017-03-28 03:49:52 -0700804}
805
806VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
807
Dave Barachd7cb1b52016-12-09 09:52:16 -0500808/*
809 * fd.io coding-style-patch-verification: ON
810 *
811 * Local Variables:
812 * eval: (c-set-style "gnu")
813 * End:
814 */