/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ip/ip4_fib.h: ip4 mtrie fib
 *
 * Copyright (c) 2012 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vnet/ip/ip.h>
Neale Rannsa3af3372017-03-28 03:49:52 -070041#include <vnet/ip/ip4_mtrie.h>
42#include <vnet/fib/ip4_fib.h>
43
44
45/**
46 * Global pool of IPv4 8bit PLYs
47 */
48ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049
Neale Ranns04a75e32017-03-23 06:46:01 -070050always_inline u32
Neale Rannsa3af3372017-03-28 03:49:52 -070051ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
Ed Warnickecb9cada2015-12-08 15:45:58 -070052{
Neale Ranns04a75e32017-03-23 06:46:01 -070053 /*
54 * It's 'non-empty' if the length of the leaf stored is greater than the
55 * length of a leaf in the covering ply. i.e. the leaf is more specific
56 * than it's would be cover in the covering ply
57 */
58 if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59 return (1);
60 return (0);
61}
62
63always_inline ip4_fib_mtrie_leaf_t
64ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
65{
66 ip4_fib_mtrie_leaf_t l;
67 l = 1 + 2 * adj_index;
68 ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
69 return l;
70}
71
72always_inline u32
73ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
74{
75 return (n & 1) == 0;
76}
77
78always_inline u32
79ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
80{
81 ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
82 return n >> 1;
83}
84
85always_inline ip4_fib_mtrie_leaf_t
86ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
87{
88 ip4_fib_mtrie_leaf_t l;
89 l = 0 + 2 * i;
90 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
91 return l;
92}
93
/*
 * PLY_X4_SPLAT_INIT (init_x4, init)
 *
 * Store the scalar 'init' into every lane of the u32x4 lvalue 'init_x4'.
 * When __ALTIVEC__ is defined the lanes are written one by one through a
 * u32x4_union_t; otherwise u32x4_splat() is used.
 */
#ifdef __ALTIVEC__
#define PLY_X4_SPLAT_INIT(init_x4, init) \
{ \
  u32x4_union_t y; \
  y.as_u32[0] = init; \
  y.as_u32[1] = init; \
  y.as_u32[2] = init; \
  y.as_u32[3] = init; \
  init_x4 = y.as_u32x4; \
}
#else
#define PLY_X4_SPLAT_INIT(init_x4, init) \
  init_x4 = u32x4_splat (init);
#endif
108
/*
 * PLY_INIT_LEAVES (p)
 *
 * Set every leaf of ply 'p' to the value 'init'.
 *
 * NOTE: 'init' is not a macro parameter; it is picked up from the scope
 * in which the macro is expanded, so the caller must have a variable (or
 * function parameter) with exactly that name.
 *
 * Leaves are written four elements per iteration, so the leaf array
 * length must be a multiple of four elements of the type being stored.
 */
#ifdef CLIB_HAVE_VEC128
#define PLY_INIT_LEAVES(p) \
{ \
  u32x4 init_x4; \
  u32x4 *l = p->leaves_as_u32x4; \
  \
  PLY_X4_SPLAT_INIT (init_x4, init); \
  while (l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4)) \
    { \
      l[0] = init_x4; \
      l[1] = init_x4; \
      l[2] = init_x4; \
      l[3] = init_x4; \
      l += 4; \
    } \
}
#else
#define PLY_INIT_LEAVES(p) \
{ \
  u32 *l = p->leaves; \
  \
  while (l < p->leaves + ARRAY_LEN (p->leaves)) \
    { \
      l[0] = init; \
      l[1] = init; \
      l[2] = init; \
      l[3] = init; \
      l += 4; \
    } \
}
#endif
139
140#define PLY_INIT(p, init, prefix_len, ply_base_len) \
141{ \
142 /* \
143 * A leaf is 'empty' if it represents a leaf from the covering PLY \
144 * i.e. if the prefix length of the leaf is less than or equal to \
145 * the prefix length of the PLY \
146 */ \
147 p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
148 ARRAY_LEN (p->leaves) : 0); \
149 memset (p->dst_address_bits_of_leaves, prefix_len, \
150 sizeof (p->dst_address_bits_of_leaves)); \
151 p->dst_address_bits_base = ply_base_len; \
152 \
153 /* Initialize leaves. */ \
154 PLY_INIT_LEAVES(p); \
155}
156
Neale Ranns04a75e32017-03-23 06:46:01 -0700157static void
Neale Rannsa3af3372017-03-28 03:49:52 -0700158ply_8_init (ip4_fib_mtrie_8_ply_t * p,
159 ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
Neale Ranns04a75e32017-03-23 06:46:01 -0700160{
Neale Rannsa3af3372017-03-28 03:49:52 -0700161 PLY_INIT (p, init, prefix_len, ply_base_len);
162}
163
164static void
165ply_16_init (ip4_fib_mtrie_16_ply_t * p,
166 ip4_fib_mtrie_leaf_t init, uword prefix_len)
167{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500168 memset (p->dst_address_bits_of_leaves, prefix_len,
169 sizeof (p->dst_address_bits_of_leaves));
Neale Rannsa3af3372017-03-28 03:49:52 -0700170 PLY_INIT_LEAVES (p);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171}
172
173static ip4_fib_mtrie_leaf_t
Neale Ranns04a75e32017-03-23 06:46:01 -0700174ply_create (ip4_fib_mtrie_t * m,
175 ip4_fib_mtrie_leaf_t init_leaf,
176 u32 leaf_prefix_len, u32 ply_base_len)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177{
Neale Rannsa3af3372017-03-28 03:49:52 -0700178 ip4_fib_mtrie_8_ply_t *p;
Neale Ranns1ec36522017-11-29 05:20:37 -0800179 void *old_heap;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700180 /* Get cache aligned ply. */
Neale Ranns1ec36522017-11-29 05:20:37 -0800181
182 old_heap = clib_mem_set_heap (ip4_main.mtrie_mheap);
Neale Rannsa3af3372017-03-28 03:49:52 -0700183 pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
Neale Ranns1ec36522017-11-29 05:20:37 -0800184 clib_mem_set_heap (old_heap);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700185
Neale Rannsa3af3372017-03-28 03:49:52 -0700186 ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
187 return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700188}
189
Neale Rannsa3af3372017-03-28 03:49:52 -0700190always_inline ip4_fib_mtrie_8_ply_t *
Ed Warnickecb9cada2015-12-08 15:45:58 -0700191get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
192{
193 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700194
Neale Rannsa3af3372017-03-28 03:49:52 -0700195 return pool_elt_at_index (ip4_ply_pool, n);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700196}
197
Dave Barachd7cb1b52016-12-09 09:52:16 -0500198void
Neale Rannsa3af3372017-03-28 03:49:52 -0700199ip4_mtrie_free (ip4_fib_mtrie_t * m)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700200{
Neale Rannsa3af3372017-03-28 03:49:52 -0700201 /* the root ply is embedded so the is nothing to do,
202 * the assumption being that the IP4 FIB table has emptied the trie
203 * before deletion.
204 */
205#if CLIB_DEBUG > 0
206 int i;
207 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
208 {
209 ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
210 }
211#endif
212}
213
214void
215ip4_mtrie_init (ip4_fib_mtrie_t * m)
216{
217 ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700218}
219
Dave Barachd7cb1b52016-12-09 09:52:16 -0500220typedef struct
221{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700222 ip4_address_t dst_address;
223 u32 dst_address_length;
224 u32 adj_index;
Neale Ranns04a75e32017-03-23 06:46:01 -0700225 u32 cover_address_length;
226 u32 cover_adj_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700227} ip4_fib_mtrie_set_unset_leaf_args_t;
228
229static void
230set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700231 ip4_fib_mtrie_8_ply_t * ply,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700232 ip4_fib_mtrie_leaf_t new_leaf,
233 uword new_leaf_dst_address_bits)
234{
235 ip4_fib_mtrie_leaf_t old_leaf;
236 uword i;
237
238 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700239
240 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
241 {
242 old_leaf = ply->leaves[i];
243
244 /* Recurse into sub plies. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500245 if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700246 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700247 ip4_fib_mtrie_8_ply_t *sub_ply =
248 get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500249 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
250 new_leaf_dst_address_bits);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700251 }
252
253 /* Replace less specific terminal leaves with new leaf. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500254 else if (new_leaf_dst_address_bits >=
255 ply->dst_address_bits_of_leaves[i])
Ed Warnickecb9cada2015-12-08 15:45:58 -0700256 {
Dave Barachd7cb1b52016-12-09 09:52:16 -0500257 __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
258 ASSERT (ply->leaves[i] == new_leaf);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700259 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
Neale Ranns04a75e32017-03-23 06:46:01 -0700260 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700261 }
262 }
263}
264
265static void
266set_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700267 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500268 u32 old_ply_index, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700269{
270 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
271 i32 n_dst_bits_next_plies;
272 u8 dst_byte;
Neale Rannsa3af3372017-03-28 03:49:52 -0700273 ip4_fib_mtrie_8_ply_t *old_ply;
274
275 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700276
Neale Rannsf0609302017-04-11 09:13:39 -0700277 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700278 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
279
Neale Rannsa3af3372017-03-28 03:49:52 -0700280 /* how many bits of the destination address are in the next PLY */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500281 n_dst_bits_next_plies =
282 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700283
284 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
285
286 /* Number of bits next plies <= 0 => insert leaves this ply. */
287 if (n_dst_bits_next_plies <= 0)
288 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700289 /* The mask length of the address to insert maps to this ply */
Neale Ranns6ff05492017-06-06 06:52:14 -0700290 uword old_leaf_is_terminal;
291 u32 i, n_dst_bits_this_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700292
Neale Rannsa3af3372017-03-28 03:49:52 -0700293 /* The number of bits, and hence slots/buckets, we will fill */
Neale Ranns04a75e32017-03-23 06:46:01 -0700294 n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500295 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
296 pow2_mask (n_dst_bits_this_ply)) == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700297
Neale Rannsa3af3372017-03-28 03:49:52 -0700298 /* Starting at the value of the byte at this section of the v4 address
299 * fill the buckets/slots of the ply */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700300 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
301 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700302 ip4_fib_mtrie_8_ply_t *new_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700303
304 old_leaf = old_ply->leaves[i];
305 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
306
Ed Warnickecb9cada2015-12-08 15:45:58 -0700307 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
308 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700309 /* The new leaf is more or equally specific than the one currently
310 * occupying the slot */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700311 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
312
313 if (old_leaf_is_terminal)
314 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700315 /* The current leaf is terminal, we can replace it with
316 * the new one */
Neale Ranns04a75e32017-03-23 06:46:01 -0700317 old_ply->n_non_empty_leafs -=
318 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Neale Rannsa3af3372017-03-28 03:49:52 -0700319
Dave Barachd7cb1b52016-12-09 09:52:16 -0500320 old_ply->dst_address_bits_of_leaves[i] =
321 a->dst_address_length;
322 __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
323 new_leaf);
324 ASSERT (old_ply->leaves[i] == new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700325
Dave Barachd7cb1b52016-12-09 09:52:16 -0500326 old_ply->n_non_empty_leafs +=
Neale Ranns04a75e32017-03-23 06:46:01 -0700327 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500328 ASSERT (old_ply->n_non_empty_leafs <=
329 ARRAY_LEN (old_ply->leaves));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700330 }
331 else
332 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700333 /* Existing leaf points to another ply. We need to place
334 * new_leaf into all more specific slots. */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335 new_ply = get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500336 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
337 a->dst_address_length);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700338 }
339 }
Dave Barachd7cb1b52016-12-09 09:52:16 -0500340 else if (!old_leaf_is_terminal)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700341 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700342 /* The current leaf is less specific and not termial (i.e. a ply),
343 * recurse on down the trie */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700344 new_ply = get_next_ply_for_leaf (m, old_leaf);
Neale Rannsa3af3372017-03-28 03:49:52 -0700345 set_leaf (m, a, new_ply - ip4_ply_pool,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500346 dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700347 }
Neale Rannsa3af3372017-03-28 03:49:52 -0700348 /*
349 * else
350 * the route we are adding is less specific than the leaf currently
351 * occupying this slot. leave it there
352 */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700353 }
354 }
355 else
356 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700357 /* The address to insert requires us to move down at a lower level of
358 * the trie - recurse on down */
359 ip4_fib_mtrie_8_ply_t *new_ply;
Neale Ranns04a75e32017-03-23 06:46:01 -0700360 u8 ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700361
Neale Ranns04a75e32017-03-23 06:46:01 -0700362 ply_base_len = 8 * (dst_address_byte_index + 1);
Neale Rannsa3af3372017-03-28 03:49:52 -0700363
Ed Warnickecb9cada2015-12-08 15:45:58 -0700364 old_leaf = old_ply->leaves[dst_byte];
Neale Rannsa3af3372017-03-28 03:49:52 -0700365
Ed Warnickecb9cada2015-12-08 15:45:58 -0700366 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
367 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700368 /* There is a leaf occupying the slot. Replace it with a new ply */
Neale Ranns04a75e32017-03-23 06:46:01 -0700369 old_ply->n_non_empty_leafs -=
370 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
371
372 new_leaf = ply_create (m, old_leaf,
373 clib_max (old_ply->dst_address_bits_of_leaves
374 [dst_byte], ply_base_len),
375 ply_base_len);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700376 new_ply = get_next_ply_for_leaf (m, new_leaf);
377
378 /* Refetch since ply_create may move pool. */
Neale Rannsa3af3372017-03-28 03:49:52 -0700379 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700380
Dave Barachd7cb1b52016-12-09 09:52:16 -0500381 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
382 new_leaf);
383 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700384 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700385
Neale Rannsa3af3372017-03-28 03:49:52 -0700386 old_ply->n_non_empty_leafs +=
387 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
Neale Ranns04a75e32017-03-23 06:46:01 -0700388 ASSERT (old_ply->n_non_empty_leafs >= 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700389 }
390 else
391 new_ply = get_next_ply_for_leaf (m, old_leaf);
392
Neale Rannsa3af3372017-03-28 03:49:52 -0700393 set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
394 }
395}
396
397static void
398set_root_leaf (ip4_fib_mtrie_t * m,
399 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
400{
401 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
402 ip4_fib_mtrie_16_ply_t *old_ply;
403 i32 n_dst_bits_next_plies;
404 u16 dst_byte;
405
406 old_ply = &m->root_ply;
407
Neale Rannsf0609302017-04-11 09:13:39 -0700408 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700409
410 /* how many bits of the destination address are in the next PLY */
411 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
412
413 dst_byte = a->dst_address.as_u16[0];
414
415 /* Number of bits next plies <= 0 => insert leaves this ply. */
416 if (n_dst_bits_next_plies <= 0)
417 {
418 /* The mask length of the address to insert maps to this ply */
Neale Ranns6ff05492017-06-06 06:52:14 -0700419 uword old_leaf_is_terminal;
420 u32 i, n_dst_bits_this_ply;
Neale Rannsa3af3372017-03-28 03:49:52 -0700421
422 /* The number of bits, and hence slots/buckets, we will fill */
423 n_dst_bits_this_ply = 16 - a->dst_address_length;
424 ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
425 pow2_mask (n_dst_bits_this_ply)) == 0);
426
427 /* Starting at the value of the byte at this section of the v4 address
428 * fill the buckets/slots of the ply */
429 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
430 {
431 ip4_fib_mtrie_8_ply_t *new_ply;
432 u16 slot;
433
434 slot = clib_net_to_host_u16 (dst_byte);
435 slot += i;
436 slot = clib_host_to_net_u16 (slot);
437
438 old_leaf = old_ply->leaves[slot];
439 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
440
441 if (a->dst_address_length >=
442 old_ply->dst_address_bits_of_leaves[slot])
443 {
444 /* The new leaf is more or equally specific than the one currently
445 * occupying the slot */
446 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
447
448 if (old_leaf_is_terminal)
449 {
450 /* The current leaf is terminal, we can replace it with
451 * the new one */
452 old_ply->dst_address_bits_of_leaves[slot] =
453 a->dst_address_length;
454 __sync_val_compare_and_swap (&old_ply->leaves[slot],
455 old_leaf, new_leaf);
456 ASSERT (old_ply->leaves[slot] == new_leaf);
457 }
458 else
459 {
460 /* Existing leaf points to another ply. We need to place
461 * new_leaf into all more specific slots. */
462 new_ply = get_next_ply_for_leaf (m, old_leaf);
463 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
464 a->dst_address_length);
465 }
466 }
467 else if (!old_leaf_is_terminal)
468 {
469 /* The current leaf is less specific and not termial (i.e. a ply),
470 * recurse on down the trie */
471 new_ply = get_next_ply_for_leaf (m, old_leaf);
472 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
473 }
474 /*
475 * else
476 * the route we are adding is less specific than the leaf currently
477 * occupying this slot. leave it there
478 */
479 }
480 }
481 else
482 {
483 /* The address to insert requires us to move down at a lower level of
484 * the trie - recurse on down */
485 ip4_fib_mtrie_8_ply_t *new_ply;
486 u8 ply_base_len;
487
488 ply_base_len = 16;
489
490 old_leaf = old_ply->leaves[dst_byte];
491
492 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
493 {
494 /* There is a leaf occupying the slot. Replace it with a new ply */
495 new_leaf = ply_create (m, old_leaf,
496 clib_max (old_ply->dst_address_bits_of_leaves
497 [dst_byte], ply_base_len),
498 ply_base_len);
499 new_ply = get_next_ply_for_leaf (m, new_leaf);
500
501 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
502 new_leaf);
503 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
504 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
505 }
506 else
507 new_ply = get_next_ply_for_leaf (m, old_leaf);
508
509 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700510 }
511}
512
513static uword
514unset_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700515 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
516 ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700517{
518 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
519 i32 n_dst_bits_next_plies;
Dave Barach6f6f34f2016-08-08 13:05:31 -0400520 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700521 u8 dst_byte;
522
Neale Rannsf0609302017-04-11 09:13:39 -0700523 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700524 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
525
Dave Barachd7cb1b52016-12-09 09:52:16 -0500526 n_dst_bits_next_plies =
527 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700528
529 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
530 if (n_dst_bits_next_plies < 0)
531 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
532
Dave Barachd7cb1b52016-12-09 09:52:16 -0500533 n_dst_bits_this_ply =
534 n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700535 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
536
537 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
538
539 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
540 {
541 old_leaf = old_ply->leaves[i];
542 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
543
544 if (old_leaf == del_leaf
Dave Barachd7cb1b52016-12-09 09:52:16 -0500545 || (!old_leaf_is_terminal
546 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
547 dst_address_byte_index + 1)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700548 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700549 old_ply->n_non_empty_leafs -=
550 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700551
Neale Ranns04a75e32017-03-23 06:46:01 -0700552 old_ply->leaves[i] =
553 ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
554 old_ply->dst_address_bits_of_leaves[i] =
555 clib_max (old_ply->dst_address_bits_base,
556 a->cover_address_length);
557
558 old_ply->n_non_empty_leafs +=
559 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700560
561 ASSERT (old_ply->n_non_empty_leafs >= 0);
562 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
563 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700564 pool_put (ip4_ply_pool, old_ply);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700565 /* Old ply was deleted. */
566 return 1;
567 }
Neale Ranns04a75e32017-03-23 06:46:01 -0700568#if CLIB_DEBUG > 0
569 else if (dst_address_byte_index)
570 {
571 int ii, count = 0;
572 for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
573 {
574 count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
575 }
576 ASSERT (count);
577 }
578#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700579 }
580 }
581
582 /* Old ply was not deleted. */
583 return 0;
584}
585
Neale Rannsa3af3372017-03-28 03:49:52 -0700586static void
587unset_root_leaf (ip4_fib_mtrie_t * m,
588 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700589{
Neale Rannsa3af3372017-03-28 03:49:52 -0700590 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
591 i32 n_dst_bits_next_plies;
592 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
593 u16 dst_byte;
594 ip4_fib_mtrie_16_ply_t *old_ply;
595
Neale Rannsf0609302017-04-11 09:13:39 -0700596 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700597
598 old_ply = &m->root_ply;
599 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
600
601 dst_byte = a->dst_address.as_u16[0];
602
603 n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
604 (16 - a->dst_address_length) : 0);
605
606 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
607
608 /* Starting at the value of the byte at this section of the v4 address
609 * fill the buckets/slots of the ply */
610 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
611 {
612 u16 slot;
613
614 slot = clib_net_to_host_u16 (dst_byte);
615 slot += i;
616 slot = clib_host_to_net_u16 (slot);
617
618 old_leaf = old_ply->leaves[slot];
619 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
620
621 if (old_leaf == del_leaf
622 || (!old_leaf_is_terminal
623 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
624 {
625 old_ply->leaves[slot] =
626 ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
627 old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
628 }
629 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700630}
631
632void
Neale Rannsa3af3372017-03-28 03:49:52 -0700633ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
634 const ip4_address_t * dst_address,
635 u32 dst_address_length, u32 adj_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700636{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700637 ip4_fib_mtrie_set_unset_leaf_args_t a;
Dave Barachd7cb1b52016-12-09 09:52:16 -0500638 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700639
Ed Warnickecb9cada2015-12-08 15:45:58 -0700640 /* Honor dst_address_length. Fib masks are in network byte order */
Neale Rannsa3af3372017-03-28 03:49:52 -0700641 a.dst_address.as_u32 = (dst_address->as_u32 &
642 im->fib_masks[dst_address_length]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700643 a.dst_address_length = dst_address_length;
644 a.adj_index = adj_index;
645
Neale Rannsa3af3372017-03-28 03:49:52 -0700646 set_root_leaf (m, &a);
647}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700648
Neale Rannsa3af3372017-03-28 03:49:52 -0700649void
650ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
651 const ip4_address_t * dst_address,
652 u32 dst_address_length,
653 u32 adj_index,
654 u32 cover_address_length, u32 cover_adj_index)
655{
656 ip4_fib_mtrie_set_unset_leaf_args_t a;
657 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700658
Neale Rannsa3af3372017-03-28 03:49:52 -0700659 /* Honor dst_address_length. Fib masks are in network byte order */
660 a.dst_address.as_u32 = (dst_address->as_u32 &
661 im->fib_masks[dst_address_length]);
662 a.dst_address_length = dst_address_length;
663 a.adj_index = adj_index;
664 a.cover_adj_index = cover_adj_index;
665 a.cover_address_length = cover_address_length;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700666
Neale Rannsa3af3372017-03-28 03:49:52 -0700667 /* the top level ply is never removed */
668 unset_root_leaf (m, &a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700669}
670
Ed Warnickecb9cada2015-12-08 15:45:58 -0700671/* Returns number of bytes of memory used by mtrie. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500672static uword
Neale Rannsa3af3372017-03-28 03:49:52 -0700673mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700674{
675 uword bytes, i;
676
Ed Warnickecb9cada2015-12-08 15:45:58 -0700677 bytes = sizeof (p[0]);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500678 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700679 {
680 ip4_fib_mtrie_leaf_t l = p->leaves[i];
681 if (ip4_fib_mtrie_leaf_is_next_ply (l))
Neale Rannsa3af3372017-03-28 03:49:52 -0700682 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
683 }
684
685 return bytes;
686}
687
688/* Returns number of bytes of memory used by mtrie. */
Neale Rannsc87aafa2017-11-29 00:59:31 -0800689uword
690ip4_fib_mtrie_memory_usage (ip4_fib_mtrie_t * m)
Neale Rannsa3af3372017-03-28 03:49:52 -0700691{
692 uword bytes, i;
693
694 bytes = sizeof (*m);
695 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
696 {
697 ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
698 if (ip4_fib_mtrie_leaf_is_next_ply (l))
699 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700700 }
701
702 return bytes;
703}
704
Dave Barachd7cb1b52016-12-09 09:52:16 -0500705static u8 *
706format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700707{
708 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
709
Neale Ranns04a75e32017-03-23 06:46:01 -0700710 if (ip4_fib_mtrie_leaf_is_terminal (l))
711 s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700712 else
713 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
714 return s;
715}
716
/*
 * FORMAT_PLY (s, _p, _i, _base_address, _ply_max_len, _indent)
 *
 * Append a description of slot '_i' of ply '_p' to the format vector
 * 's': the slot's prefix (address and recorded length) followed by its
 * leaf. If the leaf refers to another ply, that ply is formatted as
 * well via format_ip4_fib_mtrie_ply.
 *
 *  s             - format vector; assigned to, and also the value of
 *                  the expression
 *  _p            - ply the slot belongs to
 *  _i            - slot index; also used as the address contribution,
 *                  shifted left by (32 - _ply_max_len)
 *  _base_address - host-order address covered by the ply
 *  _ply_max_len  - longest prefix length representable in this ply
 *  _indent       - indentation of the enclosing output
 *
 * The leaf is read through the '_p' argument; the macro used to read a
 * variable literally named 'p' from the expansion scope instead.
 *
 * NOTE: 'm' (the mtrie) is not a parameter; it is still taken from the
 * scope in which the macro is expanded.
 */
#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \
({ \
  u32 a, ia_length; \
  ip4_address_t ia; \
  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)]; \
  \
  a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \
  ia.as_u32 = clib_host_to_net_u32 (a); \
  ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
  s = format (s, "\n%U%20U %U", \
	      format_white_space, (_indent) + 2, \
	      format_ip4_address_and_length, &ia, ia_length, \
	      format_ip4_fib_mtrie_leaf, _l); \
  \
  if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \
    s = format (s, "\n%U%U", \
		format_white_space, (_indent) + 2, \
		format_ip4_fib_mtrie_ply, m, a, \
		ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \
  s; \
})
738
Dave Barachd7cb1b52016-12-09 09:52:16 -0500739static u8 *
740format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700741{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500742 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700743 u32 base_address = va_arg (*va, u32);
744 u32 ply_index = va_arg (*va, u32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700745 ip4_fib_mtrie_8_ply_t *p;
Christophe Fontained3c008d2017-10-02 18:10:54 +0200746 u32 indent;
Neale Rannsa3af3372017-03-28 03:49:52 -0700747 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700748
Neale Rannsa3af3372017-03-28 03:49:52 -0700749 p = pool_elt_at_index (ip4_ply_pool, ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700750 indent = format_get_indent (s);
Neale Rannsa3af3372017-03-28 03:49:52 -0700751 s = format (s, "ply index %d, %d non-empty leaves", ply_index,
752 p->n_non_empty_leafs);
753
Ed Warnickecb9cada2015-12-08 15:45:58 -0700754 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
755 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700756 if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700757 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700758 FORMAT_PLY (s, p, i, base_address,
759 p->dst_address_bits_base + 8, indent);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700760 }
761 }
762
763 return s;
764}
765
Dave Barachd7cb1b52016-12-09 09:52:16 -0500766u8 *
767format_ip4_fib_mtrie (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700768{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500769 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Neale Ranns39194252017-11-27 01:03:25 -0800770 int verbose = va_arg (*va, int);
Neale Rannsa3af3372017-03-28 03:49:52 -0700771 ip4_fib_mtrie_16_ply_t *p;
772 u32 base_address = 0;
773 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700774
Neale Rannsa3af3372017-03-28 03:49:52 -0700775 s = format (s, "%d plies, memory usage %U\n",
776 pool_elts (ip4_ply_pool),
Neale Rannsc87aafa2017-11-29 00:59:31 -0800777 format_memory_size, ip4_fib_mtrie_memory_usage (m));
778 s = format (s, "root-ply");
779 p = &m->root_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700780
Neale Ranns39194252017-11-27 01:03:25 -0800781 if (verbose)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700782 {
Neale Ranns39194252017-11-27 01:03:25 -0800783 s = format (s, "root-ply");
784 p = &m->root_ply;
Neale Rannsa3af3372017-03-28 03:49:52 -0700785
Neale Ranns39194252017-11-27 01:03:25 -0800786 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Neale Rannsa3af3372017-03-28 03:49:52 -0700787 {
Neale Ranns39194252017-11-27 01:03:25 -0800788 u16 slot;
789
790 slot = clib_host_to_net_u16 (i);
791
792 if (p->dst_address_bits_of_leaves[slot] > 0)
793 {
794 FORMAT_PLY (s, p, slot, base_address, 16, 2);
795 }
Neale Rannsa3af3372017-03-28 03:49:52 -0700796 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700797 }
798
799 return s;
800}
Dave Barachd7cb1b52016-12-09 09:52:16 -0500801
Neale Ranns1ec36522017-11-29 05:20:37 -0800802/** Default heap size for the IPv4 mtries */
803#define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32<<20)
804
Neale Rannsa3af3372017-03-28 03:49:52 -0700805static clib_error_t *
806ip4_mtrie_module_init (vlib_main_t * vm)
807{
Neale Rannsa3af3372017-03-28 03:49:52 -0700808 CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
Neale Ranns1ec36522017-11-29 05:20:37 -0800809 ip4_main_t *im = &ip4_main;
810 clib_error_t *error = NULL;
811 uword *old_heap;
Neale Rannsa3af3372017-03-28 03:49:52 -0700812
Neale Ranns1ec36522017-11-29 05:20:37 -0800813 if (0 == im->mtrie_heap_size)
814 im->mtrie_heap_size = IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE;
815 im->mtrie_mheap = mheap_alloc (0, im->mtrie_heap_size);
816
817 /* Burn one ply so index 0 is taken */
818 old_heap = clib_mem_set_heap (ip4_main.mtrie_mheap);
Neale Rannsa3af3372017-03-28 03:49:52 -0700819 pool_get (ip4_ply_pool, p);
Neale Ranns1ec36522017-11-29 05:20:37 -0800820 clib_mem_set_heap (old_heap);
Neale Rannsa3af3372017-03-28 03:49:52 -0700821
Neale Ranns1ec36522017-11-29 05:20:37 -0800822 return (error);
Neale Rannsa3af3372017-03-28 03:49:52 -0700823}
824
825VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
826
Dave Barachd7cb1b52016-12-09 09:52:16 -0500827/*
828 * fd.io coding-style-patch-verification: ON
829 *
830 * Local Variables:
831 * eval: (c-set-style "gnu")
832 * End:
833 */