blob: e1987d567820730419b34868262156852204f689 [file] [log] [blame]
Ed Warnickecb9cada2015-12-08 15:45:58 -07001/*
2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15/*
16 * ip/ip4_fib.h: ip4 mtrie fib
17 *
18 * Copyright (c) 2012 Eliot Dresselhaus
19 *
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
27 *
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
30 *
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40#include <vnet/ip/ip.h>
Neale Rannsa3af3372017-03-28 03:49:52 -070041#include <vnet/ip/ip4_mtrie.h>
42#include <vnet/fib/ip4_fib.h>
43
44
45/**
46 * Global pool of IPv4 8bit PLYs
47 */
48ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
Ed Warnickecb9cada2015-12-08 15:45:58 -070049
Neale Ranns04a75e32017-03-23 06:46:01 -070050always_inline u32
Neale Rannsa3af3372017-03-28 03:49:52 -070051ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
Ed Warnickecb9cada2015-12-08 15:45:58 -070052{
Neale Ranns04a75e32017-03-23 06:46:01 -070053 /*
54 * It's 'non-empty' if the length of the leaf stored is greater than the
55 * length of a leaf in the covering ply. i.e. the leaf is more specific
56 * than it's would be cover in the covering ply
57 */
58 if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
59 return (1);
60 return (0);
61}
62
63always_inline ip4_fib_mtrie_leaf_t
64ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
65{
66 ip4_fib_mtrie_leaf_t l;
67 l = 1 + 2 * adj_index;
68 ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
69 return l;
70}
71
72always_inline u32
73ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
74{
75 return (n & 1) == 0;
76}
77
78always_inline u32
79ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
80{
81 ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
82 return n >> 1;
83}
84
85always_inline ip4_fib_mtrie_leaf_t
86ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
87{
88 ip4_fib_mtrie_leaf_t l;
89 l = 0 + 2 * i;
90 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
91 return l;
92}
93
/*
 * PLY_X4_SPLAT_INIT (init_x4, init)
 *
 * Assign to the u32x4 lvalue init_x4 a vector whose four u32 elements all
 * equal init.
 */
#ifdef __ALTIVEC__
/* AltiVec build: fill the vector element by element through a union.
 * Note that init is evaluated four times here. */
#define PLY_X4_SPLAT_INIT(init_x4, init)        \
{                                               \
  u32x4_union_t _ply_splat;                     \
  _ply_splat.as_u32[0] = init;                  \
  _ply_splat.as_u32[1] = init;                  \
  _ply_splat.as_u32[2] = init;                  \
  _ply_splat.as_u32[3] = init;                  \
  init_x4 = _ply_splat.as_u32x4;                \
}
#else
/* Everything else: use the u32x4_splat() helper. */
#define PLY_X4_SPLAT_INIT(init_x4, init)        \
  init_x4 = u32x4_splat (init);
#endif
108
/*
 * PLY_INIT_LEAVES (p)
 *
 * Set every leaf of ply p to the value of a variable named 'init'.
 *
 * NOTE: 'init' is not a macro parameter; it is picked up from the scope in
 * which the macro is expanded, so the expanding function must have a
 * variable (or parameter) with exactly that name.
 *
 * Leaves are written four entries per iteration, so the leaf array length
 * must be a multiple of four entries.
 */
#ifdef CLIB_HAVE_VEC128
/* 128-bit vector build: write the leaves as u32x4 vectors. */
#define PLY_INIT_LEAVES(p)                                              \
{                                                                       \
  u32x4 _ply_init_x4;                                                   \
  uword _ply_i;                                                         \
                                                                        \
  PLY_X4_SPLAT_INIT (_ply_init_x4, init);                               \
  for (_ply_i = 0;                                                      \
       _ply_i < ARRAY_LEN ((p)->leaves_as_u32x4);                       \
       _ply_i += 4)                                                     \
    {                                                                   \
      (p)->leaves_as_u32x4[_ply_i + 0] = _ply_init_x4;                  \
      (p)->leaves_as_u32x4[_ply_i + 1] = _ply_init_x4;                  \
      (p)->leaves_as_u32x4[_ply_i + 2] = _ply_init_x4;                  \
      (p)->leaves_as_u32x4[_ply_i + 3] = _ply_init_x4;                  \
    }                                                                   \
}
#else
/* Scalar build: write the leaves one u32 at a time. */
#define PLY_INIT_LEAVES(p)                                              \
{                                                                       \
  uword _ply_i;                                                         \
                                                                        \
  for (_ply_i = 0; _ply_i < ARRAY_LEN ((p)->leaves); _ply_i += 4)       \
    {                                                                   \
      (p)->leaves[_ply_i + 0] = init;                                   \
      (p)->leaves[_ply_i + 1] = init;                                   \
      (p)->leaves[_ply_i + 2] = init;                                   \
      (p)->leaves[_ply_i + 3] = init;                                   \
    }                                                                   \
}
#endif
139
/*
 * PLY_INIT (p, init, prefix_len, ply_base_len)
 *
 * Initialise ply p: record ply_base_len as the ply's base length, stamp
 * prefix_len as the prefix length of every slot, set the non-empty counter
 * and fill every leaf.
 *
 * NOTE: the leaves are filled by PLY_INIT_LEAVES, which reads a variable
 * literally named 'init' from the expanding scope. The 'init' parameter of
 * this macro only has an effect because callers pass a variable of that
 * name.
 */
#define PLY_INIT(p, init, prefix_len, ply_base_len)                     \
{                                                                       \
  (p)->dst_address_bits_base = ply_base_len;                            \
  memset ((p)->dst_address_bits_of_leaves, prefix_len,                  \
          sizeof ((p)->dst_address_bits_of_leaves));                    \
                                                                        \
  /*                                                                    \
   * A leaf is 'empty' when it merely repeats what the covering ply     \
   * holds, i.e. when its prefix length does not exceed the base        \
   * length of this ply. All slots start with the same prefix length,   \
   * so either all of them are non-empty or none is.                    \
   */                                                                   \
  if ((prefix_len) > (ply_base_len))                                    \
    (p)->n_non_empty_leafs = ARRAY_LEN ((p)->leaves);                   \
  else                                                                  \
    (p)->n_non_empty_leafs = 0;                                         \
                                                                        \
  /* Fill the leaves themselves. */                                     \
  PLY_INIT_LEAVES (p);                                                  \
}
156
Neale Ranns04a75e32017-03-23 06:46:01 -0700157static void
Neale Rannsa3af3372017-03-28 03:49:52 -0700158ply_8_init (ip4_fib_mtrie_8_ply_t * p,
159 ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
Neale Ranns04a75e32017-03-23 06:46:01 -0700160{
Neale Rannsa3af3372017-03-28 03:49:52 -0700161 PLY_INIT (p, init, prefix_len, ply_base_len);
162}
163
164static void
165ply_16_init (ip4_fib_mtrie_16_ply_t * p,
166 ip4_fib_mtrie_leaf_t init, uword prefix_len)
167{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500168 memset (p->dst_address_bits_of_leaves, prefix_len,
169 sizeof (p->dst_address_bits_of_leaves));
Neale Rannsa3af3372017-03-28 03:49:52 -0700170 PLY_INIT_LEAVES (p);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700171}
172
173static ip4_fib_mtrie_leaf_t
Neale Ranns04a75e32017-03-23 06:46:01 -0700174ply_create (ip4_fib_mtrie_t * m,
175 ip4_fib_mtrie_leaf_t init_leaf,
176 u32 leaf_prefix_len, u32 ply_base_len)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700177{
Neale Rannsa3af3372017-03-28 03:49:52 -0700178 ip4_fib_mtrie_8_ply_t *p;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700179
180 /* Get cache aligned ply. */
Neale Rannsa3af3372017-03-28 03:49:52 -0700181 pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700182
Neale Rannsa3af3372017-03-28 03:49:52 -0700183 ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
184 return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700185}
186
Neale Rannsa3af3372017-03-28 03:49:52 -0700187always_inline ip4_fib_mtrie_8_ply_t *
Ed Warnickecb9cada2015-12-08 15:45:58 -0700188get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
189{
190 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700191
Neale Rannsa3af3372017-03-28 03:49:52 -0700192 return pool_elt_at_index (ip4_ply_pool, n);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700193}
194
Dave Barachd7cb1b52016-12-09 09:52:16 -0500195void
Neale Rannsa3af3372017-03-28 03:49:52 -0700196ip4_mtrie_free (ip4_fib_mtrie_t * m)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700197{
Neale Rannsa3af3372017-03-28 03:49:52 -0700198 /* the root ply is embedded so the is nothing to do,
199 * the assumption being that the IP4 FIB table has emptied the trie
200 * before deletion.
201 */
202#if CLIB_DEBUG > 0
203 int i;
204 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
205 {
206 ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
207 }
208#endif
209}
210
211void
212ip4_mtrie_init (ip4_fib_mtrie_t * m)
213{
214 ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700215}
216
Dave Barachd7cb1b52016-12-09 09:52:16 -0500217typedef struct
218{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700219 ip4_address_t dst_address;
220 u32 dst_address_length;
221 u32 adj_index;
Neale Ranns04a75e32017-03-23 06:46:01 -0700222 u32 cover_address_length;
223 u32 cover_adj_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700224} ip4_fib_mtrie_set_unset_leaf_args_t;
225
226static void
227set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700228 ip4_fib_mtrie_8_ply_t * ply,
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229 ip4_fib_mtrie_leaf_t new_leaf,
230 uword new_leaf_dst_address_bits)
231{
232 ip4_fib_mtrie_leaf_t old_leaf;
233 uword i;
234
235 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700236
237 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
238 {
239 old_leaf = ply->leaves[i];
240
241 /* Recurse into sub plies. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500242 if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700244 ip4_fib_mtrie_8_ply_t *sub_ply =
245 get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500246 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
247 new_leaf_dst_address_bits);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700248 }
249
250 /* Replace less specific terminal leaves with new leaf. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500251 else if (new_leaf_dst_address_bits >=
252 ply->dst_address_bits_of_leaves[i])
Ed Warnickecb9cada2015-12-08 15:45:58 -0700253 {
Dave Barachd7cb1b52016-12-09 09:52:16 -0500254 __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
255 ASSERT (ply->leaves[i] == new_leaf);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700256 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
Neale Ranns04a75e32017-03-23 06:46:01 -0700257 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700258 }
259 }
260}
261
262static void
263set_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700264 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500265 u32 old_ply_index, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700266{
267 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
268 i32 n_dst_bits_next_plies;
269 u8 dst_byte;
Neale Rannsa3af3372017-03-28 03:49:52 -0700270 ip4_fib_mtrie_8_ply_t *old_ply;
271
272 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700273
Neale Rannsf0609302017-04-11 09:13:39 -0700274 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700275 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
276
Neale Rannsa3af3372017-03-28 03:49:52 -0700277 /* how many bits of the destination address are in the next PLY */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500278 n_dst_bits_next_plies =
279 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700280
281 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
282
283 /* Number of bits next plies <= 0 => insert leaves this ply. */
284 if (n_dst_bits_next_plies <= 0)
285 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700286 /* The mask length of the address to insert maps to this ply */
Neale Rannsf0609302017-04-11 09:13:39 -0700287 uword i, old_leaf_is_terminal;
288 u32 n_dst_bits_this_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700289
Neale Rannsa3af3372017-03-28 03:49:52 -0700290 /* The number of bits, and hence slots/buckets, we will fill */
Neale Ranns04a75e32017-03-23 06:46:01 -0700291 n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500292 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
293 pow2_mask (n_dst_bits_this_ply)) == 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700294
Neale Rannsa3af3372017-03-28 03:49:52 -0700295 /* Starting at the value of the byte at this section of the v4 address
296 * fill the buckets/slots of the ply */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700297 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
298 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700299 ip4_fib_mtrie_8_ply_t *new_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700300
301 old_leaf = old_ply->leaves[i];
302 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
303
Ed Warnickecb9cada2015-12-08 15:45:58 -0700304 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
305 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700306 /* The new leaf is more or equally specific than the one currently
307 * occupying the slot */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700308 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
309
310 if (old_leaf_is_terminal)
311 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700312 /* The current leaf is terminal, we can replace it with
313 * the new one */
Neale Ranns04a75e32017-03-23 06:46:01 -0700314 old_ply->n_non_empty_leafs -=
315 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Neale Rannsa3af3372017-03-28 03:49:52 -0700316
Dave Barachd7cb1b52016-12-09 09:52:16 -0500317 old_ply->dst_address_bits_of_leaves[i] =
318 a->dst_address_length;
319 __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
320 new_leaf);
321 ASSERT (old_ply->leaves[i] == new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700322
Dave Barachd7cb1b52016-12-09 09:52:16 -0500323 old_ply->n_non_empty_leafs +=
Neale Ranns04a75e32017-03-23 06:46:01 -0700324 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500325 ASSERT (old_ply->n_non_empty_leafs <=
326 ARRAY_LEN (old_ply->leaves));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700327 }
328 else
329 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700330 /* Existing leaf points to another ply. We need to place
331 * new_leaf into all more specific slots. */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700332 new_ply = get_next_ply_for_leaf (m, old_leaf);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500333 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
334 a->dst_address_length);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335 }
336 }
Dave Barachd7cb1b52016-12-09 09:52:16 -0500337 else if (!old_leaf_is_terminal)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700338 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700339 /* The current leaf is less specific and not termial (i.e. a ply),
340 * recurse on down the trie */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700341 new_ply = get_next_ply_for_leaf (m, old_leaf);
Neale Rannsa3af3372017-03-28 03:49:52 -0700342 set_leaf (m, a, new_ply - ip4_ply_pool,
Dave Barachd7cb1b52016-12-09 09:52:16 -0500343 dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700344 }
Neale Rannsa3af3372017-03-28 03:49:52 -0700345 /*
346 * else
347 * the route we are adding is less specific than the leaf currently
348 * occupying this slot. leave it there
349 */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700350 }
351 }
352 else
353 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700354 /* The address to insert requires us to move down at a lower level of
355 * the trie - recurse on down */
356 ip4_fib_mtrie_8_ply_t *new_ply;
Neale Ranns04a75e32017-03-23 06:46:01 -0700357 u8 ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700358
Neale Ranns04a75e32017-03-23 06:46:01 -0700359 ply_base_len = 8 * (dst_address_byte_index + 1);
Neale Rannsa3af3372017-03-28 03:49:52 -0700360
Ed Warnickecb9cada2015-12-08 15:45:58 -0700361 old_leaf = old_ply->leaves[dst_byte];
Neale Rannsa3af3372017-03-28 03:49:52 -0700362
Ed Warnickecb9cada2015-12-08 15:45:58 -0700363 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
364 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700365 /* There is a leaf occupying the slot. Replace it with a new ply */
Neale Ranns04a75e32017-03-23 06:46:01 -0700366 old_ply->n_non_empty_leafs -=
367 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
368
369 new_leaf = ply_create (m, old_leaf,
370 clib_max (old_ply->dst_address_bits_of_leaves
371 [dst_byte], ply_base_len),
372 ply_base_len);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700373 new_ply = get_next_ply_for_leaf (m, new_leaf);
374
375 /* Refetch since ply_create may move pool. */
Neale Rannsa3af3372017-03-28 03:49:52 -0700376 old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700377
Dave Barachd7cb1b52016-12-09 09:52:16 -0500378 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
379 new_leaf);
380 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
Neale Ranns04a75e32017-03-23 06:46:01 -0700381 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700382
Neale Rannsa3af3372017-03-28 03:49:52 -0700383 old_ply->n_non_empty_leafs +=
384 ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
Neale Ranns04a75e32017-03-23 06:46:01 -0700385 ASSERT (old_ply->n_non_empty_leafs >= 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700386 }
387 else
388 new_ply = get_next_ply_for_leaf (m, old_leaf);
389
Neale Rannsa3af3372017-03-28 03:49:52 -0700390 set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
391 }
392}
393
394static void
395set_root_leaf (ip4_fib_mtrie_t * m,
396 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
397{
398 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
399 ip4_fib_mtrie_16_ply_t *old_ply;
400 i32 n_dst_bits_next_plies;
401 u16 dst_byte;
402
403 old_ply = &m->root_ply;
404
Neale Rannsf0609302017-04-11 09:13:39 -0700405 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700406
407 /* how many bits of the destination address are in the next PLY */
408 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
409
410 dst_byte = a->dst_address.as_u16[0];
411
412 /* Number of bits next plies <= 0 => insert leaves this ply. */
413 if (n_dst_bits_next_plies <= 0)
414 {
415 /* The mask length of the address to insert maps to this ply */
Neale Rannsf0609302017-04-11 09:13:39 -0700416 uword i, old_leaf_is_terminal;
417 u32 n_dst_bits_this_ply;
Neale Rannsa3af3372017-03-28 03:49:52 -0700418
419 /* The number of bits, and hence slots/buckets, we will fill */
420 n_dst_bits_this_ply = 16 - a->dst_address_length;
421 ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
422 pow2_mask (n_dst_bits_this_ply)) == 0);
423
424 /* Starting at the value of the byte at this section of the v4 address
425 * fill the buckets/slots of the ply */
426 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
427 {
428 ip4_fib_mtrie_8_ply_t *new_ply;
429 u16 slot;
430
431 slot = clib_net_to_host_u16 (dst_byte);
432 slot += i;
433 slot = clib_host_to_net_u16 (slot);
434
435 old_leaf = old_ply->leaves[slot];
436 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
437
438 if (a->dst_address_length >=
439 old_ply->dst_address_bits_of_leaves[slot])
440 {
441 /* The new leaf is more or equally specific than the one currently
442 * occupying the slot */
443 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
444
445 if (old_leaf_is_terminal)
446 {
447 /* The current leaf is terminal, we can replace it with
448 * the new one */
449 old_ply->dst_address_bits_of_leaves[slot] =
450 a->dst_address_length;
451 __sync_val_compare_and_swap (&old_ply->leaves[slot],
452 old_leaf, new_leaf);
453 ASSERT (old_ply->leaves[slot] == new_leaf);
454 }
455 else
456 {
457 /* Existing leaf points to another ply. We need to place
458 * new_leaf into all more specific slots. */
459 new_ply = get_next_ply_for_leaf (m, old_leaf);
460 set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
461 a->dst_address_length);
462 }
463 }
464 else if (!old_leaf_is_terminal)
465 {
466 /* The current leaf is less specific and not termial (i.e. a ply),
467 * recurse on down the trie */
468 new_ply = get_next_ply_for_leaf (m, old_leaf);
469 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
470 }
471 /*
472 * else
473 * the route we are adding is less specific than the leaf currently
474 * occupying this slot. leave it there
475 */
476 }
477 }
478 else
479 {
480 /* The address to insert requires us to move down at a lower level of
481 * the trie - recurse on down */
482 ip4_fib_mtrie_8_ply_t *new_ply;
483 u8 ply_base_len;
484
485 ply_base_len = 16;
486
487 old_leaf = old_ply->leaves[dst_byte];
488
489 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
490 {
491 /* There is a leaf occupying the slot. Replace it with a new ply */
492 new_leaf = ply_create (m, old_leaf,
493 clib_max (old_ply->dst_address_bits_of_leaves
494 [dst_byte], ply_base_len),
495 ply_base_len);
496 new_ply = get_next_ply_for_leaf (m, new_leaf);
497
498 __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
499 new_leaf);
500 ASSERT (old_ply->leaves[dst_byte] == new_leaf);
501 old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
502 }
503 else
504 new_ply = get_next_ply_for_leaf (m, old_leaf);
505
506 set_leaf (m, a, new_ply - ip4_ply_pool, 2);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700507 }
508}
509
510static uword
511unset_leaf (ip4_fib_mtrie_t * m,
Neale Rannsa3af3372017-03-28 03:49:52 -0700512 const ip4_fib_mtrie_set_unset_leaf_args_t * a,
513 ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700514{
515 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
516 i32 n_dst_bits_next_plies;
Dave Barach6f6f34f2016-08-08 13:05:31 -0400517 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700518 u8 dst_byte;
519
Neale Rannsf0609302017-04-11 09:13:39 -0700520 ASSERT (a->dst_address_length <= 32);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700521 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
522
Dave Barachd7cb1b52016-12-09 09:52:16 -0500523 n_dst_bits_next_plies =
524 a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700525
526 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
527 if (n_dst_bits_next_plies < 0)
528 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
529
Dave Barachd7cb1b52016-12-09 09:52:16 -0500530 n_dst_bits_this_ply =
531 n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700532 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
533
534 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
535
536 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
537 {
538 old_leaf = old_ply->leaves[i];
539 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
540
541 if (old_leaf == del_leaf
Dave Barachd7cb1b52016-12-09 09:52:16 -0500542 || (!old_leaf_is_terminal
543 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
544 dst_address_byte_index + 1)))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700545 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700546 old_ply->n_non_empty_leafs -=
547 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700548
Neale Ranns04a75e32017-03-23 06:46:01 -0700549 old_ply->leaves[i] =
550 ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
551 old_ply->dst_address_bits_of_leaves[i] =
552 clib_max (old_ply->dst_address_bits_base,
553 a->cover_address_length);
554
555 old_ply->n_non_empty_leafs +=
556 ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700557
558 ASSERT (old_ply->n_non_empty_leafs >= 0);
559 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
560 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700561 pool_put (ip4_ply_pool, old_ply);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700562 /* Old ply was deleted. */
563 return 1;
564 }
Neale Ranns04a75e32017-03-23 06:46:01 -0700565#if CLIB_DEBUG > 0
566 else if (dst_address_byte_index)
567 {
568 int ii, count = 0;
569 for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
570 {
571 count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
572 }
573 ASSERT (count);
574 }
575#endif
Ed Warnickecb9cada2015-12-08 15:45:58 -0700576 }
577 }
578
579 /* Old ply was not deleted. */
580 return 0;
581}
582
Neale Rannsa3af3372017-03-28 03:49:52 -0700583static void
584unset_root_leaf (ip4_fib_mtrie_t * m,
585 const ip4_fib_mtrie_set_unset_leaf_args_t * a)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700586{
Neale Rannsa3af3372017-03-28 03:49:52 -0700587 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
588 i32 n_dst_bits_next_plies;
589 i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
590 u16 dst_byte;
591 ip4_fib_mtrie_16_ply_t *old_ply;
592
Neale Rannsf0609302017-04-11 09:13:39 -0700593 ASSERT (a->dst_address_length <= 32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700594
595 old_ply = &m->root_ply;
596 n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
597
598 dst_byte = a->dst_address.as_u16[0];
599
600 n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
601 (16 - a->dst_address_length) : 0);
602
603 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
604
605 /* Starting at the value of the byte at this section of the v4 address
606 * fill the buckets/slots of the ply */
607 for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
608 {
609 u16 slot;
610
611 slot = clib_net_to_host_u16 (dst_byte);
612 slot += i;
613 slot = clib_host_to_net_u16 (slot);
614
615 old_leaf = old_ply->leaves[slot];
616 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
617
618 if (old_leaf == del_leaf
619 || (!old_leaf_is_terminal
620 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
621 {
622 old_ply->leaves[slot] =
623 ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
624 old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
625 }
626 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700627}
628
629void
Neale Rannsa3af3372017-03-28 03:49:52 -0700630ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
631 const ip4_address_t * dst_address,
632 u32 dst_address_length, u32 adj_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700633{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700634 ip4_fib_mtrie_set_unset_leaf_args_t a;
Dave Barachd7cb1b52016-12-09 09:52:16 -0500635 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700636
Ed Warnickecb9cada2015-12-08 15:45:58 -0700637 /* Honor dst_address_length. Fib masks are in network byte order */
Neale Rannsa3af3372017-03-28 03:49:52 -0700638 a.dst_address.as_u32 = (dst_address->as_u32 &
639 im->fib_masks[dst_address_length]);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700640 a.dst_address_length = dst_address_length;
641 a.adj_index = adj_index;
642
Neale Rannsa3af3372017-03-28 03:49:52 -0700643 set_root_leaf (m, &a);
644}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700645
Neale Rannsa3af3372017-03-28 03:49:52 -0700646void
647ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
648 const ip4_address_t * dst_address,
649 u32 dst_address_length,
650 u32 adj_index,
651 u32 cover_address_length, u32 cover_adj_index)
652{
653 ip4_fib_mtrie_set_unset_leaf_args_t a;
654 ip4_main_t *im = &ip4_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700655
Neale Rannsa3af3372017-03-28 03:49:52 -0700656 /* Honor dst_address_length. Fib masks are in network byte order */
657 a.dst_address.as_u32 = (dst_address->as_u32 &
658 im->fib_masks[dst_address_length]);
659 a.dst_address_length = dst_address_length;
660 a.adj_index = adj_index;
661 a.cover_adj_index = cover_adj_index;
662 a.cover_address_length = cover_address_length;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700663
Neale Rannsa3af3372017-03-28 03:49:52 -0700664 /* the top level ply is never removed */
665 unset_root_leaf (m, &a);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700666}
667
Ed Warnickecb9cada2015-12-08 15:45:58 -0700668/* Returns number of bytes of memory used by mtrie. */
Dave Barachd7cb1b52016-12-09 09:52:16 -0500669static uword
Neale Rannsa3af3372017-03-28 03:49:52 -0700670mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700671{
672 uword bytes, i;
673
Ed Warnickecb9cada2015-12-08 15:45:58 -0700674 bytes = sizeof (p[0]);
Dave Barachd7cb1b52016-12-09 09:52:16 -0500675 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700676 {
677 ip4_fib_mtrie_leaf_t l = p->leaves[i];
678 if (ip4_fib_mtrie_leaf_is_next_ply (l))
Neale Rannsa3af3372017-03-28 03:49:52 -0700679 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
680 }
681
682 return bytes;
683}
684
685/* Returns number of bytes of memory used by mtrie. */
686static uword
687mtrie_memory_usage (ip4_fib_mtrie_t * m)
688{
689 uword bytes, i;
690
691 bytes = sizeof (*m);
692 for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
693 {
694 ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
695 if (ip4_fib_mtrie_leaf_is_next_ply (l))
696 bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700697 }
698
699 return bytes;
700}
701
Dave Barachd7cb1b52016-12-09 09:52:16 -0500702static u8 *
703format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700704{
705 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
706
Neale Ranns04a75e32017-03-23 06:46:01 -0700707 if (ip4_fib_mtrie_leaf_is_terminal (l))
708 s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700709 else
710 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
711 return s;
712}
713
/*
 * FORMAT_PLY (s, _p, _i, _base_address, _ply_max_len, _indent)
 *
 * Append to s a line describing slot _i of ply _p and, when that slot
 * refers to a next ply, the formatted contents of that ply. Evaluates to
 * the updated s.
 *
 *  _base_address  host-order address of the prefix that leads to _p
 *  _ply_max_len   number of address bits resolved once _p has been
 *                 consulted; _i is shifted into place accordingly
 *  _indent        indentation of the enclosing output
 *
 * The slot is read through the _p argument (not through whatever variable
 * the caller happens to call 'p').
 *
 * NOTE: the expansion still refers to a variable named 'm' (the mtrie)
 * from the expanding scope, which is passed on to
 * format_ip4_fib_mtrie_ply.
 */
#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent)     \
({                                                                      \
  u32 a, ia_length;                                                     \
  ip4_address_t ia;                                                     \
  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)];                         \
                                                                        \
  a = (_base_address) + ((_i) << (32 - (_ply_max_len)));                \
  ia.as_u32 = clib_host_to_net_u32 (a);                                 \
  ia_length = (_p)->dst_address_bits_of_leaves[(_i)];                   \
  s = format (s, "\n%U%20U %U",                                         \
              format_white_space, (_indent) + 2,                        \
              format_ip4_address_and_length, &ia, ia_length,            \
              format_ip4_fib_mtrie_leaf, _l);                           \
                                                                        \
  if (ip4_fib_mtrie_leaf_is_next_ply (_l))                              \
    s = format (s, "\n%U%U",                                            \
                format_white_space, (_indent) + 2,                      \
                format_ip4_fib_mtrie_ply, m, a,                         \
                ip4_fib_mtrie_leaf_get_next_ply_index (_l));            \
  s;                                                                    \
})
735
Dave Barachd7cb1b52016-12-09 09:52:16 -0500736static u8 *
737format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700738{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500739 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700740 u32 base_address = va_arg (*va, u32);
741 u32 ply_index = va_arg (*va, u32);
Neale Rannsa3af3372017-03-28 03:49:52 -0700742 ip4_fib_mtrie_8_ply_t *p;
743 uword indent;
744 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700745
Neale Rannsa3af3372017-03-28 03:49:52 -0700746 p = pool_elt_at_index (ip4_ply_pool, ply_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700747 indent = format_get_indent (s);
Neale Rannsa3af3372017-03-28 03:49:52 -0700748 s = format (s, "ply index %d, %d non-empty leaves", ply_index,
749 p->n_non_empty_leafs);
750
Ed Warnickecb9cada2015-12-08 15:45:58 -0700751 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
752 {
Neale Ranns04a75e32017-03-23 06:46:01 -0700753 if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700754 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700755 FORMAT_PLY (s, p, i, base_address,
756 p->dst_address_bits_base + 8, indent);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700757 }
758 }
759
760 return s;
761}
762
Dave Barachd7cb1b52016-12-09 09:52:16 -0500763u8 *
764format_ip4_fib_mtrie (u8 * s, va_list * va)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700765{
Dave Barachd7cb1b52016-12-09 09:52:16 -0500766 ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
Neale Rannsa3af3372017-03-28 03:49:52 -0700767 ip4_fib_mtrie_16_ply_t *p;
768 u32 base_address = 0;
769 int i;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700770
Neale Rannsa3af3372017-03-28 03:49:52 -0700771 s = format (s, "%d plies, memory usage %U\n",
772 pool_elts (ip4_ply_pool),
773 format_memory_size, mtrie_memory_usage (m));
774 s = format (s, "root-ply");
775 p = &m->root_ply;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700776
Neale Rannsa3af3372017-03-28 03:49:52 -0700777 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700778 {
Neale Rannsa3af3372017-03-28 03:49:52 -0700779 u16 slot;
780
781 slot = clib_host_to_net_u16 (i);
782
783 if (p->dst_address_bits_of_leaves[slot] > 0)
784 {
785 FORMAT_PLY (s, p, slot, base_address, 16, 2);
786 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700787 }
788
789 return s;
790}
Dave Barachd7cb1b52016-12-09 09:52:16 -0500791
Neale Rannsa3af3372017-03-28 03:49:52 -0700792static clib_error_t *
793ip4_mtrie_module_init (vlib_main_t * vm)
794{
795 /* Burn one ply so index 0 is taken */
796 CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
797
798 pool_get (ip4_ply_pool, p);
799
800 return (NULL);
801}
802
803VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
804
Dave Barachd7cb1b52016-12-09 09:52:16 -0500805/*
806 * fd.io coding-style-patch-verification: ON
807 *
808 * Local Variables:
809 * eval: (c-set-style "gnu")
810 * End:
811 */