blob: ed4a0d9f44f9e0bb49b8a4e1af1706436c6ead3a [file] [log] [blame]
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ip/ip4_fib.h: ip4 mtrie fib
 *
 * Copyright (c) 2012 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
39
40#include <vnet/ip/ip.h>
41
42static void
43ply_init (ip4_fib_mtrie_ply_t * p, ip4_fib_mtrie_leaf_t init, uword prefix_len)
44{
45 p->n_non_empty_leafs = ip4_fib_mtrie_leaf_is_empty (init) ? 0 : ARRAY_LEN (p->leaves);
46 memset (p->dst_address_bits_of_leaves, prefix_len, sizeof (p->dst_address_bits_of_leaves));
47
48 /* Initialize leaves. */
49#ifdef CLIB_HAVE_VEC128
50 {
51 u32x4 * l, init_x4;
52
53#ifndef __ALTIVEC__
54 init_x4 = u32x4_splat (init);
55#else
56 {
57 u32x4_union_t y;
58 y.as_u32[0] = init;
59 y.as_u32[1] = init;
60 y.as_u32[2] = init;
61 y.as_u32[3] = init;
62 init_x4 = y.as_u32x4;
63 }
64#endif
65
66 for (l = p->leaves_as_u32x4; l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); l += 4)
67 {
68 l[0] = init_x4;
69 l[1] = init_x4;
70 l[2] = init_x4;
71 l[3] = init_x4;
72 }
73 }
74#else
75 {
76 u32 * l;
77
78 for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4)
79 {
80 l[0] = init;
81 l[1] = init;
82 l[2] = init;
83 l[3] = init;
84 }
85 }
86#endif
87}
88
89static ip4_fib_mtrie_leaf_t
90ply_create (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t init_leaf, uword prefix_len)
91{
92 ip4_fib_mtrie_ply_t * p;
93
94 /* Get cache aligned ply. */
95 pool_get_aligned (m->ply_pool, p, sizeof (p[0]));
96
97 ply_init (p, init_leaf, prefix_len);
98 return ip4_fib_mtrie_leaf_set_next_ply_index (p - m->ply_pool);
99}
100
101always_inline ip4_fib_mtrie_ply_t *
102get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
103{
104 uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
105 /* It better not be the root ply. */
106 ASSERT (n != 0);
107 return pool_elt_at_index (m->ply_pool, n);
108}
109
110static void
111ply_free (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
112{
113 uword i, is_root;
114
115 is_root = p - m->ply_pool == 0;
116
117 for (i = 0 ; i < ARRAY_LEN (p->leaves); i++)
118 {
119 ip4_fib_mtrie_leaf_t l = p->leaves[i];
120 if (ip4_fib_mtrie_leaf_is_next_ply (l))
121 ply_free (m, get_next_ply_for_leaf (m, l));
122 }
123
124 if (is_root)
125 ply_init (p, IP4_FIB_MTRIE_LEAF_EMPTY, /* prefix_len */ 0);
126 else
127 pool_put (m->ply_pool, p);
128}
129
130void ip4_fib_free (ip4_fib_mtrie_t * m)
131{
132 ip4_fib_mtrie_ply_t * root_ply = pool_elt_at_index (m->ply_pool, 0);
133 ply_free (m, root_ply);
134}
135
136u32 ip4_mtrie_lookup_address (ip4_fib_mtrie_t * m, ip4_address_t dst)
137{
138 ip4_fib_mtrie_ply_t * p = pool_elt_at_index (m->ply_pool, 0);
139 ip4_fib_mtrie_leaf_t l;
140
141 l = p->leaves[dst.as_u8[0]];
142 if (ip4_fib_mtrie_leaf_is_terminal (l))
143 return ip4_fib_mtrie_leaf_get_adj_index (l);
144
145 p = get_next_ply_for_leaf (m, l);
146 l = p->leaves[dst.as_u8[1]];
147 if (ip4_fib_mtrie_leaf_is_terminal (l))
148 return ip4_fib_mtrie_leaf_get_adj_index (l);
149
150 p = get_next_ply_for_leaf (m, l);
151 l = p->leaves[dst.as_u8[2]];
152 if (ip4_fib_mtrie_leaf_is_terminal (l))
153 return ip4_fib_mtrie_leaf_get_adj_index (l);
154
155 p = get_next_ply_for_leaf (m, l);
156 l = p->leaves[dst.as_u8[3]];
157
158 ASSERT (ip4_fib_mtrie_leaf_is_terminal (l));
159 return ip4_fib_mtrie_leaf_get_adj_index (l);
160}
161
162typedef struct {
163 ip4_address_t dst_address;
164 u32 dst_address_length;
165 u32 adj_index;
166} ip4_fib_mtrie_set_unset_leaf_args_t;
167
168static void
169set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
170 ip4_fib_mtrie_ply_t * ply,
171 ip4_fib_mtrie_leaf_t new_leaf,
172 uword new_leaf_dst_address_bits)
173{
174 ip4_fib_mtrie_leaf_t old_leaf;
175 uword i;
176
177 ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
178 ASSERT (! ip4_fib_mtrie_leaf_is_empty (new_leaf));
179
180 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
181 {
182 old_leaf = ply->leaves[i];
183
184 /* Recurse into sub plies. */
185 if (! ip4_fib_mtrie_leaf_is_terminal (old_leaf))
186 {
187 ip4_fib_mtrie_ply_t * sub_ply = get_next_ply_for_leaf (m, old_leaf);
188 set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, new_leaf_dst_address_bits);
189 }
190
191 /* Replace less specific terminal leaves with new leaf. */
192 else if (new_leaf_dst_address_bits >= ply->dst_address_bits_of_leaves[i])
193 {
194 ply->leaves[i] = new_leaf;
195 ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
196 ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
197 }
198 }
199}
200
201static void
202set_leaf (ip4_fib_mtrie_t * m,
203 ip4_fib_mtrie_set_unset_leaf_args_t * a,
204 u32 old_ply_index,
205 u32 dst_address_byte_index)
206{
207 ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
208 i32 n_dst_bits_next_plies;
209 u8 dst_byte;
210
211 ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
212 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
213
214 n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
215
216 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
217
218 /* Number of bits next plies <= 0 => insert leaves this ply. */
219 if (n_dst_bits_next_plies <= 0)
220 {
221 uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
222
223 n_dst_bits_this_ply = -n_dst_bits_next_plies;
224 ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & pow2_mask (n_dst_bits_this_ply)) == 0);
225
226 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
227 {
228 ip4_fib_mtrie_ply_t * old_ply, * new_ply;
229
230 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
231
232 old_leaf = old_ply->leaves[i];
233 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
234
235 /* Is leaf to be inserted more specific? */
236 if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
237 {
238 new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
239
240 if (old_leaf_is_terminal)
241 {
242 old_ply->dst_address_bits_of_leaves[i] = a->dst_address_length;
243 old_ply->leaves[i] = new_leaf;
244 old_ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_empty (old_leaf);
245 ASSERT (old_ply->n_non_empty_leafs <= ARRAY_LEN (old_ply->leaves));
246 }
247 else
248 {
249 /* Existing leaf points to another ply. We need to place new_leaf into all
250 more specific slots. */
251 new_ply = get_next_ply_for_leaf (m, old_leaf);
252 set_ply_with_more_specific_leaf (m, new_ply, new_leaf, a->dst_address_length);
253 }
254 }
255
256 else if (! old_leaf_is_terminal)
257 {
258 new_ply = get_next_ply_for_leaf (m, old_leaf);
259 set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
260 }
261 }
262 }
263 else
264 {
265 ip4_fib_mtrie_ply_t * old_ply, * new_ply;
266
267 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
268 old_leaf = old_ply->leaves[dst_byte];
269 if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
270 {
271 new_leaf = ply_create (m, old_leaf, old_ply->dst_address_bits_of_leaves[dst_byte]);
272 new_ply = get_next_ply_for_leaf (m, new_leaf);
273
274 /* Refetch since ply_create may move pool. */
275 old_ply = pool_elt_at_index (m->ply_pool, old_ply_index);
276
277 old_ply->leaves[dst_byte] = new_leaf;
278 old_ply->dst_address_bits_of_leaves[dst_byte] = 0;
279
280 old_ply->n_non_empty_leafs -= ip4_fib_mtrie_leaf_is_non_empty (old_leaf);
281 ASSERT (old_ply->n_non_empty_leafs >= 0);
282
283 /* Account for the ply we just created. */
284 old_ply->n_non_empty_leafs += 1;
285 }
286 else
287 new_ply = get_next_ply_for_leaf (m, old_leaf);
288
289 set_leaf (m, a, new_ply - m->ply_pool, dst_address_byte_index + 1);
290 }
291}
292
293static uword
294unset_leaf (ip4_fib_mtrie_t * m,
295 ip4_fib_mtrie_set_unset_leaf_args_t * a,
296 ip4_fib_mtrie_ply_t * old_ply,
297 u32 dst_address_byte_index)
298{
299 ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
300 i32 n_dst_bits_next_plies;
301 uword i, n_dst_bits_this_ply, old_leaf_is_terminal;
302 u8 dst_byte;
303
304 ASSERT (a->dst_address_length > 0 && a->dst_address_length <= 32);
305 ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
306
307 n_dst_bits_next_plies = a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
308
309 dst_byte = a->dst_address.as_u8[dst_address_byte_index];
310 if (n_dst_bits_next_plies < 0)
311 dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
312
313 n_dst_bits_this_ply = n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
314 n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
315
316 del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
317
318 for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
319 {
320 old_leaf = old_ply->leaves[i];
321 old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
322
323 if (old_leaf == del_leaf
324 || (! old_leaf_is_terminal
325 && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), dst_address_byte_index + 1)))
326 {
327 old_ply->leaves[i] = IP4_FIB_MTRIE_LEAF_EMPTY;
328 old_ply->dst_address_bits_of_leaves[i] = 0;
329
330 /* No matter what we just deleted a non-empty leaf. */
331 ASSERT (! ip4_fib_mtrie_leaf_is_empty (old_leaf));
332 old_ply->n_non_empty_leafs -= 1;
333
334 ASSERT (old_ply->n_non_empty_leafs >= 0);
335 if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
336 {
337 pool_put (m->ply_pool, old_ply);
338 /* Old ply was deleted. */
339 return 1;
340 }
341 }
342 }
343
344 /* Old ply was not deleted. */
345 return 0;
346}
347
348void ip4_mtrie_init (ip4_fib_mtrie_t * m)
349{
350 ip4_fib_mtrie_leaf_t root;
351 memset (m, 0, sizeof (m[0]));
352 m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
353 root = ply_create (m, IP4_FIB_MTRIE_LEAF_EMPTY, /* dst_address_bits_of_leaves */ 0);
354 ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (root) == 0);
355}
356
357void
358ip4_fib_mtrie_add_del_route (ip4_fib_t * fib,
359 ip4_address_t dst_address,
360 u32 dst_address_length,
361 u32 adj_index,
362 u32 is_del)
363{
364 ip4_fib_mtrie_t * m = &fib->mtrie;
365 ip4_fib_mtrie_ply_t * root_ply;
366 ip4_fib_mtrie_set_unset_leaf_args_t a;
367 ip4_main_t * im = &ip4_main;
368
369 ASSERT(m->ply_pool != 0);
370
371 root_ply = pool_elt_at_index (m->ply_pool, 0);
372
373 /* Honor dst_address_length. Fib masks are in network byte order */
374 dst_address.as_u32 &= im->fib_masks[dst_address_length];
375 a.dst_address = dst_address;
376 a.dst_address_length = dst_address_length;
377 a.adj_index = adj_index;
378
379 if (! is_del)
380 {
381 if (dst_address_length == 0)
382 m->default_leaf = ip4_fib_mtrie_leaf_set_adj_index (adj_index);
383 else
384 set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
385 }
386 else
387 {
388 if (dst_address_length == 0)
389 m->default_leaf = IP4_FIB_MTRIE_LEAF_EMPTY;
390
391 else
392 {
393 ip4_main_t * im = &ip4_main;
394 uword i;
395
396 unset_leaf (m, &a, root_ply, 0);
397
398 /* Find next less specific route and insert into mtrie. */
399 for (i = ARRAY_LEN (fib->adj_index_by_dst_address) - 1; i >= 1; i--)
400 {
401 uword * p;
402 ip4_address_t key;
403
404 if (! fib->adj_index_by_dst_address[i])
405 continue;
406
407 key.as_u32 = dst_address.as_u32 & im->fib_masks[i];
408 p = hash_get (fib->adj_index_by_dst_address[i], key.as_u32);
409 if (p)
410 {
411 a.dst_address = key;
412 a.dst_address_length = i;
413 a.adj_index = p[0];
414 set_leaf (m, &a, /* ply_index */ 0, /* dst_address_byte_index */ 0);
415 break;
416 }
417 }
418 }
419 }
420}
421
422always_inline uword
423maybe_remap_leaf (ip_lookup_main_t * lm, ip4_fib_mtrie_leaf_t * p)
424{
425 ip4_fib_mtrie_leaf_t l = p[0];
426 uword was_remapped_to_empty_leaf = 0;
427 if (ip4_fib_mtrie_leaf_is_terminal (l))
428 {
429 u32 adj_index = ip4_fib_mtrie_leaf_get_adj_index (l);
430 u32 m = vec_elt (lm->adjacency_remap_table, adj_index);
431 if (m)
432 {
433 was_remapped_to_empty_leaf = m == ~0;
434 if (was_remapped_to_empty_leaf)
435 p[0] = (was_remapped_to_empty_leaf
436 ? IP4_FIB_MTRIE_LEAF_EMPTY
437 : ip4_fib_mtrie_leaf_set_adj_index (m - 1));
438 }
439 }
440 return was_remapped_to_empty_leaf;
441}
442
443static void maybe_remap_ply (ip_lookup_main_t * lm, ip4_fib_mtrie_ply_t * ply)
444{
445 u32 n_remapped_to_empty = 0;
446 u32 i;
447 for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
448 n_remapped_to_empty += maybe_remap_leaf (lm, &ply->leaves[i]);
449 if (n_remapped_to_empty > 0)
450 {
451 ASSERT (n_remapped_to_empty <= ply->n_non_empty_leafs);
452 ply->n_non_empty_leafs -= n_remapped_to_empty;
453 if (ply->n_non_empty_leafs == 0)
454 os_panic ();
455 }
456}
457
458void ip4_mtrie_maybe_remap_adjacencies (ip_lookup_main_t * lm, ip4_fib_mtrie_t * m)
459{
460 ip4_fib_mtrie_ply_t * ply;
461 pool_foreach (ply, m->ply_pool, maybe_remap_ply (lm, ply));
462 maybe_remap_leaf (lm, &m->default_leaf);
463}
464
465/* Returns number of bytes of memory used by mtrie. */
466static uword mtrie_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_ply_t * p)
467{
468 uword bytes, i;
469
470 if (! p)
471 {
472 if (pool_is_free_index (m->ply_pool, 0))
473 return 0;
474 p = pool_elt_at_index (m->ply_pool, 0);
475 }
476
477 bytes = sizeof (p[0]);
478 for (i = 0 ; i < ARRAY_LEN (p->leaves); i++)
479 {
480 ip4_fib_mtrie_leaf_t l = p->leaves[i];
481 if (ip4_fib_mtrie_leaf_is_next_ply (l))
482 bytes += mtrie_memory_usage (m, get_next_ply_for_leaf (m, l));
483 }
484
485 return bytes;
486}
487
488static u8 * format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
489{
490 ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
491
492 if (ip4_fib_mtrie_leaf_is_empty (l))
493 s = format (s, "miss");
494 else if (ip4_fib_mtrie_leaf_is_terminal (l))
495 s = format (s, "adj %d", ip4_fib_mtrie_leaf_get_adj_index (l));
496 else
497 s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
498 return s;
499}
500
501static u8 * format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
502{
503 ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *);
504 u32 base_address = va_arg (*va, u32);
505 u32 ply_index = va_arg (*va, u32);
506 u32 dst_address_byte_index = va_arg (*va, u32);
507 ip4_fib_mtrie_ply_t * p;
508 uword i, indent;
509
510 p = pool_elt_at_index (m->ply_pool, ply_index);
511 indent = format_get_indent (s);
512 s = format (s, "ply index %d, %d non-empty leaves", ply_index, p->n_non_empty_leafs);
513 for (i = 0; i < ARRAY_LEN (p->leaves); i++)
514 {
515 ip4_fib_mtrie_leaf_t l = p->leaves[i];
516
517 if (! ip4_fib_mtrie_leaf_is_empty (l))
518 {
519 u32 a, ia_length;
520 ip4_address_t ia;
521
522 a = base_address + (i << (24 - 8*dst_address_byte_index));
523 ia.as_u32 = clib_host_to_net_u32 (a);
524 if (ip4_fib_mtrie_leaf_is_terminal (l))
525 ia_length = p->dst_address_bits_of_leaves[i];
526 else
527 ia_length = 8*(1 + dst_address_byte_index);
528 s = format (s, "\n%U%20U %U",
529 format_white_space, indent + 2,
530 format_ip4_address_and_length, &ia, ia_length,
531 format_ip4_fib_mtrie_leaf, l);
532
533 if (ip4_fib_mtrie_leaf_is_next_ply (l))
534 s = format (s, "\n%U%U",
535 format_white_space, indent + 2,
536 format_ip4_fib_mtrie_ply, m, a,
537 ip4_fib_mtrie_leaf_get_next_ply_index (l),
538 dst_address_byte_index + 1);
539 }
540 }
541
542 return s;
543}
544
545u8 * format_ip4_fib_mtrie (u8 * s, va_list * va)
546{
547 ip4_fib_mtrie_t * m = va_arg (*va, ip4_fib_mtrie_t *);
548
549 s = format (s, "%d plies, memory usage %U",
550 pool_elts (m->ply_pool),
551 format_memory_size, mtrie_memory_usage (m, 0));
552
553 if (pool_elts (m->ply_pool) > 0)
554 {
555 ip4_address_t base_address;
556 base_address.as_u32 = 0;
557 s = format (s, "\n %U", format_ip4_fib_mtrie_ply, m, base_address, 0, 0);
558 }
559
560 return s;
561}