blob: 84dd625c2259d7e21c3d54ec9b064ea4cf661123 [file] [log] [blame]
Neale Ranns8f5fef22020-12-21 08:29:34 +00001/*
2 *------------------------------------------------------------------
3 * ip_path_mtu.c
4 *
5 * Copyright (c) 2021 Graphiant.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
18 */
19
20#include <vnet/ip/ip_path_mtu.h>
21#include <vnet/ip/ip_frag.h>
22#include <vnet/adj/adj_delegate.h>
23#include <vnet/adj/adj_nbr.h>
24#include <vnet/fib/fib_table.h>
25#include <vnet/fib/fib_entry_track.h>
26
27#include <vnet/dpo/drop_dpo.h>
28
/**
 * Path MTU
 *
 * A path is a peer. A peer is known by an IP address (in a table).
 * Insert a DPO in the forwarding chain for the peer to perform the
 * fragmentation.
 * For attached peers, all traffic will use the peer's adjacency. There
 * is already an MTU check in the adjacency (for the link's MTU), so as an
 * optimisation, instead of using a DPO, we add a delegate to the adjacency
 * to set the adjacency's MTU to the path MTU.
 */
40
41/**
42 * the logger
43 */
44static vlib_log_class_t ip_pmtu_logger;
45
46static adj_delegate_type_t ip_pmtu_adj_delegate_type;
47static fib_source_t ip_pmtu_source;
48
49/**
50 * DPO pool
51 */
52ip_pmtu_dpo_t *ip_pmtu_dpo_pool;
53
/**
 * DPO type registered for the path-MTU DPOs
 */
57static dpo_type_t ip_pmtu_dpo_type;
58
59/**
60 * Fib node type for the tracker
61 */
62static fib_node_type_t ip_pmtu_fib_type;
63
64/**
65 * Path MTU tracker pool
66 */
67ip_pmtu_t *ip_pmtu_pool;
68
/**
 * Delegate added to adjacencies to track path MTU
 */
typedef struct ip_path_mtu_adj_delegate_t_
{
  /* the path MTU value that is pushed to the adjacency with
   * adj_nbr_set_mtu() */
  u16 pmtu;
} ip_path_mtu_adj_delegate_t;
76
77static ip_path_mtu_adj_delegate_t *ip_path_mtu_adj_delegate_pool;
78
/*
 * DB of all FIB PMTU settings
 *
 * The key identifies a peer: its address, the ID of the table it is in and
 * the address family. The DB is a memory-keyed hash created with
 * sizeof (ip_pmtu_key_t) as the key size, and the struct is declared packed.
 */
typedef struct ip_pmtu_key_t_
{
  /* the peer's (next-hop's) address */
  ip46_address_t nh;
  /* the user-visible table ID (not the FIB index) */
  u32 table_id;
  /* the address family of nh */
  fib_protocol_t fproto;
} __clib_packed ip_pmtu_key_t;
86
87static uword *ip_pmtu_db;
88
/**
 * Debug logging helpers.
 *
 * IP_PMTU_TRKR_DBG prefixes the message with the formatted tracker that
 * _ipt points to (it must be an element of ip_pmtu_pool); IP_PMTU_DBG logs
 * the message alone. Both append ": " to the caller's format string.
 *
 * The bodies are wrapped in do/while (0) so an invocation followed by ';'
 * is a single statement, also inside an un-braced if/else. The pool index
 * is cast to index_t because format_ip_pmtu fetches an index_t with
 * va_arg, whereas the raw pointer difference has type ptrdiff_t.
 */
#define IP_PMTU_TRKR_DBG(_ipt, _fmt, _args...)                                \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, "[%U]: " _fmt ": ", format_ip_pmtu,     \
                      (index_t) ((_ipt) - ip_pmtu_pool), ##_args);            \
    }                                                                         \
  while (0)
#define IP_PMTU_DBG(_fmt, _args...)                                           \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, _fmt ": ", ##_args);                    \
    }                                                                         \
  while (0)
98
99static u8 *
100format_ip_pmtu_flags (u8 *s, va_list *ap)
101{
102 ip_pmtu_flags_t f = va_arg (*ap, ip_pmtu_flags_t);
103
104 if (0)
105 ;
106#define _(a, b, c) else if (f & IP_PMTU_FLAG_##a) s = format (s, "%s ", c);
107 foreach_ip_pmtu_flag
108#undef _
109
110 return (s);
111}
112
113u32
114ip_pmtu_get_table_id (const ip_pmtu_t *ipt)
115{
116 const fib_prefix_t *pfx;
117 u32 fib_index;
118
119 pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
120 fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
121
122 return (fib_table_get_table_id (fib_index, pfx->fp_proto));
123}
124
125void
126ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip)
127{
128 const fib_prefix_t *pfx;
129
130 pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
131 ip_address_from_46 (&pfx->fp_addr, pfx->fp_proto, ip);
132}
133
134static u8 *
135format_ip_pmtu (u8 *s, va_list *ap)
136{
137 ip_pmtu_t *ipt;
138 index_t ipti = va_arg (*ap, index_t);
139 const fib_prefix_t *pfx;
140 u32 fib_index;
141
142 ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
143 pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
144 fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);
145
146 s =
147 format (s, "[%d] [tbl:[%d:%d]] %U pmtu:[cfg:%d, oper:%d, parent:%d] [%U]",
148 ipti, ip_pmtu_get_table_id (ipt), fib_index, format_fib_prefix,
149 pfx, ipt->ipt_cfg_pmtu, ipt->ipt_oper_pmtu, ipt->ipt_parent_pmtu,
150 format_ip_pmtu_flags, ipt->ipt_flags);
151
152 return (s);
153}
154
155static u8 *
156format_ip_path_mtu_adj_delegate (const adj_delegate_t *aed, u8 *s)
157{
158 ip_path_mtu_adj_delegate_t *ip_adj;
159
160 ip_adj = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, aed->ad_index);
161
162 s = format (s, "IP path-MTU: %d", ip_adj->pmtu);
163
164 return (s);
165}
166
167static void
168ip_pmtu_adj_delegate_adj_created (adj_index_t ai)
169{
170 ip_path_mtu_adj_delegate_t *ipp_ad;
171 const ip_pmtu_t *ipt;
172 ip_adjacency_t *adj;
173 u32 table_id;
174 uword *p;
175
176 adj = adj_get (ai);
177
178 switch (adj->lookup_next_index)
179 {
180 case IP_LOOKUP_NEXT_DROP:
181 case IP_LOOKUP_NEXT_PUNT:
182 case IP_LOOKUP_NEXT_LOCAL:
183 case IP_LOOKUP_NEXT_GLEAN:
184 case IP_LOOKUP_NEXT_MCAST:
185 case IP_LOOKUP_NEXT_BCAST:
186 case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
187 case IP_LOOKUP_NEXT_ICMP_ERROR:
188 case IP_LOOKUP_N_NEXT:
189 return;
190
191 case IP_LOOKUP_NEXT_ARP:
192 case IP_LOOKUP_NEXT_REWRITE:
193 case IP_LOOKUP_NEXT_MIDCHAIN:
194 break;
195 }
196
197 table_id = fib_table_get_table_id_for_sw_if_index (
198 adj->ia_nh_proto, adj->rewrite_header.sw_if_index);
199
200 ip_pmtu_key_t key = {
201 .nh = adj->sub_type.nbr.next_hop,
202 .table_id = table_id,
203 .fproto = adj->ia_nh_proto,
204 };
205
206 p = hash_get_mem (ip_pmtu_db, &key);
207
208 if (NULL == p)
209 return;
210
211 ipt = pool_elt_at_index (ip_pmtu_pool, p[0]);
212
213 pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
214 ipp_ad->pmtu = ipt->ipt_cfg_pmtu;
215
216 adj_delegate_add (adj, ip_pmtu_adj_delegate_type,
217 ipp_ad - ip_path_mtu_adj_delegate_pool);
218
219 adj_nbr_set_mtu (ai, ipp_ad->pmtu);
220
221 IP_PMTU_TRKR_DBG (ipt, "adj-added:", ai);
222}
223
224static void
225ip_pmtu_adj_delegate_adj_deleted (adj_delegate_t *ad)
226{
227 pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
228}
229
230static void
231ip_pmtu_adj_delegate_adj_modified (adj_delegate_t *ad)
232{
233 ip_path_mtu_adj_delegate_t *ipp_ad;
234
235 ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
236
237 adj_nbr_set_mtu (ad->ad_adj_index, ipp_ad->pmtu);
238}
239
240const adj_delegate_vft_t ip_path_adj_delegate_vft = {
241 .adv_format = format_ip_path_mtu_adj_delegate,
242 .adv_adj_deleted = ip_pmtu_adj_delegate_adj_deleted,
243 .adv_adj_modified = ip_pmtu_adj_delegate_adj_modified,
244 .adv_adj_created = ip_pmtu_adj_delegate_adj_created,
245};
246
247static bool
248ip_path_mtu_value_invalid (u16 pmtu)
249{
250 return (pmtu == 0 || pmtu == 0xffff);
251}
252
253static adj_walk_rc_t
254ip_ptmu_adj_walk_remove (adj_index_t ai, void *ctx)
255{
256 adj_delegate_t *ad;
257
258 ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
259
260 if (ad)
261 {
262 adj_nbr_set_mtu (ai, 0);
263
264 pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
265 adj_delegate_remove (ai, ip_pmtu_adj_delegate_type);
266 }
267 return (ADJ_WALK_RC_CONTINUE);
268}
269
270static adj_walk_rc_t
271ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
272{
273 ip_path_mtu_adj_delegate_t *ipp_ad;
274 adj_delegate_t *ad;
275 u16 *pmtup;
276
277 pmtup = ctx;
278 ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type);
279
280 if (ad)
281 ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index);
282 else
283 {
284 pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad);
285
286 adj_delegate_add (adj_get (ai), ip_pmtu_adj_delegate_type,
287 ipp_ad - ip_path_mtu_adj_delegate_pool);
288 }
289
290 ipp_ad->pmtu = *pmtup;
291
292 adj_nbr_set_mtu (ai, ipp_ad->pmtu);
293
294 return (ADJ_WALK_RC_CONTINUE);
295}
296
297static ip_pmtu_dpo_t *
298ip_pmtu_dpo_alloc (void)
299{
Neale Ranns758ec132022-02-24 10:35:02 +0000300 vlib_main_t *vm = vlib_get_main ();
301 u8 need_barrier_sync = 0;
Neale Ranns8f5fef22020-12-21 08:29:34 +0000302 ip_pmtu_dpo_t *ipm;
303
Neale Ranns758ec132022-02-24 10:35:02 +0000304 pool_get_aligned_will_expand (ip_pmtu_dpo_pool, need_barrier_sync,
305 sizeof (ip_pmtu_dpo_t));
306
307 if (need_barrier_sync)
308 vlib_worker_thread_barrier_sync (vm);
309
Neale Ranns8f5fef22020-12-21 08:29:34 +0000310 pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
311
Neale Ranns758ec132022-02-24 10:35:02 +0000312 if (need_barrier_sync)
313 vlib_worker_thread_barrier_release (vm);
314
Neale Ranns8f5fef22020-12-21 08:29:34 +0000315 return (ipm);
316}
317
318static ip_pmtu_dpo_t *
319ip_pmtu_dpo_get_from_dpo (const dpo_id_t *dpo)
320{
321 ASSERT (ip_pmtu_dpo_type == dpo->dpoi_type);
322
323 return (ip_pmtu_dpo_get (dpo->dpoi_index));
324}
325
326static index_t
327ip_pmtu_dpo_get_index (ip_pmtu_dpo_t *ipm)
328{
329 return (ipm - ip_pmtu_dpo_pool);
330}
331
332static void
333ip_pmtu_dpo_lock (dpo_id_t *dpo)
334{
335 ip_pmtu_dpo_t *ipm;
336
337 ipm = ip_pmtu_dpo_get_from_dpo (dpo);
338 ipm->ipm_locks++;
339}
340
341static void
342ip_pmtu_dpo_unlock (dpo_id_t *dpo)
343{
344 ip_pmtu_dpo_t *ipm;
345
346 ipm = ip_pmtu_dpo_get_from_dpo (dpo);
347 ipm->ipm_locks--;
348
349 if (0 == ipm->ipm_locks)
350 {
351 dpo_reset (&ipm->ipm_dpo);
352 pool_put (ip_pmtu_dpo_pool, ipm);
353 }
354}
355
356static u32
357ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
358{
359 ip_pmtu_dpo_t *ipm;
360
361 ipm = ip_pmtu_dpo_get_from_dpo (dpo);
362
363 return (dpo_get_urpf (&ipm->ipm_dpo));
364}
365
366void
Neale Ranns758ec132022-02-24 10:35:02 +0000367ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
Neale Ranns8f5fef22020-12-21 08:29:34 +0000368{
369 ip_pmtu_dpo_t *ipm;
Neale Ranns8f5fef22020-12-21 08:29:34 +0000370
371 ipm = ip_pmtu_dpo_alloc ();
372
Neale Ranns758ec132022-02-24 10:35:02 +0000373 ipm->ipm_proto = parent->dpoi_proto;
Neale Ranns8f5fef22020-12-21 08:29:34 +0000374 ipm->ipm_pmtu = pmtu;
375
Neale Ranns758ec132022-02-24 10:35:02 +0000376 dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent);
Neale Ranns8f5fef22020-12-21 08:29:34 +0000377 dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm));
378}
379
380u8 *
381format_ip_pmtu_dpo (u8 *s, va_list *ap)
382{
383 index_t index = va_arg (*ap, index_t);
384 u32 indent = va_arg (*ap, u32);
385 ip_pmtu_dpo_t *ipm = ip_pmtu_dpo_get (index);
386
387 s = format (s, "ip-pmtu-dpo: %U, mtu:%d", format_dpo_proto, ipm->ipm_proto,
388 ipm->ipm_pmtu);
389 s = format (s, "\n%U", format_white_space, indent + 2);
390 s = format (s, "%U", format_dpo_id, &ipm->ipm_dpo, indent + 4);
391
392 return (s);
393}
394
395/**
396 * Interpose a path MTU DPO
397 */
398static void
399ip_pmtu_dpo_interpose (const dpo_id_t *original, const dpo_id_t *parent,
400 dpo_id_t *clone)
401{
402 ip_pmtu_dpo_t *ipm, *ipm_clone;
403
404 ipm_clone = ip_pmtu_dpo_alloc ();
405 ipm = ip_pmtu_dpo_get (original->dpoi_index);
406
407 ipm_clone->ipm_proto = ipm->ipm_proto;
408 ipm_clone->ipm_pmtu = ipm->ipm_pmtu;
409
410 dpo_stack (ip_pmtu_dpo_type, ipm_clone->ipm_proto, &ipm_clone->ipm_dpo,
411 parent);
412 dpo_set (clone, ip_pmtu_dpo_type, ipm_clone->ipm_proto,
413 ip_pmtu_dpo_get_index (ipm_clone));
414}
415
416static u16
417ip_pmtu_dpo_get_mtu (const dpo_id_t *dpo)
418{
419 ip_pmtu_dpo_t *ipd;
420
421 ipd = pool_elt_at_index (ip_pmtu_dpo_pool, dpo->dpoi_index);
422
423 return (ipd->ipm_pmtu);
424}
425
426const static dpo_vft_t ip_pmtu_dpo_vft = {
427 .dv_lock = ip_pmtu_dpo_lock,
428 .dv_unlock = ip_pmtu_dpo_unlock,
429 .dv_format = format_ip_pmtu_dpo,
430 .dv_get_urpf = ip_pmtu_dpo_get_urpf,
431 .dv_mk_interpose = ip_pmtu_dpo_interpose,
432 .dv_get_mtu = ip_pmtu_dpo_get_mtu,
433};
434
435/**
436 * @brief The per-protocol VLIB graph nodes that are assigned to a glean
437 * object.
438 *
439 * this means that these graph nodes are ones from which a glean is the
440 * parent object in the DPO-graph.
441 */
442const static char *const ip_pmtu_dpo_ip4_nodes[] = {
443 "ip4-pmtu-dpo",
444 NULL,
445};
446
447const static char *const ip_pmtu_dpo_ip6_nodes[] = {
448 "ip6-pmtu-dpo",
449 NULL,
450};
451
452const static char *const *const ip_pmtu_dpo_nodes[DPO_PROTO_NUM] = {
453 [DPO_PROTO_IP4] = ip_pmtu_dpo_ip4_nodes,
454 [DPO_PROTO_IP6] = ip_pmtu_dpo_ip6_nodes,
455};
456
457static bool
458ip_mtu_fib_entry_is_attached (fib_node_index_t fib_entry)
459{
460 const fib_prefix_t *pfx;
461 u32 cover, fib_index;
462
463 fib_index = fib_entry_get_fib_index (fib_entry);
464 pfx = fib_entry_get_prefix (fib_entry);
465
466 /*
467 * If the tracked prefix's cover is attached, then all packets that
468 * are forwarded to this neighbour will use the adjacency, this is a
469 * more efficient place to perform the MTU check and fragging
470 */
471 cover = fib_table_get_less_specific (fib_index, pfx);
472
473 return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (cover) ||
474 FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (fib_entry));
475}
476
477static index_t
478ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
479 const ip_pmtu_key_t *key, u16 pmtu)
480{
481 dpo_id_t ip_dpo = DPO_INVALID;
482 ip_pmtu_t *ipt;
483 fib_node_index_t cover;
484 const dpo_id_t *lb_dpo;
485 index_t ipti;
486
487 pool_get (ip_pmtu_pool, ipt);
488 fib_node_init (&(ipt->ipt_node), ip_pmtu_fib_type);
489
490 ipti = ipt - ip_pmtu_pool;
491 hash_set_mem_alloc (&ip_pmtu_db, key, ipti);
492
493 ipt->ipt_cfg_pmtu = pmtu;
494 ipt->ipt_fib_entry = fib_entry_track (fib_index, pfx, ip_pmtu_fib_type, ipti,
495 &ipt->ipt_sibling);
496
497 /*
498 * If the tracked prefix's cover is attached, then all packets that
499 * are forwarded to this neighbour will use the adjacency, this is a
500 * more efficient place to perform the MTU check and fragging
501 */
502 cover = fib_table_get_less_specific (fib_index, pfx);
503
504 if (ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry))
505 {
506 u32 sw_if_index;
507
508 ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
509 ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
510
511 sw_if_index = fib_entry_get_resolving_interface (cover);
512
513 /* walk all adjs to add/update delegate */
514 adj_nbr_walk_nh (sw_if_index, pfx->fp_proto, &pfx->fp_addr,
515 ip_ptmu_adj_walk_update, &ipt->ipt_oper_pmtu);
516 }
517 else
518 {
519 ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
520
521 lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
522
523 ipt->ipt_oper_pmtu = clib_min (dpo_get_mtu (lb_dpo), ipt->ipt_cfg_pmtu);
524
525 /*
526 * interpose a policy DPO from the nh so that MTU is applied
527 */
Neale Ranns758ec132022-02-24 10:35:02 +0000528 ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
529 drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
530 &ip_dpo);
Neale Ranns8f5fef22020-12-21 08:29:34 +0000531
532 fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
533 FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
534 dpo_reset (&ip_dpo);
535 }
536
537 IP_PMTU_TRKR_DBG (ipt, "create");
538
539 return (ipti);
540}
541
/**
 * (Re)apply a tracker's path MTU to the forwarding state.
 *
 * Called when the configured MTU changes and when the tracked FIB entry
 * back-walks. Handles transitions between the attached mode (MTU applied
 * through adjacency delegates) and the remote mode (MTU applied through an
 * interposed DPO), then programs the current mode.
 *
 * @param ipt the tracker to apply
 */
static void
ip_pmtu_stack (ip_pmtu_t *ipt)
{
  bool was_attached, is_attached;
  const fib_prefix_t *pfx;
  u32 fib_index;

  pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
  fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry);

  /* compare the mode recorded in the flags with the entry's current state */
  was_attached = !!(ipt->ipt_flags & IP_PMTU_FLAG_ATTACHED);
  is_attached = ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry);

  if (was_attached && !is_attached)
    {
      /* transition from attached to remote - walk all adjs to remove delegate
       */
      adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
                       pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
                       &ipt->ipt_oper_pmtu);
      ipt->ipt_flags &= ~IP_PMTU_FLAG_ATTACHED;
    }
  if (!was_attached && is_attached)
    {
      /* transition from remote to attached - remove the DPO */
      fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);
      ipt->ipt_flags &= ~IP_PMTU_FLAG_REMOTE;
    }

  if (is_attached)
    {
      /* walk all adjs to add/update delegate */
      ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu;
      adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
                       pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_update,
                       &ipt->ipt_oper_pmtu);
      ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED;
    }
  else
    {
      const dpo_id_t *lb_dpo;
      u16 dpo_mtu;

      /* remove this module's source before reading the entry's forwarding
       * and its MTU below */
      fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source);

      ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE;
      lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry);
      dpo_mtu = dpo_get_mtu (lb_dpo);

      ipt->ipt_oper_pmtu = clib_min (dpo_mtu, ipt->ipt_cfg_pmtu);

      /*
       * if the configured path-MTU is less than the egress/interface then
       * interpose a policy DPO from the nh so that MTU is applied
       */
      if (ipt->ipt_oper_pmtu < dpo_mtu)
        {
          dpo_id_t ip_dpo = DPO_INVALID;

          ip_pmtu_dpo_add_or_lock (
            ipt->ipt_oper_pmtu,
            drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);

          fib_table_entry_special_dpo_update (
            fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
          dpo_reset (&ip_dpo);
        }
    }
  IP_PMTU_TRKR_DBG (ipt, "stack");
}
612
613static void
614ip_pmtu_update (index_t ipti, u16 pmtu)
615{
616 ip_pmtu_t *ipt;
617
618 ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
619 ipt->ipt_flags &= ~IP_PMTU_FLAG_STALE;
620 ipt->ipt_cfg_pmtu = pmtu;
621
622 ip_pmtu_stack (ipt);
623}
624
625static index_t
626ip_pmtu_destroy (index_t ipti, const ip_pmtu_key_t *key)
627{
628 ip_pmtu_t *ipt;
629 const fib_prefix_t *pfx;
630
631 ipt = pool_elt_at_index (ip_pmtu_pool, ipti);
632 pfx = fib_entry_get_prefix (ipt->ipt_fib_entry);
633
634 IP_PMTU_TRKR_DBG (ipt, "destroy");
635
636 if (ipt->ipt_flags & IP_PMTU_FLAG_REMOTE)
637 fib_table_entry_special_remove (
638 fib_entry_get_fib_index (ipt->ipt_fib_entry), pfx, ip_pmtu_source);
639 else
640 /* remove the delegate from all the adjacencies */
641 adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry),
642 pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove,
643 NULL);
644
645 /*
646 * Drop the fib entry we're tracking
647 */
648 fib_entry_untrack (ipt->ipt_fib_entry, ipt->ipt_sibling);
649
650 /*
651 * remove from DB and return to pool
652 */
653 hash_unset_mem_free (&ip_pmtu_db, key);
654 pool_put (ip_pmtu_pool, ipt);
655
656 return (ipti);
657}
658
659int
660ip_path_mtu_update (const ip_address_t *nh, u32 table_id, u16 pmtu)
661{
662 fib_prefix_t pfx;
663 u32 fib_index;
664 uword *p;
665
666 ip_address_to_fib_prefix (nh, &pfx);
667 fib_index = fib_table_find (pfx.fp_proto, table_id);
668
669 if (~0 == fib_index)
670 return (VNET_API_ERROR_NO_SUCH_TABLE);
671
672 ip_pmtu_key_t key = {
673 .fproto = pfx.fp_proto,
674 .table_id = table_id,
675 .nh = pfx.fp_addr,
676 };
677
678 p = hash_get_mem (ip_pmtu_db, &key);
679
680 if (!ip_path_mtu_value_invalid (pmtu))
681 {
682 /* Add or update of path MTU */
683 if (NULL == p)
684 ip_pmtu_alloc (fib_index, &pfx, &key, pmtu);
685 else
686 ip_pmtu_update (p[0], pmtu);
687 }
688 else
689 {
690 if (NULL != p)
691 ip_pmtu_destroy (p[0], &key);
692 }
693
694 return (0);
695}
696
697static walk_rc_t
698ip_path_mtu_walk_mark (index_t ipti, void *ctx)
699{
700 ip_pmtu_t *ipt;
701
702 ipt = ip_path_mtu_get (ipti);
703
704 ipt->ipt_flags |= IP_PMTU_FLAG_STALE;
705
706 return (WALK_CONTINUE);
707}
708
/**
 * Context for the sweep walk, which collects the trackers that are stale
 */
typedef struct ip_path_mtu_walk_sweep_ctx_t_
{
  /* vector of the indices of the stale trackers */
  index_t *indicies;
} ip_path_mtu_walk_sweep_ctx_t;
713
714static walk_rc_t
715ip_path_mtu_walk_sweep (index_t ipti, void *arg)
716{
717 ip_path_mtu_walk_sweep_ctx_t *ctx = arg;
718 ip_pmtu_t *ipt;
719
720 ipt = ip_path_mtu_get (ipti);
721
722 if (ipt->ipt_flags & IP_PMTU_FLAG_STALE)
723 {
724 vec_add1 (ctx->indicies, ipti);
725 }
726
727 return (WALK_CONTINUE);
728}
729
730int
731ip_path_mtu_replace_begin (void)
732{
733 IP_PMTU_DBG ("replace-begin");
734
735 ip_path_mtu_walk (ip_path_mtu_walk_mark, NULL);
736
737 return (0);
738}
739
740int
741ip_path_mtu_replace_end (void)
742{
743 index_t *ipti;
744
745 IP_PMTU_DBG ("replace-end");
746
747 /*
748 * not safe to walk the pool whilst deleting, so create
749 * temporary storage of stale entries
750 */
751 ip_path_mtu_walk_sweep_ctx_t ctx = {
752 .indicies = NULL,
753 };
754
755 ip_path_mtu_walk (ip_path_mtu_walk_sweep, &ctx);
756
757 vec_foreach (ipti, ctx.indicies)
758 {
759 ip_pmtu_t *ipt;
760 ip_address_t ip;
761
762 ipt = ip_path_mtu_get (*ipti);
763 ip_pmtu_get_ip (ipt, &ip);
764 ip_path_mtu_update (&ip, ip_pmtu_get_table_id (ipt), 0);
765 }
766
767 vec_free (ctx.indicies);
768
769 return (0);
770}
771
772void
773ip_path_mtu_walk (ip_path_mtu_walk_t fn, void *ctx)
774{
775 index_t ipmi;
776
777 pool_foreach_index (ipmi, ip_pmtu_pool)
778 {
779 if (WALK_STOP == fn (ipmi, ctx))
780 break;
781 }
782}
783
784static fib_node_t *
785ip_pmtu_get_node (fib_node_index_t index)
786{
787 ip_pmtu_t *ipt;
788
789 ipt = pool_elt_at_index (ip_pmtu_pool, index);
790
791 return (&(ipt->ipt_node));
792}
793
794static ip_pmtu_t *
795ip_pmtu_get_from_node (fib_node_t *node)
796{
797 return (
798 (ip_pmtu_t *) (((char *) node) - STRUCT_OFFSET_OF (ip_pmtu_t, ipt_node)));
799}
800
/**
 * FIB node 'last lock gone' function.
 *
 * Not expected to be called, hence the assert.
 *
 * @param node the tracker's FIB node (unused)
 */
static void
ip_pmtu_last_lock_gone (fib_node_t *node)
{
  /*
   * the lifetime of the entry is managed by the API.
   */
  ASSERT (0);
}
809
810/*
811 * A back walk has reached this BIER entry
812 */
813static fib_node_back_walk_rc_t
814ip_pmtu_back_walk_notify (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
815{
816 /*
817 * re-populate the ECMP tables with new choices
818 */
819 ip_pmtu_t *ipr = ip_pmtu_get_from_node (node);
820
821 ip_pmtu_stack (ipr);
822
823 /*
824 * no need to propagate further up the graph, since there's nothing there
825 */
826 return (FIB_NODE_BACK_WALK_CONTINUE);
827}
828
829static const fib_node_vft_t ip_ptmu_fib_node_vft = {
830 .fnv_get = ip_pmtu_get_node,
831 .fnv_last_lock = ip_pmtu_last_lock_gone,
832 .fnv_back_walk = ip_pmtu_back_walk_notify,
833};
834
835static clib_error_t *
836ip_path_module_init (vlib_main_t *vm)
837{
838 ip_pmtu_adj_delegate_type =
839 adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
840 ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
841 FIB_SOURCE_BH_SIMPLE);
Neale Ranns20089122021-12-02 17:07:14 +0000842 ip_pmtu_fib_type =
843 fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
Neale Ranns8f5fef22020-12-21 08:29:34 +0000844
845 ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
846 ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
847 ip_pmtu_dpo_type =
848 dpo_register_new_type (&ip_pmtu_dpo_vft, ip_pmtu_dpo_nodes);
849
850 return (NULL);
851}
852
853VLIB_INIT_FUNCTION (ip_path_module_init);
854
855static clib_error_t *
856show_ip_pmtu_command (vlib_main_t *vm, unformat_input_t *input,
857 vlib_cli_command_t *cmd)
858{
859 index_t ipti;
860
861 if (unformat (input, "%d", &ipti))
862 {
863 /*
864 * show one in detail
865 */
866 if (!pool_is_free_index (ip_pmtu_pool, ipti))
867 vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
868 else
869 vlib_cli_output (vm, "entry %d invalid", ipti);
870 }
871 else
872 {
873 /*
874 * show all
875 */
876 pool_foreach_index (ipti, ip_pmtu_pool)
877 {
878 vlib_cli_output (vm, "%U", format_ip_pmtu, ipti);
879 }
880 }
881
882 return (NULL);
883}
884
885VLIB_CLI_COMMAND (show_fib_entry, static) = {
886 .path = "show ip pmtu",
887 .function = show_ip_pmtu_command,
888 .short_help = "show ip path MTU",
889};
890
891/*
892 * fd.io coding-style-patch-verification: ON
893 *
894 * Local Variables:
895 * eval: (c-set-style "gnu")
896 * End:
897 */