Neale Ranns | 8f5fef2 | 2020-12-21 08:29:34 +0000 | [diff] [blame] | 1 | /* |
| 2 | *------------------------------------------------------------------ |
| 3 | * ip_path_mtu.c |
| 4 | * |
| 5 | * Copyright (c) 2021 Graphiant. |
| 6 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | * you may not use this file except in compliance with the License. |
| 8 | * You may obtain a copy of the License at: |
| 9 | * |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, software |
| 13 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | * See the License for the specific language governing permissions and |
| 16 | * limitations under the License. |
| 17 | *------------------------------------------------------------------ |
| 18 | */ |
| 19 | |
| 20 | #include <vnet/ip/ip_path_mtu.h> |
| 21 | #include <vnet/ip/ip_frag.h> |
| 22 | #include <vnet/adj/adj_delegate.h> |
| 23 | #include <vnet/adj/adj_nbr.h> |
| 24 | #include <vnet/fib/fib_table.h> |
| 25 | #include <vnet/fib/fib_entry_track.h> |
| 26 | |
| 27 | #include <vnet/dpo/drop_dpo.h> |
| 28 | |
| 29 | /** |
| 30 | * Path MTU |
| 31 | * |
| 32 | * A path is a peer. A peer is known by an IP address (in a table). |
| 33 | * Insert a DPO in the forwarding chain for the peer to perform the |
| 34 | * fragmentation. |
| 35 | * For attached peers, all traffic will use the peer's adjacency, there |
| 36 | * is already an MTU check in the adjacency (for the link's MTU) so as an |
| 37 | * optimisation, instead of using a DPO, we add a delegate to the adjacency |
| 38 | * to set the adjacency's MTU to the path MTU. |
| 39 | */ |
| 40 | |
| 41 | /** |
| 42 | * the logger |
| 43 | */ |
| 44 | static vlib_log_class_t ip_pmtu_logger; |
| 45 | |
| 46 | static adj_delegate_type_t ip_pmtu_adj_delegate_type; |
| 47 | static fib_source_t ip_pmtu_source; |
| 48 | |
| 49 | /** |
| 50 | * DPO pool |
| 51 | */ |
| 52 | ip_pmtu_dpo_t *ip_pmtu_dpo_pool; |
| 53 | |
| 54 | /** |
| 55 | * DPO type registered for these GBP FWD |
| 56 | */ |
| 57 | static dpo_type_t ip_pmtu_dpo_type; |
| 58 | |
| 59 | /** |
| 60 | * Fib node type for the tracker |
| 61 | */ |
| 62 | static fib_node_type_t ip_pmtu_fib_type; |
| 63 | |
| 64 | /** |
| 65 | * Path MTU tracker pool |
| 66 | */ |
| 67 | ip_pmtu_t *ip_pmtu_pool; |
| 68 | |
| 69 | /** |
| 70 | * Delegate added to adjacencies to track path MTU |
| 71 | */ |
| 72 | typedef struct ip_path_mtu_adj_delegate_t_ |
| 73 | { |
| 74 | u16 pmtu; |
| 75 | } ip_path_mtu_adj_delegate_t; |
| 76 | |
| 77 | static ip_path_mtu_adj_delegate_t *ip_path_mtu_adj_delegate_pool; |
| 78 | |
| 79 | /* DB of all FIB PMTU settings */ |
| 80 | typedef struct ip_pmtu_key_t_ |
| 81 | { |
| 82 | ip46_address_t nh; |
| 83 | u32 table_id; |
| 84 | fib_protocol_t fproto; |
| 85 | } __clib_packed ip_pmtu_key_t; |
| 86 | |
| 87 | static uword *ip_pmtu_db; |
| 88 | |
/*
 * Debug logging helpers.
 *
 * Both macros are wrapped in do { } while (0) so that they behave as a
 * single statement (e.g. as the unbraced body of an if/else); the caller
 * supplies the terminating semicolon.
 *
 * IP_PMTU_TRKR_DBG prefixes the message with the formatted tracker;
 * _ipt must point at an element of ip_pmtu_pool.
 */
#define IP_PMTU_TRKR_DBG(_ipt, _fmt, _args...)                                \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, "[%U]: " _fmt ": ", format_ip_pmtu,     \
                      (_ipt) - ip_pmtu_pool, ##_args);                        \
    }                                                                         \
  while (0)

#define IP_PMTU_DBG(_fmt, _args...)                                           \
  do                                                                          \
    {                                                                         \
      vlib_log_debug (ip_pmtu_logger, _fmt ": ", ##_args);                    \
    }                                                                         \
  while (0)
| 98 | |
| 99 | static u8 * |
| 100 | format_ip_pmtu_flags (u8 *s, va_list *ap) |
| 101 | { |
| 102 | ip_pmtu_flags_t f = va_arg (*ap, ip_pmtu_flags_t); |
| 103 | |
| 104 | if (0) |
| 105 | ; |
| 106 | #define _(a, b, c) else if (f & IP_PMTU_FLAG_##a) s = format (s, "%s ", c); |
| 107 | foreach_ip_pmtu_flag |
| 108 | #undef _ |
| 109 | |
| 110 | return (s); |
| 111 | } |
| 112 | |
| 113 | u32 |
| 114 | ip_pmtu_get_table_id (const ip_pmtu_t *ipt) |
| 115 | { |
| 116 | const fib_prefix_t *pfx; |
| 117 | u32 fib_index; |
| 118 | |
| 119 | pfx = fib_entry_get_prefix (ipt->ipt_fib_entry); |
| 120 | fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry); |
| 121 | |
| 122 | return (fib_table_get_table_id (fib_index, pfx->fp_proto)); |
| 123 | } |
| 124 | |
| 125 | void |
| 126 | ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip) |
| 127 | { |
| 128 | const fib_prefix_t *pfx; |
| 129 | |
| 130 | pfx = fib_entry_get_prefix (ipt->ipt_fib_entry); |
| 131 | ip_address_from_46 (&pfx->fp_addr, pfx->fp_proto, ip); |
| 132 | } |
| 133 | |
| 134 | static u8 * |
| 135 | format_ip_pmtu (u8 *s, va_list *ap) |
| 136 | { |
| 137 | ip_pmtu_t *ipt; |
| 138 | index_t ipti = va_arg (*ap, index_t); |
| 139 | const fib_prefix_t *pfx; |
| 140 | u32 fib_index; |
| 141 | |
| 142 | ipt = pool_elt_at_index (ip_pmtu_pool, ipti); |
| 143 | pfx = fib_entry_get_prefix (ipt->ipt_fib_entry); |
| 144 | fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry); |
| 145 | |
| 146 | s = |
| 147 | format (s, "[%d] [tbl:[%d:%d]] %U pmtu:[cfg:%d, oper:%d, parent:%d] [%U]", |
| 148 | ipti, ip_pmtu_get_table_id (ipt), fib_index, format_fib_prefix, |
| 149 | pfx, ipt->ipt_cfg_pmtu, ipt->ipt_oper_pmtu, ipt->ipt_parent_pmtu, |
| 150 | format_ip_pmtu_flags, ipt->ipt_flags); |
| 151 | |
| 152 | return (s); |
| 153 | } |
| 154 | |
| 155 | static u8 * |
| 156 | format_ip_path_mtu_adj_delegate (const adj_delegate_t *aed, u8 *s) |
| 157 | { |
| 158 | ip_path_mtu_adj_delegate_t *ip_adj; |
| 159 | |
| 160 | ip_adj = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, aed->ad_index); |
| 161 | |
| 162 | s = format (s, "IP path-MTU: %d", ip_adj->pmtu); |
| 163 | |
| 164 | return (s); |
| 165 | } |
| 166 | |
| 167 | static void |
| 168 | ip_pmtu_adj_delegate_adj_created (adj_index_t ai) |
| 169 | { |
| 170 | ip_path_mtu_adj_delegate_t *ipp_ad; |
| 171 | const ip_pmtu_t *ipt; |
| 172 | ip_adjacency_t *adj; |
| 173 | u32 table_id; |
| 174 | uword *p; |
| 175 | |
| 176 | adj = adj_get (ai); |
| 177 | |
| 178 | switch (adj->lookup_next_index) |
| 179 | { |
| 180 | case IP_LOOKUP_NEXT_DROP: |
| 181 | case IP_LOOKUP_NEXT_PUNT: |
| 182 | case IP_LOOKUP_NEXT_LOCAL: |
| 183 | case IP_LOOKUP_NEXT_GLEAN: |
| 184 | case IP_LOOKUP_NEXT_MCAST: |
| 185 | case IP_LOOKUP_NEXT_BCAST: |
| 186 | case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: |
| 187 | case IP_LOOKUP_NEXT_ICMP_ERROR: |
| 188 | case IP_LOOKUP_N_NEXT: |
| 189 | return; |
| 190 | |
| 191 | case IP_LOOKUP_NEXT_ARP: |
| 192 | case IP_LOOKUP_NEXT_REWRITE: |
| 193 | case IP_LOOKUP_NEXT_MIDCHAIN: |
| 194 | break; |
| 195 | } |
| 196 | |
| 197 | table_id = fib_table_get_table_id_for_sw_if_index ( |
| 198 | adj->ia_nh_proto, adj->rewrite_header.sw_if_index); |
| 199 | |
| 200 | ip_pmtu_key_t key = { |
| 201 | .nh = adj->sub_type.nbr.next_hop, |
| 202 | .table_id = table_id, |
| 203 | .fproto = adj->ia_nh_proto, |
| 204 | }; |
| 205 | |
| 206 | p = hash_get_mem (ip_pmtu_db, &key); |
| 207 | |
| 208 | if (NULL == p) |
| 209 | return; |
| 210 | |
| 211 | ipt = pool_elt_at_index (ip_pmtu_pool, p[0]); |
| 212 | |
| 213 | pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad); |
| 214 | ipp_ad->pmtu = ipt->ipt_cfg_pmtu; |
| 215 | |
| 216 | adj_delegate_add (adj, ip_pmtu_adj_delegate_type, |
| 217 | ipp_ad - ip_path_mtu_adj_delegate_pool); |
| 218 | |
| 219 | adj_nbr_set_mtu (ai, ipp_ad->pmtu); |
| 220 | |
| 221 | IP_PMTU_TRKR_DBG (ipt, "adj-added:", ai); |
| 222 | } |
| 223 | |
| 224 | static void |
| 225 | ip_pmtu_adj_delegate_adj_deleted (adj_delegate_t *ad) |
| 226 | { |
| 227 | pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index); |
| 228 | } |
| 229 | |
| 230 | static void |
| 231 | ip_pmtu_adj_delegate_adj_modified (adj_delegate_t *ad) |
| 232 | { |
| 233 | ip_path_mtu_adj_delegate_t *ipp_ad; |
| 234 | |
| 235 | ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index); |
| 236 | |
| 237 | adj_nbr_set_mtu (ad->ad_adj_index, ipp_ad->pmtu); |
| 238 | } |
| 239 | |
| 240 | const adj_delegate_vft_t ip_path_adj_delegate_vft = { |
| 241 | .adv_format = format_ip_path_mtu_adj_delegate, |
| 242 | .adv_adj_deleted = ip_pmtu_adj_delegate_adj_deleted, |
| 243 | .adv_adj_modified = ip_pmtu_adj_delegate_adj_modified, |
| 244 | .adv_adj_created = ip_pmtu_adj_delegate_adj_created, |
| 245 | }; |
| 246 | |
| 247 | static bool |
| 248 | ip_path_mtu_value_invalid (u16 pmtu) |
| 249 | { |
| 250 | return (pmtu == 0 || pmtu == 0xffff); |
| 251 | } |
| 252 | |
| 253 | static adj_walk_rc_t |
| 254 | ip_ptmu_adj_walk_remove (adj_index_t ai, void *ctx) |
| 255 | { |
| 256 | adj_delegate_t *ad; |
| 257 | |
| 258 | ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type); |
| 259 | |
| 260 | if (ad) |
| 261 | { |
| 262 | adj_nbr_set_mtu (ai, 0); |
| 263 | |
| 264 | pool_put_index (ip_path_mtu_adj_delegate_pool, ad->ad_index); |
| 265 | adj_delegate_remove (ai, ip_pmtu_adj_delegate_type); |
| 266 | } |
| 267 | return (ADJ_WALK_RC_CONTINUE); |
| 268 | } |
| 269 | |
| 270 | static adj_walk_rc_t |
| 271 | ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx) |
| 272 | { |
| 273 | ip_path_mtu_adj_delegate_t *ipp_ad; |
| 274 | adj_delegate_t *ad; |
| 275 | u16 *pmtup; |
| 276 | |
| 277 | pmtup = ctx; |
| 278 | ad = adj_delegate_get (adj_get (ai), ip_pmtu_adj_delegate_type); |
| 279 | |
| 280 | if (ad) |
| 281 | ipp_ad = pool_elt_at_index (ip_path_mtu_adj_delegate_pool, ad->ad_index); |
| 282 | else |
| 283 | { |
| 284 | pool_get (ip_path_mtu_adj_delegate_pool, ipp_ad); |
| 285 | |
| 286 | adj_delegate_add (adj_get (ai), ip_pmtu_adj_delegate_type, |
| 287 | ipp_ad - ip_path_mtu_adj_delegate_pool); |
| 288 | } |
| 289 | |
| 290 | ipp_ad->pmtu = *pmtup; |
| 291 | |
| 292 | adj_nbr_set_mtu (ai, ipp_ad->pmtu); |
| 293 | |
| 294 | return (ADJ_WALK_RC_CONTINUE); |
| 295 | } |
| 296 | |
| 297 | static ip_pmtu_dpo_t * |
| 298 | ip_pmtu_dpo_alloc (void) |
| 299 | { |
| 300 | ip_pmtu_dpo_t *ipm; |
| 301 | |
| 302 | pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t)); |
| 303 | |
| 304 | return (ipm); |
| 305 | } |
| 306 | |
| 307 | static ip_pmtu_dpo_t * |
| 308 | ip_pmtu_dpo_get_from_dpo (const dpo_id_t *dpo) |
| 309 | { |
| 310 | ASSERT (ip_pmtu_dpo_type == dpo->dpoi_type); |
| 311 | |
| 312 | return (ip_pmtu_dpo_get (dpo->dpoi_index)); |
| 313 | } |
| 314 | |
| 315 | static index_t |
| 316 | ip_pmtu_dpo_get_index (ip_pmtu_dpo_t *ipm) |
| 317 | { |
| 318 | return (ipm - ip_pmtu_dpo_pool); |
| 319 | } |
| 320 | |
| 321 | static void |
| 322 | ip_pmtu_dpo_lock (dpo_id_t *dpo) |
| 323 | { |
| 324 | ip_pmtu_dpo_t *ipm; |
| 325 | |
| 326 | ipm = ip_pmtu_dpo_get_from_dpo (dpo); |
| 327 | ipm->ipm_locks++; |
| 328 | } |
| 329 | |
| 330 | static void |
| 331 | ip_pmtu_dpo_unlock (dpo_id_t *dpo) |
| 332 | { |
| 333 | ip_pmtu_dpo_t *ipm; |
| 334 | |
| 335 | ipm = ip_pmtu_dpo_get_from_dpo (dpo); |
| 336 | ipm->ipm_locks--; |
| 337 | |
| 338 | if (0 == ipm->ipm_locks) |
| 339 | { |
| 340 | dpo_reset (&ipm->ipm_dpo); |
| 341 | pool_put (ip_pmtu_dpo_pool, ipm); |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | static u32 |
| 346 | ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo) |
| 347 | { |
| 348 | ip_pmtu_dpo_t *ipm; |
| 349 | |
| 350 | ipm = ip_pmtu_dpo_get_from_dpo (dpo); |
| 351 | |
| 352 | return (dpo_get_urpf (&ipm->ipm_dpo)); |
| 353 | } |
| 354 | |
| 355 | void |
| 356 | ip_pmtu_dpo_add_or_lock (fib_protocol_t fproto, u16 pmtu, dpo_id_t *dpo) |
| 357 | { |
| 358 | ip_pmtu_dpo_t *ipm; |
| 359 | dpo_id_t parent = DPO_INVALID; |
| 360 | |
| 361 | ipm = ip_pmtu_dpo_alloc (); |
| 362 | |
| 363 | ipm->ipm_proto = fib_proto_to_dpo (fproto); |
| 364 | ipm->ipm_pmtu = pmtu; |
| 365 | |
| 366 | dpo_copy (&parent, drop_dpo_get (ipm->ipm_proto)); |
| 367 | dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, &parent); |
| 368 | dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm)); |
| 369 | } |
| 370 | |
| 371 | u8 * |
| 372 | format_ip_pmtu_dpo (u8 *s, va_list *ap) |
| 373 | { |
| 374 | index_t index = va_arg (*ap, index_t); |
| 375 | u32 indent = va_arg (*ap, u32); |
| 376 | ip_pmtu_dpo_t *ipm = ip_pmtu_dpo_get (index); |
| 377 | |
| 378 | s = format (s, "ip-pmtu-dpo: %U, mtu:%d", format_dpo_proto, ipm->ipm_proto, |
| 379 | ipm->ipm_pmtu); |
| 380 | s = format (s, "\n%U", format_white_space, indent + 2); |
| 381 | s = format (s, "%U", format_dpo_id, &ipm->ipm_dpo, indent + 4); |
| 382 | |
| 383 | return (s); |
| 384 | } |
| 385 | |
| 386 | /** |
| 387 | * Interpose a path MTU DPO |
| 388 | */ |
| 389 | static void |
| 390 | ip_pmtu_dpo_interpose (const dpo_id_t *original, const dpo_id_t *parent, |
| 391 | dpo_id_t *clone) |
| 392 | { |
| 393 | ip_pmtu_dpo_t *ipm, *ipm_clone; |
| 394 | |
| 395 | ipm_clone = ip_pmtu_dpo_alloc (); |
| 396 | ipm = ip_pmtu_dpo_get (original->dpoi_index); |
| 397 | |
| 398 | ipm_clone->ipm_proto = ipm->ipm_proto; |
| 399 | ipm_clone->ipm_pmtu = ipm->ipm_pmtu; |
| 400 | |
| 401 | dpo_stack (ip_pmtu_dpo_type, ipm_clone->ipm_proto, &ipm_clone->ipm_dpo, |
| 402 | parent); |
| 403 | dpo_set (clone, ip_pmtu_dpo_type, ipm_clone->ipm_proto, |
| 404 | ip_pmtu_dpo_get_index (ipm_clone)); |
| 405 | } |
| 406 | |
| 407 | static u16 |
| 408 | ip_pmtu_dpo_get_mtu (const dpo_id_t *dpo) |
| 409 | { |
| 410 | ip_pmtu_dpo_t *ipd; |
| 411 | |
| 412 | ipd = pool_elt_at_index (ip_pmtu_dpo_pool, dpo->dpoi_index); |
| 413 | |
| 414 | return (ipd->ipm_pmtu); |
| 415 | } |
| 416 | |
| 417 | const static dpo_vft_t ip_pmtu_dpo_vft = { |
| 418 | .dv_lock = ip_pmtu_dpo_lock, |
| 419 | .dv_unlock = ip_pmtu_dpo_unlock, |
| 420 | .dv_format = format_ip_pmtu_dpo, |
| 421 | .dv_get_urpf = ip_pmtu_dpo_get_urpf, |
| 422 | .dv_mk_interpose = ip_pmtu_dpo_interpose, |
| 423 | .dv_get_mtu = ip_pmtu_dpo_get_mtu, |
| 424 | }; |
| 425 | |
| 426 | /** |
| 427 | * @brief The per-protocol VLIB graph nodes that are assigned to a glean |
| 428 | * object. |
| 429 | * |
| 430 | * this means that these graph nodes are ones from which a glean is the |
| 431 | * parent object in the DPO-graph. |
| 432 | */ |
| 433 | const static char *const ip_pmtu_dpo_ip4_nodes[] = { |
| 434 | "ip4-pmtu-dpo", |
| 435 | NULL, |
| 436 | }; |
| 437 | |
| 438 | const static char *const ip_pmtu_dpo_ip6_nodes[] = { |
| 439 | "ip6-pmtu-dpo", |
| 440 | NULL, |
| 441 | }; |
| 442 | |
| 443 | const static char *const *const ip_pmtu_dpo_nodes[DPO_PROTO_NUM] = { |
| 444 | [DPO_PROTO_IP4] = ip_pmtu_dpo_ip4_nodes, |
| 445 | [DPO_PROTO_IP6] = ip_pmtu_dpo_ip6_nodes, |
| 446 | }; |
| 447 | |
| 448 | static bool |
| 449 | ip_mtu_fib_entry_is_attached (fib_node_index_t fib_entry) |
| 450 | { |
| 451 | const fib_prefix_t *pfx; |
| 452 | u32 cover, fib_index; |
| 453 | |
| 454 | fib_index = fib_entry_get_fib_index (fib_entry); |
| 455 | pfx = fib_entry_get_prefix (fib_entry); |
| 456 | |
| 457 | /* |
| 458 | * If the tracked prefix's cover is attached, then all packets that |
| 459 | * are forwarded to this neighbour will use the adjacency, this is a |
| 460 | * more efficient place to perform the MTU check and fragging |
| 461 | */ |
| 462 | cover = fib_table_get_less_specific (fib_index, pfx); |
| 463 | |
| 464 | return (FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (cover) || |
| 465 | FIB_ENTRY_FLAG_ATTACHED & fib_entry_get_flags (fib_entry)); |
| 466 | } |
| 467 | |
| 468 | static index_t |
| 469 | ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx, |
| 470 | const ip_pmtu_key_t *key, u16 pmtu) |
| 471 | { |
| 472 | dpo_id_t ip_dpo = DPO_INVALID; |
| 473 | ip_pmtu_t *ipt; |
| 474 | fib_node_index_t cover; |
| 475 | const dpo_id_t *lb_dpo; |
| 476 | index_t ipti; |
| 477 | |
| 478 | pool_get (ip_pmtu_pool, ipt); |
| 479 | fib_node_init (&(ipt->ipt_node), ip_pmtu_fib_type); |
| 480 | |
| 481 | ipti = ipt - ip_pmtu_pool; |
| 482 | hash_set_mem_alloc (&ip_pmtu_db, key, ipti); |
| 483 | |
| 484 | ipt->ipt_cfg_pmtu = pmtu; |
| 485 | ipt->ipt_fib_entry = fib_entry_track (fib_index, pfx, ip_pmtu_fib_type, ipti, |
| 486 | &ipt->ipt_sibling); |
| 487 | |
| 488 | /* |
| 489 | * If the tracked prefix's cover is attached, then all packets that |
| 490 | * are forwarded to this neighbour will use the adjacency, this is a |
| 491 | * more efficient place to perform the MTU check and fragging |
| 492 | */ |
| 493 | cover = fib_table_get_less_specific (fib_index, pfx); |
| 494 | |
| 495 | if (ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry)) |
| 496 | { |
| 497 | u32 sw_if_index; |
| 498 | |
| 499 | ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED; |
| 500 | ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu; |
| 501 | |
| 502 | sw_if_index = fib_entry_get_resolving_interface (cover); |
| 503 | |
| 504 | /* walk all adjs to add/update delegate */ |
| 505 | adj_nbr_walk_nh (sw_if_index, pfx->fp_proto, &pfx->fp_addr, |
| 506 | ip_ptmu_adj_walk_update, &ipt->ipt_oper_pmtu); |
| 507 | } |
| 508 | else |
| 509 | { |
| 510 | ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE; |
| 511 | |
| 512 | lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry); |
| 513 | |
| 514 | ipt->ipt_oper_pmtu = clib_min (dpo_get_mtu (lb_dpo), ipt->ipt_cfg_pmtu); |
| 515 | |
| 516 | /* |
| 517 | * interpose a policy DPO from the nh so that MTU is applied |
| 518 | */ |
| 519 | ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo); |
| 520 | |
| 521 | fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source, |
| 522 | FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo); |
| 523 | dpo_reset (&ip_dpo); |
| 524 | } |
| 525 | |
| 526 | IP_PMTU_TRKR_DBG (ipt, "create"); |
| 527 | |
| 528 | return (ipti); |
| 529 | } |
| 530 | |
| 531 | static void |
| 532 | ip_pmtu_stack (ip_pmtu_t *ipt) |
| 533 | { |
| 534 | bool was_attached, is_attached; |
| 535 | const fib_prefix_t *pfx; |
| 536 | u32 fib_index; |
| 537 | |
| 538 | pfx = fib_entry_get_prefix (ipt->ipt_fib_entry); |
| 539 | fib_index = fib_entry_get_fib_index (ipt->ipt_fib_entry); |
| 540 | |
| 541 | was_attached = !!(ipt->ipt_flags & IP_PMTU_FLAG_ATTACHED); |
| 542 | is_attached = ip_mtu_fib_entry_is_attached (ipt->ipt_fib_entry); |
| 543 | |
| 544 | if (was_attached && !is_attached) |
| 545 | { |
| 546 | /* transition from attached to remote - walk all adjs to remove delegate |
| 547 | */ |
| 548 | adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry), |
| 549 | pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove, |
| 550 | &ipt->ipt_oper_pmtu); |
| 551 | ipt->ipt_flags &= ~IP_PMTU_FLAG_ATTACHED; |
| 552 | } |
| 553 | if (!was_attached && is_attached) |
| 554 | { |
| 555 | /* transition from remote to attached - remove the DPO */ |
| 556 | fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source); |
| 557 | ipt->ipt_flags &= ~IP_PMTU_FLAG_REMOTE; |
| 558 | } |
| 559 | |
| 560 | if (is_attached) |
| 561 | { |
| 562 | /* walk all adjs to add/update delegate */ |
| 563 | ipt->ipt_oper_pmtu = ipt->ipt_cfg_pmtu; |
| 564 | adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry), |
| 565 | pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_update, |
| 566 | &ipt->ipt_oper_pmtu); |
| 567 | ipt->ipt_flags |= IP_PMTU_FLAG_ATTACHED; |
| 568 | } |
| 569 | else |
| 570 | { |
| 571 | const dpo_id_t *lb_dpo; |
| 572 | u16 dpo_mtu; |
| 573 | |
| 574 | fib_table_entry_special_remove (fib_index, pfx, ip_pmtu_source); |
| 575 | |
| 576 | ipt->ipt_flags |= IP_PMTU_FLAG_REMOTE; |
| 577 | lb_dpo = fib_entry_contribute_ip_forwarding (ipt->ipt_fib_entry); |
| 578 | dpo_mtu = dpo_get_mtu (lb_dpo); |
| 579 | |
| 580 | ipt->ipt_oper_pmtu = clib_min (dpo_mtu, ipt->ipt_cfg_pmtu); |
| 581 | |
| 582 | /* |
| 583 | * if the configured path-MTU is less that the egress/interface then |
| 584 | * interpose a policy DPO from the nh so that MTU is applied |
| 585 | */ |
| 586 | if (ipt->ipt_oper_pmtu < dpo_mtu) |
| 587 | { |
| 588 | dpo_id_t ip_dpo = DPO_INVALID; |
| 589 | |
| 590 | ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo); |
| 591 | |
| 592 | fib_table_entry_special_dpo_update ( |
| 593 | fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo); |
| 594 | dpo_reset (&ip_dpo); |
| 595 | } |
| 596 | } |
| 597 | IP_PMTU_TRKR_DBG (ipt, "stack"); |
| 598 | } |
| 599 | |
| 600 | static void |
| 601 | ip_pmtu_update (index_t ipti, u16 pmtu) |
| 602 | { |
| 603 | ip_pmtu_t *ipt; |
| 604 | |
| 605 | ipt = pool_elt_at_index (ip_pmtu_pool, ipti); |
| 606 | ipt->ipt_flags &= ~IP_PMTU_FLAG_STALE; |
| 607 | ipt->ipt_cfg_pmtu = pmtu; |
| 608 | |
| 609 | ip_pmtu_stack (ipt); |
| 610 | } |
| 611 | |
| 612 | static index_t |
| 613 | ip_pmtu_destroy (index_t ipti, const ip_pmtu_key_t *key) |
| 614 | { |
| 615 | ip_pmtu_t *ipt; |
| 616 | const fib_prefix_t *pfx; |
| 617 | |
| 618 | ipt = pool_elt_at_index (ip_pmtu_pool, ipti); |
| 619 | pfx = fib_entry_get_prefix (ipt->ipt_fib_entry); |
| 620 | |
| 621 | IP_PMTU_TRKR_DBG (ipt, "destroy"); |
| 622 | |
| 623 | if (ipt->ipt_flags & IP_PMTU_FLAG_REMOTE) |
| 624 | fib_table_entry_special_remove ( |
| 625 | fib_entry_get_fib_index (ipt->ipt_fib_entry), pfx, ip_pmtu_source); |
| 626 | else |
| 627 | /* remove the delegate from all the adjacencies */ |
| 628 | adj_nbr_walk_nh (fib_entry_get_resolving_interface (ipt->ipt_fib_entry), |
| 629 | pfx->fp_proto, &pfx->fp_addr, ip_ptmu_adj_walk_remove, |
| 630 | NULL); |
| 631 | |
| 632 | /* |
| 633 | * Drop the fib entry we're tracking |
| 634 | */ |
| 635 | fib_entry_untrack (ipt->ipt_fib_entry, ipt->ipt_sibling); |
| 636 | |
| 637 | /* |
| 638 | * remove from DB and return to pool |
| 639 | */ |
| 640 | hash_unset_mem_free (&ip_pmtu_db, key); |
| 641 | pool_put (ip_pmtu_pool, ipt); |
| 642 | |
| 643 | return (ipti); |
| 644 | } |
| 645 | |
| 646 | int |
| 647 | ip_path_mtu_update (const ip_address_t *nh, u32 table_id, u16 pmtu) |
| 648 | { |
| 649 | fib_prefix_t pfx; |
| 650 | u32 fib_index; |
| 651 | uword *p; |
| 652 | |
| 653 | ip_address_to_fib_prefix (nh, &pfx); |
| 654 | fib_index = fib_table_find (pfx.fp_proto, table_id); |
| 655 | |
| 656 | if (~0 == fib_index) |
| 657 | return (VNET_API_ERROR_NO_SUCH_TABLE); |
| 658 | |
| 659 | ip_pmtu_key_t key = { |
| 660 | .fproto = pfx.fp_proto, |
| 661 | .table_id = table_id, |
| 662 | .nh = pfx.fp_addr, |
| 663 | }; |
| 664 | |
| 665 | p = hash_get_mem (ip_pmtu_db, &key); |
| 666 | |
| 667 | if (!ip_path_mtu_value_invalid (pmtu)) |
| 668 | { |
| 669 | /* Add or update of path MTU */ |
| 670 | if (NULL == p) |
| 671 | ip_pmtu_alloc (fib_index, &pfx, &key, pmtu); |
| 672 | else |
| 673 | ip_pmtu_update (p[0], pmtu); |
| 674 | } |
| 675 | else |
| 676 | { |
| 677 | if (NULL != p) |
| 678 | ip_pmtu_destroy (p[0], &key); |
| 679 | } |
| 680 | |
| 681 | return (0); |
| 682 | } |
| 683 | |
| 684 | static walk_rc_t |
| 685 | ip_path_mtu_walk_mark (index_t ipti, void *ctx) |
| 686 | { |
| 687 | ip_pmtu_t *ipt; |
| 688 | |
| 689 | ipt = ip_path_mtu_get (ipti); |
| 690 | |
| 691 | ipt->ipt_flags |= IP_PMTU_FLAG_STALE; |
| 692 | |
| 693 | return (WALK_CONTINUE); |
| 694 | } |
| 695 | |
| 696 | typedef struct ip_path_mtu_walk_sweep_ctx_t_ |
| 697 | { |
| 698 | index_t *indicies; |
| 699 | } ip_path_mtu_walk_sweep_ctx_t; |
| 700 | |
| 701 | static walk_rc_t |
| 702 | ip_path_mtu_walk_sweep (index_t ipti, void *arg) |
| 703 | { |
| 704 | ip_path_mtu_walk_sweep_ctx_t *ctx = arg; |
| 705 | ip_pmtu_t *ipt; |
| 706 | |
| 707 | ipt = ip_path_mtu_get (ipti); |
| 708 | |
| 709 | if (ipt->ipt_flags & IP_PMTU_FLAG_STALE) |
| 710 | { |
| 711 | vec_add1 (ctx->indicies, ipti); |
| 712 | } |
| 713 | |
| 714 | return (WALK_CONTINUE); |
| 715 | } |
| 716 | |
| 717 | int |
| 718 | ip_path_mtu_replace_begin (void) |
| 719 | { |
| 720 | IP_PMTU_DBG ("replace-begin"); |
| 721 | |
| 722 | ip_path_mtu_walk (ip_path_mtu_walk_mark, NULL); |
| 723 | |
| 724 | return (0); |
| 725 | } |
| 726 | |
| 727 | int |
| 728 | ip_path_mtu_replace_end (void) |
| 729 | { |
| 730 | index_t *ipti; |
| 731 | |
| 732 | IP_PMTU_DBG ("replace-end"); |
| 733 | |
| 734 | /* |
| 735 | * not safe to walk the pool whilst deleting, so create |
| 736 | * temporary storage of stale entries |
| 737 | */ |
| 738 | ip_path_mtu_walk_sweep_ctx_t ctx = { |
| 739 | .indicies = NULL, |
| 740 | }; |
| 741 | |
| 742 | ip_path_mtu_walk (ip_path_mtu_walk_sweep, &ctx); |
| 743 | |
| 744 | vec_foreach (ipti, ctx.indicies) |
| 745 | { |
| 746 | ip_pmtu_t *ipt; |
| 747 | ip_address_t ip; |
| 748 | |
| 749 | ipt = ip_path_mtu_get (*ipti); |
| 750 | ip_pmtu_get_ip (ipt, &ip); |
| 751 | ip_path_mtu_update (&ip, ip_pmtu_get_table_id (ipt), 0); |
| 752 | } |
| 753 | |
| 754 | vec_free (ctx.indicies); |
| 755 | |
| 756 | return (0); |
| 757 | } |
| 758 | |
| 759 | void |
| 760 | ip_path_mtu_walk (ip_path_mtu_walk_t fn, void *ctx) |
| 761 | { |
| 762 | index_t ipmi; |
| 763 | |
| 764 | pool_foreach_index (ipmi, ip_pmtu_pool) |
| 765 | { |
| 766 | if (WALK_STOP == fn (ipmi, ctx)) |
| 767 | break; |
| 768 | } |
| 769 | } |
| 770 | |
| 771 | static fib_node_t * |
| 772 | ip_pmtu_get_node (fib_node_index_t index) |
| 773 | { |
| 774 | ip_pmtu_t *ipt; |
| 775 | |
| 776 | ipt = pool_elt_at_index (ip_pmtu_pool, index); |
| 777 | |
| 778 | return (&(ipt->ipt_node)); |
| 779 | } |
| 780 | |
| 781 | static ip_pmtu_t * |
| 782 | ip_pmtu_get_from_node (fib_node_t *node) |
| 783 | { |
| 784 | return ( |
| 785 | (ip_pmtu_t *) (((char *) node) - STRUCT_OFFSET_OF (ip_pmtu_t, ipt_node))); |
| 786 | } |
| 787 | |
| 788 | static void |
| 789 | ip_pmtu_last_lock_gone (fib_node_t *node) |
| 790 | { |
| 791 | /* |
| 792 | * the lifetime of the entry is managed by the API. |
| 793 | */ |
| 794 | ASSERT (0); |
| 795 | } |
| 796 | |
| 797 | /* |
| 798 | * A back walk has reached this BIER entry |
| 799 | */ |
| 800 | static fib_node_back_walk_rc_t |
| 801 | ip_pmtu_back_walk_notify (fib_node_t *node, fib_node_back_walk_ctx_t *ctx) |
| 802 | { |
| 803 | /* |
| 804 | * re-populate the ECMP tables with new choices |
| 805 | */ |
| 806 | ip_pmtu_t *ipr = ip_pmtu_get_from_node (node); |
| 807 | |
| 808 | ip_pmtu_stack (ipr); |
| 809 | |
| 810 | /* |
| 811 | * no need to propagate further up the graph, since there's nothing there |
| 812 | */ |
| 813 | return (FIB_NODE_BACK_WALK_CONTINUE); |
| 814 | } |
| 815 | |
| 816 | static const fib_node_vft_t ip_ptmu_fib_node_vft = { |
| 817 | .fnv_get = ip_pmtu_get_node, |
| 818 | .fnv_last_lock = ip_pmtu_last_lock_gone, |
| 819 | .fnv_back_walk = ip_pmtu_back_walk_notify, |
| 820 | }; |
| 821 | |
| 822 | static clib_error_t * |
| 823 | ip_path_module_init (vlib_main_t *vm) |
| 824 | { |
| 825 | ip_pmtu_adj_delegate_type = |
| 826 | adj_delegate_register_new_type (&ip_path_adj_delegate_vft); |
| 827 | ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI, |
| 828 | FIB_SOURCE_BH_SIMPLE); |
Neale Ranns | 2008912 | 2021-12-02 17:07:14 +0000 | [diff] [blame] | 829 | ip_pmtu_fib_type = |
| 830 | fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft); |
Neale Ranns | 8f5fef2 | 2020-12-21 08:29:34 +0000 | [diff] [blame] | 831 | |
| 832 | ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t)); |
| 833 | ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu"); |
| 834 | ip_pmtu_dpo_type = |
| 835 | dpo_register_new_type (&ip_pmtu_dpo_vft, ip_pmtu_dpo_nodes); |
| 836 | |
| 837 | return (NULL); |
| 838 | } |
| 839 | |
| 840 | VLIB_INIT_FUNCTION (ip_path_module_init); |
| 841 | |
| 842 | static clib_error_t * |
| 843 | show_ip_pmtu_command (vlib_main_t *vm, unformat_input_t *input, |
| 844 | vlib_cli_command_t *cmd) |
| 845 | { |
| 846 | index_t ipti; |
| 847 | |
| 848 | if (unformat (input, "%d", &ipti)) |
| 849 | { |
| 850 | /* |
| 851 | * show one in detail |
| 852 | */ |
| 853 | if (!pool_is_free_index (ip_pmtu_pool, ipti)) |
| 854 | vlib_cli_output (vm, "%U", format_ip_pmtu, ipti); |
| 855 | else |
| 856 | vlib_cli_output (vm, "entry %d invalid", ipti); |
| 857 | } |
| 858 | else |
| 859 | { |
| 860 | /* |
| 861 | * show all |
| 862 | */ |
| 863 | pool_foreach_index (ipti, ip_pmtu_pool) |
| 864 | { |
| 865 | vlib_cli_output (vm, "%U", format_ip_pmtu, ipti); |
| 866 | } |
| 867 | } |
| 868 | |
| 869 | return (NULL); |
| 870 | } |
| 871 | |
| 872 | VLIB_CLI_COMMAND (show_fib_entry, static) = { |
| 873 | .path = "show ip pmtu", |
| 874 | .function = show_ip_pmtu_command, |
| 875 | .short_help = "show ip path MTU", |
| 876 | }; |
| 877 | |
| 878 | /* |
| 879 | * fd.io coding-style-patch-verification: ON |
| 880 | * |
| 881 | * Local Variables: |
| 882 | * eval: (c-set-style "gnu") |
| 883 | * End: |
| 884 | */ |