Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 Cisco and/or its affiliates. |
| 3 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | * you may not use this file except in compliance with the License. |
| 5 | * You may obtain a copy of the License at: |
| 6 | * |
| 7 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | * |
| 9 | * Unless required by applicable law or agreed to in writing, software |
| 10 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | * See the License for the specific language governing permissions and |
| 13 | * limitations under the License. |
| 14 | */ |
| 15 | |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 16 | #include <vnet/adj/adj_mcast.h> |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 17 | #include <vnet/adj/adj_internal.h> |
| 18 | #include <vnet/fib/fib_walk.h> |
| 19 | #include <vnet/ip/ip.h> |
| 20 | |
/*
 * The 'DB' of all mcast adjs.
 * There is at most one mcast adj per-interface per-protocol, so this is one
 * vector per FIB protocol, each indexed by sw_if_index. A slot holds the
 * index of that interface's mcast adj, or ADJ_INDEX_INVALID when there is
 * none.
 */
static adj_index_t *adj_mcasts[FIB_PROTOCOL_MAX];
| 27 | |
| 28 | static u32 |
| 29 | adj_get_mcast_node (fib_protocol_t proto) |
| 30 | { |
| 31 | switch (proto) { |
| 32 | case FIB_PROTOCOL_IP4: |
| 33 | return (ip4_rewrite_mcast_node.index); |
| 34 | case FIB_PROTOCOL_IP6: |
| 35 | return (ip6_rewrite_mcast_node.index); |
| 36 | case FIB_PROTOCOL_MPLS: |
| 37 | break; |
| 38 | } |
| 39 | ASSERT(0); |
| 40 | return (0); |
| 41 | } |
| 42 | |
| 43 | /* |
| 44 | * adj_mcast_add_or_lock |
| 45 | * |
| 46 | * The next_hop address here is used for source address selection in the DP. |
| 47 | * The mcast adj is added to an interface's connected prefix, the next-hop |
| 48 | * passed here is the local prefix on the same interface. |
| 49 | */ |
| 50 | adj_index_t |
| 51 | adj_mcast_add_or_lock (fib_protocol_t proto, |
| 52 | vnet_link_t link_type, |
| 53 | u32 sw_if_index) |
| 54 | { |
| 55 | ip_adjacency_t * adj; |
| 56 | |
| 57 | vec_validate_init_empty(adj_mcasts[proto], sw_if_index, ADJ_INDEX_INVALID); |
| 58 | |
| 59 | if (ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) |
| 60 | { |
| 61 | vnet_main_t *vnm; |
| 62 | |
| 63 | vnm = vnet_get_main(); |
| 64 | adj = adj_alloc(proto); |
| 65 | |
| 66 | adj->lookup_next_index = IP_LOOKUP_NEXT_MCAST; |
| 67 | adj->ia_nh_proto = proto; |
| 68 | adj->ia_link = link_type; |
| 69 | adj_mcasts[proto][sw_if_index] = adj_get_index(adj); |
| 70 | adj_lock(adj_get_index(adj)); |
| 71 | |
| 72 | vnet_rewrite_init(vnm, sw_if_index, |
| 73 | adj_get_mcast_node(proto), |
| 74 | vnet_tx_node_index_for_sw_interface(vnm, sw_if_index), |
| 75 | &adj->rewrite_header); |
| 76 | |
| 77 | /* |
| 78 | * we need a rewrite where the destination IP address is converted |
| 79 | * to the appropriate link-layer address. This is interface specific. |
| 80 | * So ask the interface to do it. |
| 81 | */ |
| 82 | vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, |
| 83 | adj_get_index(adj)); |
| 84 | } |
| 85 | else |
| 86 | { |
| 87 | adj = adj_get(adj_mcasts[proto][sw_if_index]); |
| 88 | adj_lock(adj_get_index(adj)); |
| 89 | } |
| 90 | |
| 91 | return (adj_get_index(adj)); |
| 92 | } |
| 93 | |
| 94 | /** |
| 95 | * adj_mcast_update_rewrite |
| 96 | * |
| 97 | * Update the adjacency's rewrite string. A NULL string implies the |
| 98 | * rewirte is reset (i.e. when ARP/ND etnry is gone). |
| 99 | * NB: the adj being updated may be handling traffic in the DP. |
| 100 | */ |
| 101 | void |
| 102 | adj_mcast_update_rewrite (adj_index_t adj_index, |
Neale Ranns | 2e7fbcc | 2017-03-15 04:22:25 -0700 | [diff] [blame] | 103 | u8 *rewrite, |
| 104 | u8 offset, |
| 105 | u32 mask) |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 106 | { |
| 107 | ip_adjacency_t *adj; |
| 108 | |
| 109 | ASSERT(ADJ_INDEX_INVALID != adj_index); |
| 110 | |
| 111 | adj = adj_get(adj_index); |
| 112 | |
| 113 | /* |
| 114 | * update the adj's rewrite string and build the arc |
| 115 | * from the rewrite node to the interface's TX node |
| 116 | */ |
| 117 | adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST, |
| 118 | adj_get_mcast_node(adj->ia_nh_proto), |
| 119 | vnet_tx_node_index_for_sw_interface( |
| 120 | vnet_get_main(), |
| 121 | adj->rewrite_header.sw_if_index), |
| 122 | rewrite); |
Neale Ranns | 2e7fbcc | 2017-03-15 04:22:25 -0700 | [diff] [blame] | 123 | /* |
| 124 | * set the fields corresponding to the mcast IP address rewrite |
| 125 | * The mask must be stored in network byte order, since the packet's |
| 126 | * IP address will also be in network order. |
| 127 | */ |
| 128 | adj->rewrite_header.dst_mcast_offset = offset; |
| 129 | adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 130 | } |
| 131 | |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 132 | /** |
| 133 | * adj_mcast_midchain_update_rewrite |
| 134 | * |
| 135 | * Update the adjacency's rewrite string. A NULL string implies the |
| 136 | * rewirte is reset (i.e. when ARP/ND etnry is gone). |
| 137 | * NB: the adj being updated may be handling traffic in the DP. |
| 138 | */ |
| 139 | void |
| 140 | adj_mcast_midchain_update_rewrite (adj_index_t adj_index, |
| 141 | adj_midchain_fixup_t fixup, |
| 142 | adj_flags_t flags, |
| 143 | u8 *rewrite, |
| 144 | u8 offset, |
| 145 | u32 mask) |
| 146 | { |
| 147 | ip_adjacency_t *adj; |
| 148 | |
| 149 | ASSERT(ADJ_INDEX_INVALID != adj_index); |
| 150 | |
| 151 | adj = adj_get(adj_index); |
| 152 | |
| 153 | /* |
| 154 | * one time only update. since we don't support chainging the tunnel |
| 155 | * src,dst, this is all we need. |
| 156 | */ |
| 157 | ASSERT(adj->lookup_next_index == IP_LOOKUP_NEXT_MCAST); |
| 158 | /* |
| 159 | * tunnels can always provide a rewrite. |
| 160 | */ |
| 161 | ASSERT(NULL != rewrite); |
| 162 | |
| 163 | adj_midchain_setup(adj_index, fixup, flags); |
| 164 | |
| 165 | /* |
| 166 | * update the adj's rewrite string and build the arc |
| 167 | * from the rewrite node to the interface's TX node |
| 168 | */ |
| 169 | adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_MCAST_MIDCHAIN, |
| 170 | adj_get_mcast_node(adj->ia_nh_proto), |
| 171 | vnet_tx_node_index_for_sw_interface( |
| 172 | vnet_get_main(), |
| 173 | adj->rewrite_header.sw_if_index), |
| 174 | rewrite); |
| 175 | |
| 176 | /* |
| 177 | * set the fields corresponding to the mcast IP address rewrite |
| 178 | * The mask must be stored in network byte order, since the packet's |
| 179 | * IP address will also be in network order. |
| 180 | */ |
| 181 | adj->rewrite_header.dst_mcast_offset = offset; |
| 182 | adj->rewrite_header.dst_mcast_mask = clib_host_to_net_u32(mask); |
| 183 | } |
| 184 | |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 185 | void |
| 186 | adj_mcast_remove (fib_protocol_t proto, |
| 187 | u32 sw_if_index) |
| 188 | { |
| 189 | ASSERT(sw_if_index < vec_len(adj_mcasts[proto])); |
| 190 | |
| 191 | adj_mcasts[proto][sw_if_index] = ADJ_INDEX_INVALID; |
| 192 | } |
| 193 | |
| 194 | static clib_error_t * |
| 195 | adj_mcast_interface_state_change (vnet_main_t * vnm, |
| 196 | u32 sw_if_index, |
| 197 | u32 flags) |
| 198 | { |
| 199 | /* |
| 200 | * for each mcast on the interface trigger a walk back to the children |
| 201 | */ |
| 202 | fib_protocol_t proto; |
| 203 | ip_adjacency_t *adj; |
| 204 | |
| 205 | |
| 206 | for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) |
| 207 | { |
| 208 | if (sw_if_index >= vec_len(adj_mcasts[proto]) || |
| 209 | ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) |
| 210 | continue; |
| 211 | |
| 212 | adj = adj_get(adj_mcasts[proto][sw_if_index]); |
| 213 | |
| 214 | fib_node_back_walk_ctx_t bw_ctx = { |
| 215 | .fnbw_reason = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? |
| 216 | FIB_NODE_BW_REASON_FLAG_INTERFACE_UP : |
| 217 | FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN), |
| 218 | }; |
| 219 | |
| 220 | fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); |
| 221 | } |
| 222 | |
| 223 | return (NULL); |
| 224 | } |
| 225 | |
| 226 | VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION(adj_mcast_interface_state_change); |
| 227 | |
| 228 | /** |
| 229 | * @brief Invoked on each SW interface of a HW interface when the |
| 230 | * HW interface state changes |
| 231 | */ |
| 232 | static void |
| 233 | adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm, |
| 234 | u32 sw_if_index, |
| 235 | void *arg) |
| 236 | { |
| 237 | adj_mcast_interface_state_change(vnm, sw_if_index, (uword) arg); |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * @brief Registered callback for HW interface state changes |
| 242 | */ |
| 243 | static clib_error_t * |
| 244 | adj_mcast_hw_interface_state_change (vnet_main_t * vnm, |
| 245 | u32 hw_if_index, |
| 246 | u32 flags) |
| 247 | { |
| 248 | /* |
| 249 | * walk SW interfaces on the HW |
| 250 | */ |
| 251 | uword sw_flags; |
| 252 | |
| 253 | sw_flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ? |
| 254 | VNET_SW_INTERFACE_FLAG_ADMIN_UP : |
| 255 | 0); |
| 256 | |
| 257 | vnet_hw_interface_walk_sw(vnm, hw_if_index, |
| 258 | adj_nbr_hw_sw_interface_state_change, |
| 259 | (void*) sw_flags); |
| 260 | |
| 261 | return (NULL); |
| 262 | } |
| 263 | |
| 264 | VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION( |
| 265 | adj_mcast_hw_interface_state_change); |
| 266 | |
| 267 | static clib_error_t * |
| 268 | adj_mcast_interface_delete (vnet_main_t * vnm, |
| 269 | u32 sw_if_index, |
| 270 | u32 is_add) |
| 271 | { |
| 272 | /* |
| 273 | * for each mcast on the interface trigger a walk back to the children |
| 274 | */ |
| 275 | fib_protocol_t proto; |
| 276 | ip_adjacency_t *adj; |
| 277 | |
| 278 | if (is_add) |
| 279 | { |
| 280 | /* |
| 281 | * not interested in interface additions. we will not back walk |
| 282 | * to resolve paths through newly added interfaces. Why? The control |
| 283 | * plane should have the brains to add interfaces first, then routes. |
| 284 | * So the case where there are paths with a interface that matches |
| 285 | * one just created is the case where the path resolved through an |
| 286 | * interface that was deleted, and still has not been removed. The |
| 287 | * new interface added, is NO GUARANTEE that the interface being |
| 288 | * added now, even though it may have the same sw_if_index, is the |
| 289 | * same interface that the path needs. So tough! |
| 290 | * If the control plane wants these routes to resolve it needs to |
| 291 | * remove and add them again. |
| 292 | */ |
| 293 | return (NULL); |
| 294 | } |
| 295 | |
| 296 | for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) |
| 297 | { |
| 298 | if (sw_if_index >= vec_len(adj_mcasts[proto]) || |
| 299 | ADJ_INDEX_INVALID == adj_mcasts[proto][sw_if_index]) |
| 300 | continue; |
| 301 | |
| 302 | adj = adj_get(adj_mcasts[proto][sw_if_index]); |
| 303 | |
| 304 | fib_node_back_walk_ctx_t bw_ctx = { |
| 305 | .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE, |
| 306 | }; |
| 307 | |
| 308 | fib_walk_sync(FIB_NODE_TYPE_ADJ, adj_get_index(adj), &bw_ctx); |
| 309 | } |
| 310 | |
| 311 | return (NULL); |
| 312 | } |
| 313 | |
| 314 | VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_mcast_interface_delete); |
| 315 | |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 316 | /** |
| 317 | * @brief Walk the multicast Adjacencies on a given interface |
| 318 | */ |
| 319 | void |
| 320 | adj_mcast_walk (u32 sw_if_index, |
| 321 | fib_protocol_t proto, |
| 322 | adj_walk_cb_t cb, |
| 323 | void *ctx) |
| 324 | { |
| 325 | if (vec_len(adj_mcasts[proto]) > sw_if_index) |
| 326 | { |
| 327 | if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) |
| 328 | { |
| 329 | cb(adj_mcasts[proto][sw_if_index], ctx); |
| 330 | } |
| 331 | } |
| 332 | } |
| 333 | |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 334 | u8* |
| 335 | format_adj_mcast (u8* s, va_list *ap) |
| 336 | { |
| 337 | index_t index = va_arg(*ap, index_t); |
| 338 | CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 339 | ip_adjacency_t * adj = adj_get(index); |
| 340 | |
| 341 | s = format(s, "%U-mcast: ", |
| 342 | format_fib_protocol, adj->ia_nh_proto); |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 343 | if (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES) |
| 344 | s = format(s, "[features] "); |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 345 | s = format (s, "%U", |
| 346 | format_vnet_rewrite, |
Neale Ranns | b069a69 | 2017-03-15 12:34:25 -0400 | [diff] [blame] | 347 | &adj->rewrite_header, sizeof (adj->rewrite_data), 0); |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 348 | |
| 349 | return (s); |
| 350 | } |
| 351 | |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 352 | u8* |
| 353 | format_adj_mcast_midchain (u8* s, va_list *ap) |
| 354 | { |
| 355 | index_t index = va_arg(*ap, index_t); |
| 356 | CLIB_UNUSED(u32 indent) = va_arg(*ap, u32); |
| 357 | vnet_main_t * vnm = vnet_get_main(); |
| 358 | ip_adjacency_t * adj = adj_get(index); |
| 359 | |
| 360 | s = format(s, "%U-mcast-midchain: ", |
| 361 | format_fib_protocol, adj->ia_nh_proto); |
| 362 | s = format (s, "%U", |
| 363 | format_vnet_rewrite, |
| 364 | vnm->vlib_main, &adj->rewrite_header, |
| 365 | sizeof (adj->rewrite_data), 0); |
| 366 | s = format (s, "\n%Ustacked-on:\n%U%U", |
| 367 | format_white_space, indent, |
| 368 | format_white_space, indent+2, |
| 369 | format_dpo_id, &adj->sub_type.midchain.next_dpo, indent+2); |
| 370 | |
| 371 | return (s); |
| 372 | } |
| 373 | |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 374 | |
| 375 | static void |
| 376 | adj_dpo_lock (dpo_id_t *dpo) |
| 377 | { |
| 378 | adj_lock(dpo->dpoi_index); |
| 379 | } |
| 380 | static void |
| 381 | adj_dpo_unlock (dpo_id_t *dpo) |
| 382 | { |
| 383 | adj_unlock(dpo->dpoi_index); |
| 384 | } |
| 385 | |
| 386 | const static dpo_vft_t adj_mcast_dpo_vft = { |
| 387 | .dv_lock = adj_dpo_lock, |
| 388 | .dv_unlock = adj_dpo_unlock, |
| 389 | .dv_format = format_adj_mcast, |
| 390 | }; |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 391 | const static dpo_vft_t adj_mcast_midchain_dpo_vft = { |
| 392 | .dv_lock = adj_dpo_lock, |
| 393 | .dv_unlock = adj_dpo_unlock, |
| 394 | .dv_format = format_adj_mcast_midchain, |
| 395 | }; |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 396 | |
| 397 | /** |
| 398 | * @brief The per-protocol VLIB graph nodes that are assigned to a mcast |
| 399 | * object. |
| 400 | * |
| 401 | * this means that these graph nodes are ones from which a mcast is the |
| 402 | * parent object in the DPO-graph. |
| 403 | */ |
| 404 | const static char* const adj_mcast_ip4_nodes[] = |
| 405 | { |
| 406 | "ip4-rewrite-mcast", |
| 407 | NULL, |
| 408 | }; |
| 409 | const static char* const adj_mcast_ip6_nodes[] = |
| 410 | { |
| 411 | "ip6-rewrite-mcast", |
| 412 | NULL, |
| 413 | }; |
| 414 | |
| 415 | const static char* const * const adj_mcast_nodes[DPO_PROTO_NUM] = |
| 416 | { |
| 417 | [DPO_PROTO_IP4] = adj_mcast_ip4_nodes, |
| 418 | [DPO_PROTO_IP6] = adj_mcast_ip6_nodes, |
| 419 | [DPO_PROTO_MPLS] = NULL, |
| 420 | }; |
| 421 | |
| 422 | /** |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 423 | * @brief The per-protocol VLIB graph nodes that are assigned to a mcast |
| 424 | * object. |
| 425 | * |
| 426 | * this means that these graph nodes are ones from which a mcast is the |
| 427 | * parent object in the DPO-graph. |
| 428 | */ |
| 429 | const static char* const adj_mcast_midchain_ip4_nodes[] = |
| 430 | { |
| 431 | "ip4-mcast-midchain", |
| 432 | NULL, |
| 433 | }; |
| 434 | const static char* const adj_mcast_midchain_ip6_nodes[] = |
| 435 | { |
| 436 | "ip6-mcast-midchain", |
| 437 | NULL, |
| 438 | }; |
| 439 | |
| 440 | const static char* const * const adj_mcast_midchain_nodes[DPO_PROTO_NUM] = |
| 441 | { |
| 442 | [DPO_PROTO_IP4] = adj_mcast_midchain_ip4_nodes, |
| 443 | [DPO_PROTO_IP6] = adj_mcast_midchain_ip6_nodes, |
| 444 | [DPO_PROTO_MPLS] = NULL, |
| 445 | }; |
| 446 | |
| 447 | /** |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 448 | * @brief Return the size of the adj DB. |
| 449 | * This is only for testing purposes so an efficient implementation is not needed |
| 450 | */ |
| 451 | u32 |
| 452 | adj_mcast_db_size (void) |
| 453 | { |
| 454 | u32 n_adjs, sw_if_index; |
| 455 | fib_protocol_t proto; |
| 456 | |
| 457 | n_adjs = 0; |
| 458 | for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++) |
| 459 | { |
| 460 | for (sw_if_index = 0; |
| 461 | sw_if_index < vec_len(adj_mcasts[proto]); |
| 462 | sw_if_index++) |
| 463 | { |
| 464 | if (ADJ_INDEX_INVALID != adj_mcasts[proto][sw_if_index]) |
| 465 | { |
| 466 | n_adjs++; |
| 467 | } |
| 468 | } |
| 469 | } |
| 470 | |
| 471 | return (n_adjs); |
| 472 | } |
| 473 | |
| 474 | void |
| 475 | adj_mcast_module_init (void) |
| 476 | { |
Neale Ranns | 0f26c5a | 2017-03-01 15:12:11 -0800 | [diff] [blame] | 477 | dpo_register(DPO_ADJACENCY_MCAST, |
| 478 | &adj_mcast_dpo_vft, |
| 479 | adj_mcast_nodes); |
| 480 | dpo_register(DPO_ADJACENCY_MCAST_MIDCHAIN, |
| 481 | &adj_mcast_midchain_dpo_vft, |
| 482 | adj_mcast_midchain_nodes); |
Neale Ranns | 32e1c01 | 2016-11-22 17:07:28 +0000 | [diff] [blame] | 483 | } |