/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15
16#include <vnet/adj/adj_nbr.h>
17#include <vnet/adj/adj_internal.h>
18#include <vnet/ethernet/arp_packet.h>
19#include <vnet/fib/fib_walk.h>
20
21/*
22 * Vector Hash tables of neighbour (traditional) adjacencies
23 * Key: interface(for the vector index), address (and its proto),
24 * link-type/ether-type.
25 */
26static BVT(clib_bihash) **adj_nbr_tables[FIB_PROTOCOL_MAX];
27
28// FIXME SIZE APPROPRIATELY. ASK DAVEB.
29#define ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (64 * 64)
30#define ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (32<<20)
31
32
/*
 * Fill the key of a bihash key/value pair for a neighbour adjacency.
 *
 *  _key - the key/value pair to fill (the object itself, not a pointer)
 *  _lt  - the link-type
 *  _nh  - pointer to the next-hop address; both of its 64 bit words are used
 *
 * The body is wrapped in do/while(0) so that the expansion is exactly one
 * statement and can be used safely as the body of an un-braced if/else.
 */
#define ADJ_NBR_SET_KEY(_key, _lt, _nh)         \
do {                                            \
    (_key).key[0] = (_nh)->as_u64[0];           \
    (_key).key[1] = (_nh)->as_u64[1];           \
    (_key).key[2] = (_lt);                      \
} while (0)
39
/*
 * True when there is a neighbour hash table for interface _itf in
 * protocol _proto's vector, i.e. the index is within the vector and the
 * slot is populated.
 * Both tests use the macro's own _itf argument, so the result does not depend
 * on what the caller's variable happens to be called.
 */
#define ADJ_NBR_ITF_OK(_proto, _itf)                    \
    (((_itf) < vec_len(adj_nbr_tables[_proto])) &&      \
     (NULL != adj_nbr_tables[_proto][(_itf)]))
43
44static void
45adj_nbr_insert (fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +010046 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +010047 const ip46_address_t *nh_addr,
48 u32 sw_if_index,
49 adj_index_t adj_index)
50{
51 BVT(clib_bihash_kv) kv;
52
53 if (sw_if_index >= vec_len(adj_nbr_tables[nh_proto]))
54 {
55 vec_validate(adj_nbr_tables[nh_proto], sw_if_index);
56 }
57 if (NULL == adj_nbr_tables[nh_proto][sw_if_index])
58 {
59 adj_nbr_tables[nh_proto][sw_if_index] =
60 clib_mem_alloc_aligned(sizeof(BVT(clib_bihash)),
61 CLIB_CACHE_LINE_BYTES);
62 memset(adj_nbr_tables[nh_proto][sw_if_index],
63 0,
64 sizeof(BVT(clib_bihash)));
65
66 BV(clib_bihash_init) (adj_nbr_tables[nh_proto][sw_if_index],
67 "Adjacency Neighbour table",
68 ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS,
69 ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE);
70 }
71
72 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
73 kv.value = adj_index;
74
75 BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 1);
76}
77
78void
Neale Ranns177bbdc2016-11-15 09:46:51 +000079adj_nbr_remove (adj_index_t ai,
80 fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +010081 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +010082 const ip46_address_t *nh_addr,
83 u32 sw_if_index)
84{
85 BVT(clib_bihash_kv) kv;
86
87 if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
88 return;
89
90 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
Neale Ranns177bbdc2016-11-15 09:46:51 +000091 kv.value = ai;
Neale Ranns0bfe5d82016-08-25 15:29:12 +010092
93 BV(clib_bihash_add_del) (adj_nbr_tables[nh_proto][sw_if_index], &kv, 0);
94}
95
96static adj_index_t
97adj_nbr_find (fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +010098 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +010099 const ip46_address_t *nh_addr,
100 u32 sw_if_index)
101{
102 BVT(clib_bihash_kv) kv;
103
104 ADJ_NBR_SET_KEY(kv, link_type, nh_addr);
105
106 if (!ADJ_NBR_ITF_OK(nh_proto, sw_if_index))
107 return (ADJ_INDEX_INVALID);
108
109 if (BV(clib_bihash_search)(adj_nbr_tables[nh_proto][sw_if_index],
110 &kv, &kv) < 0)
111 {
112 return (ADJ_INDEX_INVALID);
113 }
114 else
115 {
116 return (kv.value);
117 }
118}
119
Neale Rannsb80c5362016-10-08 13:03:40 +0100120static inline u32
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100121adj_get_nd_node (fib_protocol_t proto)
122{
123 switch (proto) {
124 case FIB_PROTOCOL_IP4:
Neale Rannsb80c5362016-10-08 13:03:40 +0100125 return (ip4_arp_node.index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100126 case FIB_PROTOCOL_IP6:
Neale Rannsb80c5362016-10-08 13:03:40 +0100127 return (ip6_discover_neighbor_node.index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100128 case FIB_PROTOCOL_MPLS:
129 break;
130 }
131 ASSERT(0);
Neale Rannsb80c5362016-10-08 13:03:40 +0100132 return (ip4_arp_node.index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100133}
134
135static ip_adjacency_t*
136adj_nbr_alloc (fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +0100137 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100138 const ip46_address_t *nh_addr,
139 u32 sw_if_index)
140{
141 ip_adjacency_t *adj;
142
143 adj = adj_alloc(nh_proto);
144
145 adj_nbr_insert(nh_proto, link_type, nh_addr,
146 sw_if_index,
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100147 adj_get_index(adj));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100148
149 /*
150 * since we just added the ADJ we have no rewrite string for it,
151 * so its for ARP
152 */
153 adj->lookup_next_index = IP_LOOKUP_NEXT_ARP;
154 adj->sub_type.nbr.next_hop = *nh_addr;
155 adj->ia_link = link_type;
156 adj->ia_nh_proto = nh_proto;
Neale Rannsb80c5362016-10-08 13:03:40 +0100157 adj->rewrite_header.sw_if_index = sw_if_index;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100158 memset(&adj->sub_type.midchain.next_dpo, 0,
159 sizeof(adj->sub_type.midchain.next_dpo));
160
161 return (adj);
162}
163
164/*
Neale Ranns32e1c012016-11-22 17:07:28 +0000165 * adj_nbr_add_or_lock
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100166 *
167 * Add an adjacency for the neighbour requested.
168 *
169 * The key for an adj is:
170 * - the Next-hops protocol (i.e. v4 or v6)
171 * - the address of the next-hop
172 * - the interface the next-hop is reachable through
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100173 */
174adj_index_t
175adj_nbr_add_or_lock (fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +0100176 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100177 const ip46_address_t *nh_addr,
178 u32 sw_if_index)
179{
180 adj_index_t adj_index;
181 ip_adjacency_t *adj;
182
183 adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
184
185 if (ADJ_INDEX_INVALID == adj_index)
186 {
Neale Rannsb80c5362016-10-08 13:03:40 +0100187 vnet_main_t *vnm;
188
189 vnm = vnet_get_main();
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100190 adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
Neale Rannsb80c5362016-10-08 13:03:40 +0100191 adj_index = adj_get_index(adj);
192 adj_lock(adj_index);
193
194 vnet_rewrite_init(vnm, sw_if_index,
195 adj_get_nd_node(nh_proto),
196 vnet_tx_node_index_for_sw_interface(vnm, sw_if_index),
197 &adj->rewrite_header);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100198
199 /*
Neale Rannsb80c5362016-10-08 13:03:40 +0100200 * we need a rewrite where the destination IP address is converted
201 * to the appropriate link-layer address. This is interface specific.
202 * So ask the interface to do it.
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100203 */
Neale Rannsb80c5362016-10-08 13:03:40 +0100204 vnet_update_adjacency_for_sw_interface(vnm, sw_if_index, adj_index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100205 }
206 else
207 {
Neale Rannsb80c5362016-10-08 13:03:40 +0100208 adj_lock(adj_index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100209 }
210
Neale Rannsb80c5362016-10-08 13:03:40 +0100211 return (adj_index);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100212}
213
214adj_index_t
215adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
Neale Ranns924d03a2016-10-19 08:25:46 +0100216 vnet_link_t link_type,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100217 const ip46_address_t *nh_addr,
218 u32 sw_if_index,
219 u8 *rewrite)
220{
221 adj_index_t adj_index;
222 ip_adjacency_t *adj;
223
224 adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
225
226 if (ADJ_INDEX_INVALID == adj_index)
227 {
228 adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
229 adj->rewrite_header.sw_if_index = sw_if_index;
230 }
231 else
232 {
233 adj = adj_get(adj_index);
234 }
235
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100236 adj_lock(adj_get_index(adj));
Neale Rannsb80c5362016-10-08 13:03:40 +0100237 adj_nbr_update_rewrite(adj_get_index(adj),
238 ADJ_NBR_REWRITE_FLAG_COMPLETE,
239 rewrite);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100240
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100241 return (adj_get_index(adj));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100242}
243
244/**
245 * adj_nbr_update_rewrite
246 *
247 * Update the adjacency's rewrite string. A NULL string implies the
248 * rewirte is reset (i.e. when ARP/ND etnry is gone).
249 * NB: the adj being updated may be handling traffic in the DP.
250 */
251void
252adj_nbr_update_rewrite (adj_index_t adj_index,
Neale Rannsb80c5362016-10-08 13:03:40 +0100253 adj_nbr_rewrite_flag_t flags,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100254 u8 *rewrite)
255{
256 ip_adjacency_t *adj;
257
258 ASSERT(ADJ_INDEX_INVALID != adj_index);
259
260 adj = adj_get(adj_index);
Neale Rannsb80c5362016-10-08 13:03:40 +0100261
262 if (flags & ADJ_NBR_REWRITE_FLAG_COMPLETE)
263 {
264 /*
265 * update the adj's rewrite string and build the arc
266 * from the rewrite node to the interface's TX node
267 */
268 adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_REWRITE,
269 adj_get_rewrite_node(adj->ia_link),
270 vnet_tx_node_index_for_sw_interface(
271 vnet_get_main(),
272 adj->rewrite_header.sw_if_index),
273 rewrite);
274 }
275 else
276 {
277 adj_nbr_update_rewrite_internal(adj, IP_LOOKUP_NEXT_ARP,
278 adj_get_nd_node(adj->ia_nh_proto),
279 vnet_tx_node_index_for_sw_interface(
280 vnet_get_main(),
281 adj->rewrite_header.sw_if_index),
282 rewrite);
283 }
Neale Rannsb80c5362016-10-08 13:03:40 +0100284}
285
286/**
287 * adj_nbr_update_rewrite_internal
288 *
289 * Update the adjacency's rewrite string. A NULL string implies the
290 * rewirte is reset (i.e. when ARP/ND etnry is gone).
291 * NB: the adj being updated may be handling traffic in the DP.
292 */
293void
294adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
295 u32 adj_next_index,
296 u32 this_node,
297 u32 next_node,
298 u8 *rewrite)
299{
Neale Ranns19c68d22016-12-07 15:38:14 +0000300 ip_adjacency_t *walk_adj;
Neale Rannsad95b5d2016-11-10 20:35:14 +0000301 adj_index_t walk_ai;
302 vlib_main_t * vm;
303 u32 old_next;
Neale Ranns19c68d22016-12-07 15:38:14 +0000304 int do_walk;
Neale Rannsad95b5d2016-11-10 20:35:14 +0000305
306 vm = vlib_get_main();
307 old_next = adj->lookup_next_index;
308
309 walk_ai = adj_get_index(adj);
310 if (VNET_LINK_MPLS == adj->ia_link)
311 {
312 /*
313 * The link type MPLS has no children in the control plane graph, it only
314 * has children in the data-palne graph. The backwalk is up the former.
315 * So we need to walk from its IP cousin.
316 */
317 walk_ai = adj_nbr_find(adj->ia_nh_proto,
318 fib_proto_to_link(adj->ia_nh_proto),
319 &adj->sub_type.nbr.next_hop,
320 adj->rewrite_header.sw_if_index);
321 }
Neale Rannsb80c5362016-10-08 13:03:40 +0100322
323 /*
Neale Ranns19c68d22016-12-07 15:38:14 +0000324 * Don't call the walk re-entrantly
325 */
326 if (ADJ_INDEX_INVALID != walk_ai)
327 {
328 walk_adj = adj_get(walk_ai);
329 if (IP_ADJ_SYNC_WALK_ACTIVE & walk_adj->ia_flags)
330 {
331 do_walk = 0;
332 }
333 else
334 {
335 /*
336 * Prevent re-entrant walk of the same adj
337 */
338 walk_adj->ia_flags |= IP_ADJ_SYNC_WALK_ACTIVE;
339 do_walk = 1;
340 }
341 }
342 else
343 {
344 do_walk = 0;
345 }
346
347 /*
348 * lock the adjacencies that are affected by updates this walk will provoke.
349 * Since the aim of the walk is to update children to link to a different
350 * DPO, this adj will no longer be in use and its lock count will drop to 0.
351 * We don't want it to be deleted as part of this endevour.
352 */
353 adj_lock(adj_get_index(adj));
354 adj_lock(walk_ai);
355
356 /*
Neale Rannsb80c5362016-10-08 13:03:40 +0100357 * Updating a rewrite string is not atomic;
358 * - the rewrite string is too long to write in one instruction
359 * - when swapping from incomplete to complete, we also need to update
Neale Rannsad95b5d2016-11-10 20:35:14 +0000360 * the VLIB graph next-index of the adj.
Neale Rannsb80c5362016-10-08 13:03:40 +0100361 * ideally we would only want to suspend forwarding via this adj whilst we
362 * do this, but we do not have that level of granularity - it's suspend all
363 * worker threads or nothing.
364 * The other chioces are:
365 * - to mark the adj down and back walk so child load-balances drop this adj
366 * from the set.
367 * - update the next_node index of this adj to point to error-drop
368 * both of which will mean for MAC change we will drop for this adj
Neale Rannsad95b5d2016-11-10 20:35:14 +0000369 * which is not acceptable. However, when the adj changes type (from
370 * complete to incomplete and vice-versa) the child DPOs, which have the
371 * VLIB graph next node index, will be sending packets to the wrong graph
372 * node. So from the options above, updating the next_node of the adj to
373 * be drop will work, but it relies on each graph node v4/v6/mpls, rewrite/
374 * arp/midchain always be valid w.r.t. a mis-match of adj type and node type
375 * (i.e. a rewrite adj in the arp node). This is not enforcable. Getting it
376 * wrong will lead to hard to find bugs since its a race condition. So we
377 * choose the more reliable method of updating the children to use the drop,
378 * then switching adj's type, then updating the children again. Did I mention
379 * that this doesn't happen often...
380 * So we need to distinguish between the two cases:
381 * 1 - mac change
382 * 2 - adj type change
383 */
Neale Ranns19c68d22016-12-07 15:38:14 +0000384 if (do_walk &&
385 old_next != adj_next_index &&
Neale Rannsad95b5d2016-11-10 20:35:14 +0000386 ADJ_INDEX_INVALID != walk_ai)
387 {
388 /*
389 * the adj is changing type. we need to fix all children so that they
390 * stack momentarily on a drop, while the adj changes. If we don't do
391 * this the children will send packets to a VLIB graph node that does
392 * not correspond to the adj's type - and it goes downhill from there.
393 */
394 fib_node_back_walk_ctx_t bw_ctx = {
395 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_DOWN,
396 /*
397 * force this walk to be synchrous. if we don't and a node in the graph
398 * (a heavily shared path-list) chooses to back-ground the walk (make it
399 * async) then it will pause and we will do the adj update below, before
400 * all the children are updated. not good.
401 */
402 .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
403 };
404
405 fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
406 }
407
408 /*
409 * If we are just updating the MAC string of the adj (which we also can't
410 * do atomically), then we need to stop packets switching through the adj.
411 * We can't do that on a per-adj basis, so it's all the packets.
412 * If we are updating the type, and we walked back to the children above,
413 * then this barrier serves to flush the queues/frames.
Neale Rannsb80c5362016-10-08 13:03:40 +0100414 */
415 vlib_worker_thread_barrier_sync(vm);
416
417 adj->lookup_next_index = adj_next_index;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100418
419 if (NULL != rewrite)
420 {
421 /*
422 * new rewrite provided.
Neale Rannsb80c5362016-10-08 13:03:40 +0100423 * fill in the adj's rewrite string, and build the VLIB graph arc.
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100424 */
Neale Rannsb80c5362016-10-08 13:03:40 +0100425 vnet_rewrite_set_data_internal(&adj->rewrite_header,
426 sizeof(adj->rewrite_data),
427 rewrite,
428 vec_len(rewrite));
Neale Rannsb80c5362016-10-08 13:03:40 +0100429 vec_free(rewrite);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100430 }
431 else
432 {
Neale Rannsb80c5362016-10-08 13:03:40 +0100433 vnet_rewrite_clear_data_internal(&adj->rewrite_header,
434 sizeof(adj->rewrite_data));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100435 }
Neale Rannsad95b5d2016-11-10 20:35:14 +0000436 adj->rewrite_header.node_index = this_node;
437 adj->rewrite_header.next_index = vlib_node_add_next(vlib_get_main(),
438 this_node,
439 next_node);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100440
441 /*
Neale Rannsb80c5362016-10-08 13:03:40 +0100442 * done with the rewirte update - let the workers loose.
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100443 */
Neale Rannsb80c5362016-10-08 13:03:40 +0100444 vlib_worker_thread_barrier_release(vm);
Neale Rannsad95b5d2016-11-10 20:35:14 +0000445
Neale Ranns19c68d22016-12-07 15:38:14 +0000446 if (do_walk &&
447 (old_next != adj->lookup_next_index) &&
448 (ADJ_INDEX_INVALID != walk_ai))
Neale Rannsad95b5d2016-11-10 20:35:14 +0000449 {
450 /*
451 * backwalk to the children so they can stack on the now updated
452 * adjacency
453 */
454 fib_node_back_walk_ctx_t bw_ctx = {
455 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE,
456 };
457
458 fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
459 }
Neale Ranns19c68d22016-12-07 15:38:14 +0000460 /*
461 * Prevent re-entrant walk of the same adj
462 */
463 if (do_walk)
464 {
465 walk_adj->ia_flags &= ~IP_ADJ_SYNC_WALK_ACTIVE;
466 }
467
468 adj_unlock(adj_get_index(adj));
469 adj_unlock(walk_ai);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100470}
471
472typedef struct adj_db_count_ctx_t_ {
473 u64 count;
474} adj_db_count_ctx_t;
475
476static void
477adj_db_count (BVT(clib_bihash_kv) * kvp,
478 void *arg)
479{
480 adj_db_count_ctx_t * ctx = arg;
481 ctx->count++;
482}
483
484u32
485adj_nbr_db_size (void)
486{
487 adj_db_count_ctx_t ctx = {
488 .count = 0,
489 };
490 fib_protocol_t proto;
491 u32 sw_if_index = 0;
492
493 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
494 {
495 vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
496 {
497 if (NULL != adj_nbr_tables[proto][sw_if_index])
498 {
499 BV(clib_bihash_foreach_key_value_pair) (
500 adj_nbr_tables[proto][sw_if_index],
501 adj_db_count,
502 &ctx);
503 }
504 }
505 }
506 return (ctx.count);
507}
508
509/**
Neale Rannsb80c5362016-10-08 13:03:40 +0100510 * @brief Context for a walk of the adjacency neighbour DB
511 */
512typedef struct adj_walk_ctx_t_
513{
514 adj_walk_cb_t awc_cb;
515 void *awc_ctx;
516} adj_walk_ctx_t;
517
518static void
519adj_nbr_walk_cb (BVT(clib_bihash_kv) * kvp,
520 void *arg)
521{
522 adj_walk_ctx_t *ctx = arg;
523
524 // FIXME: can't stop early...
525 ctx->awc_cb(kvp->value, ctx->awc_ctx);
526}
527
528void
529adj_nbr_walk (u32 sw_if_index,
530 fib_protocol_t adj_nh_proto,
531 adj_walk_cb_t cb,
532 void *ctx)
533{
534 if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
535 return;
536
537 adj_walk_ctx_t awc = {
538 .awc_ctx = ctx,
539 .awc_cb = cb,
540 };
541
542 BV(clib_bihash_foreach_key_value_pair) (
543 adj_nbr_tables[adj_nh_proto][sw_if_index],
544 adj_nbr_walk_cb,
545 &awc);
546}
547
548/**
549 * @brief Context for a walk of the adjacency neighbour DB
550 */
551typedef struct adj_walk_nh_ctx_t_
552{
553 adj_walk_cb_t awc_cb;
554 void *awc_ctx;
555 const ip46_address_t *awc_nh;
556} adj_walk_nh_ctx_t;
557
558static void
559adj_nbr_walk_nh_cb (BVT(clib_bihash_kv) * kvp,
560 void *arg)
561{
562 ip_adjacency_t *adj;
563 adj_walk_nh_ctx_t *ctx = arg;
564
565 adj = adj_get(kvp->value);
566
567 if (!ip46_address_cmp(&adj->sub_type.nbr.next_hop, ctx->awc_nh))
568 ctx->awc_cb(kvp->value, ctx->awc_ctx);
569}
570
571/**
572 * @brief Walk adjacencies on a link with a given v4 next-hop.
573 * that is visit the adjacencies with different link types.
574 */
575void
576adj_nbr_walk_nh4 (u32 sw_if_index,
577 const ip4_address_t *addr,
578 adj_walk_cb_t cb,
579 void *ctx)
580{
581 if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP4, sw_if_index))
582 return;
583
584 ip46_address_t nh = {
585 .ip4 = *addr,
586 };
587
588 adj_walk_nh_ctx_t awc = {
589 .awc_ctx = ctx,
590 .awc_cb = cb,
591 .awc_nh = &nh,
592 };
593
594 BV(clib_bihash_foreach_key_value_pair) (
595 adj_nbr_tables[FIB_PROTOCOL_IP4][sw_if_index],
596 adj_nbr_walk_nh_cb,
597 &awc);
598}
599
600/**
601 * @brief Walk adjacencies on a link with a given v6 next-hop.
602 * that is visit the adjacencies with different link types.
603 */
604void
605adj_nbr_walk_nh6 (u32 sw_if_index,
606 const ip6_address_t *addr,
607 adj_walk_cb_t cb,
608 void *ctx)
609{
610 if (!ADJ_NBR_ITF_OK(FIB_PROTOCOL_IP6, sw_if_index))
611 return;
612
613 ip46_address_t nh = {
614 .ip6 = *addr,
615 };
616
617 adj_walk_nh_ctx_t awc = {
618 .awc_ctx = ctx,
619 .awc_cb = cb,
620 .awc_nh = &nh,
621 };
622
623 BV(clib_bihash_foreach_key_value_pair) (
624 adj_nbr_tables[FIB_PROTOCOL_IP6][sw_if_index],
625 adj_nbr_walk_nh_cb,
626 &awc);
627}
628
629/**
630 * @brief Walk adjacencies on a link with a given next-hop.
631 * that is visit the adjacencies with different link types.
632 */
633void
634adj_nbr_walk_nh (u32 sw_if_index,
635 fib_protocol_t adj_nh_proto,
636 const ip46_address_t *nh,
637 adj_walk_cb_t cb,
638 void *ctx)
639{
640 if (!ADJ_NBR_ITF_OK(adj_nh_proto, sw_if_index))
641 return;
642
643 adj_walk_nh_ctx_t awc = {
644 .awc_ctx = ctx,
645 .awc_cb = cb,
646 .awc_nh = nh,
647 };
648
649 BV(clib_bihash_foreach_key_value_pair) (
650 adj_nbr_tables[adj_nh_proto][sw_if_index],
651 adj_nbr_walk_nh_cb,
652 &awc);
653}
654
/**
 * Flags associated with the interface state walks
 */
typedef enum adj_nbr_interface_flags_t_
{
    /* set when the interface is up; clear when it is down */
    ADJ_NBR_INTERFACE_UP = (1 << 0),
} adj_nbr_interface_flags_t;

/**
 * Context for the state change walk of the DB
 */
typedef struct adj_nbr_interface_state_change_ctx_t_
{
    /**
     * Flags describing the interface's new state
     */
    adj_nbr_interface_flags_t flags;
} adj_nbr_interface_state_change_ctx_t;
673
Neale Rannsb80c5362016-10-08 13:03:40 +0100674static adj_walk_rc_t
675adj_nbr_interface_state_change_one (adj_index_t ai,
Neale Ranns8b37b872016-11-21 12:25:22 +0000676 void *arg)
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100677{
678 /*
679 * Back walk the graph to inform the forwarding entries
Neale Ranns8b37b872016-11-21 12:25:22 +0000680 * that this interface state has changed. Do this synchronously
681 * since this is the walk that provides convergence
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100682 */
683 adj_nbr_interface_state_change_ctx_t *ctx = arg;
684
685 fib_node_back_walk_ctx_t bw_ctx = {
Neale Ranns8b37b872016-11-21 12:25:22 +0000686 .fnbw_reason = ((ctx->flags & ADJ_NBR_INTERFACE_UP) ?
687 FIB_NODE_BW_REASON_FLAG_INTERFACE_UP :
688 FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN),
689 /*
690 * the force sync applies only as far as the first fib_entry.
691 * And it's the fib_entry's we need to converge away from
692 * the adjacencies on the now down link
693 */
694 .fnbw_flags = (!(ctx->flags & ADJ_NBR_INTERFACE_UP) ?
695 FIB_NODE_BW_FLAG_FORCE_SYNC :
696 0),
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100697 };
698
Neale Rannsb80c5362016-10-08 13:03:40 +0100699 fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
700
701 return (ADJ_WALK_RC_CONTINUE);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100702}
703
Neale Ranns8b37b872016-11-21 12:25:22 +0000704/**
705 * @brief Registered function for SW interface state changes
706 */
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100707static clib_error_t *
Neale Ranns8b37b872016-11-21 12:25:22 +0000708adj_nbr_sw_interface_state_change (vnet_main_t * vnm,
709 u32 sw_if_index,
710 u32 flags)
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100711{
712 fib_protocol_t proto;
713
714 /*
715 * walk each adj on the interface and trigger a walk from that adj
716 */
717 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
718 {
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100719 adj_nbr_interface_state_change_ctx_t ctx = {
Neale Ranns8b37b872016-11-21 12:25:22 +0000720 .flags = ((flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
721 ADJ_NBR_INTERFACE_UP :
722 0),
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100723 };
724
Neale Rannsb80c5362016-10-08 13:03:40 +0100725 adj_nbr_walk(sw_if_index, proto,
726 adj_nbr_interface_state_change_one,
727 &ctx);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100728 }
729
730 return (NULL);
731}
732
Neale Ranns8b37b872016-11-21 12:25:22 +0000733VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION_PRIO(
734 adj_nbr_sw_interface_state_change,
735 VNET_ITF_FUNC_PRIORITY_HIGH);
736
737/**
738 * @brief Invoked on each SW interface of a HW interface when the
739 * HW interface state changes
740 */
741static void
742adj_nbr_hw_sw_interface_state_change (vnet_main_t * vnm,
743 u32 sw_if_index,
744 void *arg)
745{
746 adj_nbr_interface_state_change_ctx_t *ctx = arg;
747 fib_protocol_t proto;
748
749 /*
750 * walk each adj on the interface and trigger a walk from that adj
751 */
752 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
753 {
754 adj_nbr_walk(sw_if_index, proto,
755 adj_nbr_interface_state_change_one,
756 ctx);
757 }
758}
759
760/**
761 * @brief Registered callback for HW interface state changes
762 */
763static clib_error_t *
764adj_nbr_hw_interface_state_change (vnet_main_t * vnm,
765 u32 hw_if_index,
766 u32 flags)
767{
768 /*
769 * walk SW interface on the HW
770 */
771 adj_nbr_interface_state_change_ctx_t ctx = {
772 .flags = ((flags & VNET_HW_INTERFACE_FLAG_LINK_UP) ?
773 ADJ_NBR_INTERFACE_UP :
774 0),
775 };
776
777 vnet_hw_interface_walk_sw(vnm, hw_if_index,
778 adj_nbr_hw_sw_interface_state_change,
779 &ctx);
780
781 return (NULL);
782}
783
784VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION_PRIO(
785 adj_nbr_hw_interface_state_change,
786 VNET_ITF_FUNC_PRIORITY_HIGH);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100787
Neale Rannsb80c5362016-10-08 13:03:40 +0100788static adj_walk_rc_t
789adj_nbr_interface_delete_one (adj_index_t ai,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100790 void *arg)
791{
792 /*
793 * Back walk the graph to inform the forwarding entries
794 * that this interface has been deleted.
795 */
796 fib_node_back_walk_ctx_t bw_ctx = {
797 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE,
798 };
799
Neale Rannsb80c5362016-10-08 13:03:40 +0100800 fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
801
802 return (ADJ_WALK_RC_CONTINUE);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100803}
804
805/**
806 * adj_nbr_interface_add_del
807 *
808 * Registered to receive interface Add and delete notifications
809 */
810static clib_error_t *
811adj_nbr_interface_add_del (vnet_main_t * vnm,
812 u32 sw_if_index,
813 u32 is_add)
814{
815 fib_protocol_t proto;
816
817 if (is_add)
818 {
819 /*
820 * not interested in interface additions. we will not back walk
821 * to resolve paths through newly added interfaces. Why? The control
822 * plane should have the brains to add interfaces first, then routes.
823 * So the case where there are paths with a interface that matches
824 * one just created is the case where the path resolved through an
825 * interface that was deleted, and still has not been removed. The
826 * new interface added, is NO GUARANTEE that the interface being
827 * added now, even though it may have the same sw_if_index, is the
828 * same interface that the path needs. So tough!
829 * If the control plane wants these routes to resolve it needs to
830 * remove and add them again.
831 */
832 return (NULL);
833 }
834
835 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
836 {
Neale Rannsb80c5362016-10-08 13:03:40 +0100837 adj_nbr_walk(sw_if_index, proto,
838 adj_nbr_interface_delete_one,
839 NULL);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100840 }
841
842 return (NULL);
843
844}
845
846VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
847
848
Neale Rannsb80c5362016-10-08 13:03:40 +0100849static adj_walk_rc_t
850adj_nbr_show_one (adj_index_t ai,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100851 void *arg)
852{
853 vlib_cli_output (arg, "[@%d] %U",
Neale Rannsb80c5362016-10-08 13:03:40 +0100854 ai,
855 format_ip_adjacency, ai,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100856 FORMAT_IP_ADJACENCY_NONE);
Neale Rannsb80c5362016-10-08 13:03:40 +0100857
858 return (ADJ_WALK_RC_CONTINUE);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100859}
860
861static clib_error_t *
862adj_nbr_show (vlib_main_t * vm,
863 unformat_input_t * input,
864 vlib_cli_command_t * cmd)
865{
866 adj_index_t ai = ADJ_INDEX_INVALID;
Neale Rannsb80c5362016-10-08 13:03:40 +0100867 u32 sw_if_index = ~0;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100868
869 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
870 {
871 if (unformat (input, "%d", &ai))
872 ;
Neale Rannsb80c5362016-10-08 13:03:40 +0100873 else if (unformat (input, "%U",
874 unformat_vnet_sw_interface, vnet_get_main(),
875 &sw_if_index))
876 ;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100877 else
878 break;
879 }
880
881 if (ADJ_INDEX_INVALID != ai)
882 {
883 vlib_cli_output (vm, "[@%d] %U",
884 ai,
Neale Rannsb80c5362016-10-08 13:03:40 +0100885 format_ip_adjacency, ai,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100886 FORMAT_IP_ADJACENCY_DETAIL);
887 }
Neale Rannsb80c5362016-10-08 13:03:40 +0100888 else if (~0 != sw_if_index)
889 {
890 fib_protocol_t proto;
891
892 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
893 {
894 adj_nbr_walk(sw_if_index, proto,
895 adj_nbr_show_one,
896 vm);
897 }
898 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100899 else
900 {
901 fib_protocol_t proto;
902
903 for (proto = FIB_PROTOCOL_IP4; proto <= FIB_PROTOCOL_IP6; proto++)
904 {
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100905 vec_foreach_index(sw_if_index, adj_nbr_tables[proto])
906 {
Neale Rannsb80c5362016-10-08 13:03:40 +0100907 adj_nbr_walk(sw_if_index, proto,
908 adj_nbr_show_one,
909 vm);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100910 }
911 }
912 }
913
914 return 0;
915}
916
Neale Rannsb80c5362016-10-08 13:03:40 +0100917/*?
918 * Show all neighbour adjacencies.
919 * @cliexpar
920 * @cliexstart{sh adj nbr}
921 * [@2] ipv4 via 1.0.0.2 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
922 * [@3] mpls via 1.0.0.2 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
923 * [@4] ipv4 via 1.0.0.3 loop0: IP4: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
924 * [@5] mpls via 1.0.0.3 loop0: MPLS_UNICAST: 00:00:22:aa:bb:cc -> 00:00:11:aa:bb:cc
925 * @cliexend
926 ?*/
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100927VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
928 .path = "show adj nbr",
Neale Rannsb80c5362016-10-08 13:03:40 +0100929 .short_help = "show adj nbr [<adj_index>] [interface]",
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100930 .function = adj_nbr_show,
931};
932
Neale Rannsb80c5362016-10-08 13:03:40 +0100933static ip46_type_t
934adj_proto_to_46 (fib_protocol_t proto)
935{
936 switch (proto)
937 {
938 case FIB_PROTOCOL_IP4:
939 return (IP46_TYPE_IP4);
940 case FIB_PROTOCOL_IP6:
941 return (IP46_TYPE_IP6);
942 default:
943 return (IP46_TYPE_IP4);
944 }
945 return (IP46_TYPE_IP4);
946}
947
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100948u8*
949format_adj_nbr_incomplete (u8* s, va_list *ap)
950{
Billy McFallcfcf1e22016-10-14 09:51:49 -0400951 index_t index = va_arg(*ap, index_t);
952 CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100953 vnet_main_t * vnm = vnet_get_main();
954 ip_adjacency_t * adj = adj_get(index);
955
Neale Ranns924d03a2016-10-19 08:25:46 +0100956 s = format (s, "arp-%U", format_vnet_link, adj->ia_link);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100957 s = format (s, ": via %U",
Neale Rannsb80c5362016-10-08 13:03:40 +0100958 format_ip46_address, &adj->sub_type.nbr.next_hop,
959 adj_proto_to_46(adj->ia_nh_proto));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100960 s = format (s, " %U",
961 format_vnet_sw_interface_name,
962 vnm,
963 vnet_get_sw_interface(vnm,
964 adj->rewrite_header.sw_if_index));
965
966 return (s);
967}
968
969u8*
970format_adj_nbr (u8* s, va_list *ap)
971{
Billy McFallcfcf1e22016-10-14 09:51:49 -0400972 index_t index = va_arg(*ap, index_t);
973 CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100974 vnet_main_t * vnm = vnet_get_main();
975 ip_adjacency_t * adj = adj_get(index);
976
Neale Ranns924d03a2016-10-19 08:25:46 +0100977 s = format (s, "%U", format_vnet_link, adj->ia_link);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100978 s = format (s, " via %U ",
Neale Rannsb80c5362016-10-08 13:03:40 +0100979 format_ip46_address, &adj->sub_type.nbr.next_hop,
980 adj_proto_to_46(adj->ia_nh_proto));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100981 s = format (s, "%U",
982 format_vnet_rewrite,
983 vnm->vlib_main, &adj->rewrite_header, sizeof (adj->rewrite_data), 0);
984
985 return (s);
986}
987
988static void
989adj_dpo_lock (dpo_id_t *dpo)
990{
991 adj_lock(dpo->dpoi_index);
992}
993static void
994adj_dpo_unlock (dpo_id_t *dpo)
995{
996 adj_unlock(dpo->dpoi_index);
997}
998
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100999static void
1000adj_mem_show (void)
1001{
1002 fib_show_memory_usage("Adjacency",
1003 pool_elts(adj_pool),
1004 pool_len(adj_pool),
1005 sizeof(ip_adjacency_t));
1006}
1007
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001008const static dpo_vft_t adj_nbr_dpo_vft = {
1009 .dv_lock = adj_dpo_lock,
1010 .dv_unlock = adj_dpo_unlock,
1011 .dv_format = format_adj_nbr,
Neale Ranns6c3ebcc2016-10-02 21:20:15 +01001012 .dv_mem_show = adj_mem_show,
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001013};
1014const static dpo_vft_t adj_nbr_incompl_dpo_vft = {
1015 .dv_lock = adj_dpo_lock,
1016 .dv_unlock = adj_dpo_unlock,
1017 .dv_format = format_adj_nbr_incomplete,
1018};
1019
1020/**
1021 * @brief The per-protocol VLIB graph nodes that are assigned to an adjacency
1022 * object.
1023 *
1024 * this means that these graph nodes are ones from which a nbr is the
1025 * parent object in the DPO-graph.
1026 */
1027const static char* const nbr_ip4_nodes[] =
1028{
Neale Rannsf06aea52016-11-29 06:51:37 -08001029 "ip4-rewrite",
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001030 NULL,
1031};
1032const static char* const nbr_ip6_nodes[] =
1033{
1034 "ip6-rewrite",
1035 NULL,
1036};
1037const static char* const nbr_mpls_nodes[] =
1038{
1039 "mpls-output",
1040 NULL,
1041};
Neale Ranns5e575b12016-10-03 09:40:25 +01001042const static char* const nbr_ethernet_nodes[] =
1043{
1044 "adj-l2-rewrite",
1045 NULL,
1046};
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001047const static char* const * const nbr_nodes[DPO_PROTO_NUM] =
1048{
1049 [DPO_PROTO_IP4] = nbr_ip4_nodes,
1050 [DPO_PROTO_IP6] = nbr_ip6_nodes,
1051 [DPO_PROTO_MPLS] = nbr_mpls_nodes,
Neale Ranns5e575b12016-10-03 09:40:25 +01001052 [DPO_PROTO_ETHERNET] = nbr_ethernet_nodes,
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001053};
1054
1055const static char* const nbr_incomplete_ip4_nodes[] =
1056{
1057 "ip4-arp",
1058 NULL,
1059};
1060const static char* const nbr_incomplete_ip6_nodes[] =
1061{
1062 "ip6-discover-neighbor",
1063 NULL,
1064};
1065const static char* const nbr_incomplete_mpls_nodes[] =
1066{
1067 "mpls-adj-incomplete",
1068 NULL,
1069};
1070
1071const static char* const * const nbr_incomplete_nodes[DPO_PROTO_NUM] =
1072{
1073 [DPO_PROTO_IP4] = nbr_incomplete_ip4_nodes,
1074 [DPO_PROTO_IP6] = nbr_incomplete_ip6_nodes,
1075 [DPO_PROTO_MPLS] = nbr_incomplete_mpls_nodes,
1076};
1077
1078void
1079adj_nbr_module_init (void)
1080{
1081 dpo_register(DPO_ADJACENCY,
1082 &adj_nbr_dpo_vft,
1083 nbr_nodes);
1084 dpo_register(DPO_ADJACENCY_INCOMPLETE,
1085 &adj_nbr_incompl_dpo_vft,
1086 nbr_incomplete_nodes);
1087}