/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * An adjacency is a representation of an attached L3 peer.
 *
 * Adjacency Sub-types:
 *   - neighbour: a representation of an attached L3 peer.
 *                Key:{addr,interface,link/ether-type}
 *                SHARED
 *   - glean: used to drive ARP/ND for packets destined to a local sub-net.
 *            'glean' means use the packet's destination address as the target
 *            address in the ARP packet.
 *            UNSHARED. Only one per-interface.
 *   - midchain: a neighbour adj on a virtual/tunnel interface.
 *
 * The API to create and update the adjacency is very sub-type specific. This
 * is intentional as it encourages the user to carefully consider which adjacency
 * sub-type they are really using, and hence assign it data in the appropriate
 * sub-type space in the union of sub-types. This prevents the adj becoming a
 * disorganised dumping ground for 'my feature needs a u16 somewhere' data. It
 * is important to enforce this approach as space in the adjacency is at a
 * premium, as we need it to fit in 1 cache line.
 *
 * The API is also based around an index to an adjacency, not a raw pointer.
 * This is so the user doesn't suffer the same limp inducing firearm injuries
 * that the author suffered, as the adjacencies can realloc.
 */
40
#ifndef __ADJ_H__
#define __ADJ_H__

#include <vnet/adj/adj_types.h>
#include <vnet/adj/adj_nbr.h>
#include <vnet/adj/adj_glean.h>
#include <vnet/adj/rewrite.h>
48
/**
 * @brief Common (IP4/IP6) next index stored in adjacency.
 *
 * The enum is declared packed; the adjacency structure documents the
 * field holding it as one byte wide.
 */
typedef enum
{
  /** Adjacency to drop this packet. */
  IP_LOOKUP_NEXT_DROP,
  /** Adjacency to punt this packet. */
  IP_LOOKUP_NEXT_PUNT,

  /** This packet is for one of our own IP addresses. */
  IP_LOOKUP_NEXT_LOCAL,

  /** This packet matches an "incomplete adjacency" and packets
      need to be passed to ARP to find rewrite string for
      this destination. */
  IP_LOOKUP_NEXT_ARP,

  /** This packet matches an "interface route" and packets
      need to be passed to ARP to find rewrite string for
      this destination. */
  IP_LOOKUP_NEXT_GLEAN,

  /** This packet is to be rewritten and forwarded to the next
      processing node.  This is typically the output interface but
      might be another node for further output processing. */
  IP_LOOKUP_NEXT_REWRITE,

  /** This packet follows a mid-chain adjacency. */
  IP_LOOKUP_NEXT_MIDCHAIN,

  /** This packet needs to go to ICMP error. */
  IP_LOOKUP_NEXT_ICMP_ERROR,

  /** Multicast Adjacency. */
  IP_LOOKUP_NEXT_MCAST,

  /** Broadcast Adjacency. */
  IP_LOOKUP_NEXT_BCAST,

  /** Multicast Midchain Adjacency. An Adjacency for sending multicast packets
   *  on a tunnel/virtual interface */
  IP_LOOKUP_NEXT_MCAST_MIDCHAIN,

  /** Number of common next indices. Must remain the last enumerator:
   *  the per-protocol enums start numbering from this value. */
  IP_LOOKUP_N_NEXT,
} __attribute__ ((packed)) ip_lookup_next_t;
93
94typedef enum
95{
96 IP4_LOOKUP_N_NEXT = IP_LOOKUP_N_NEXT,
97} ip4_lookup_next_t;
98
99typedef enum
100{
101 /* Hop-by-hop header handling */
102 IP6_LOOKUP_NEXT_HOP_BY_HOP = IP_LOOKUP_N_NEXT,
103 IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP,
104 IP6_LOOKUP_NEXT_POP_HOP_BY_HOP,
105 IP6_LOOKUP_N_NEXT,
106} ip6_lookup_next_t;
107
/**
 * Initialiser for an array of IPv4 graph-node names indexed by
 * ip_lookup_next_t. Designated initialisers are used, so the entry
 * order here is cosmetic; it follows the enum order.
 */
#define IP4_LOOKUP_NEXT_NODES {                                 \
    [IP_LOOKUP_NEXT_DROP] = "ip4-drop",                         \
    [IP_LOOKUP_NEXT_PUNT] = "ip4-punt",                         \
    [IP_LOOKUP_NEXT_LOCAL] = "ip4-local",                       \
    [IP_LOOKUP_NEXT_ARP] = "ip4-arp",                           \
    [IP_LOOKUP_NEXT_GLEAN] = "ip4-glean",                       \
    [IP_LOOKUP_NEXT_REWRITE] = "ip4-rewrite",                   \
    [IP_LOOKUP_NEXT_MIDCHAIN] = "ip4-midchain",                 \
    [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip4-icmp-error",             \
    [IP_LOOKUP_NEXT_MCAST] = "ip4-rewrite-mcast",               \
    [IP_LOOKUP_NEXT_BCAST] = "ip4-rewrite-bcast",               \
    [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip4-mcast-midchain",     \
}
121
/**
 * Initialiser for an array of IPv6 graph-node names indexed by
 * ip_lookup_next_t / ip6_lookup_next_t. Designated initialisers are
 * used, so the entry order here is cosmetic; it follows the enum order.
 */
#define IP6_LOOKUP_NEXT_NODES {                                 \
    [IP_LOOKUP_NEXT_DROP] = "ip6-drop",                         \
    [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",                         \
    [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",                       \
    [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",             \
    [IP_LOOKUP_NEXT_GLEAN] = "ip6-glean",                       \
    [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",                   \
    [IP_LOOKUP_NEXT_MIDCHAIN] = "ip6-midchain",                 \
    [IP_LOOKUP_NEXT_ICMP_ERROR] = "ip6-icmp-error",             \
    [IP_LOOKUP_NEXT_MCAST] = "ip6-rewrite-mcast",               \
    [IP_LOOKUP_NEXT_BCAST] = "ip6-rewrite-bcast",               \
    [IP_LOOKUP_NEXT_MCAST_MIDCHAIN] = "ip6-mcast-midchain",     \
    [IP6_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",            \
    [IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop",    \
    [IP6_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop",    \
}
138
139/**
Neale Ranns1855b8e2018-07-11 10:31:26 -0700140 * The special broadcast address (to construct a broadcast adjacency
141 */
142extern const ip46_address_t ADJ_BCAST_ADDR;
143
144/**
Paul Vinciguerrad1e6b292020-01-30 12:49:20 -0500145 * Forward declaration
Neale Rannsfa5d1982017-02-20 14:19:51 -0800146 */
147struct ip_adjacency_t_;
148
149/**
150 * @brief A function type for post-rewrite fixups on midchain adjacency
151 */
152typedef void (*adj_midchain_fixup_t) (vlib_main_t * vm,
Neale Ranns960eeea2019-12-02 23:28:50 +0000153 const struct ip_adjacency_t_ * adj,
Neale Rannsdb14f5a2018-01-29 10:43:33 -0800154 vlib_buffer_t * b0,
155 const void *data);
Neale Rannsfa5d1982017-02-20 14:19:51 -0800156
/**
 * @brief Attributes of an IP adjacency
 *
 * Each attribute value is the bit position of the correspondingly named
 * ADJ_FLAG_* value.
 */
typedef enum adj_attr_t_
{
  /**
   * Currently a sync walk is active. Used to prevent re-entrant walking
   */
  ADJ_ATTR_SYNC_WALK_ACTIVE = 0,

  /**
   * Packets TX through the midchain do not increment the interface
   * counters. This should be used when the adj is associated with an L2
   * interface and that L2 interface is in a bridge domain. In that case
   * the packet will have traversed the interface's TX node, and hence have
   * been counted, before it traverses this midchain
   */
  ADJ_ATTR_MIDCHAIN_NO_COUNT,
  /**
   * When stacking midchains on a fib-entry extract the choice from the
   * load-balance returned based on an IP hash of the adj's rewrite
   */
  ADJ_ATTR_MIDCHAIN_IP_STACK,
  /**
   * If the midchain were to stack on its FIB entry a loop would form.
   */
  ADJ_ATTR_MIDCHAIN_LOOPED,
  /**
   * The fixup function is the standard IP4o4 header fixup
   */
  ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR,
} adj_attr_t;
189
/**
 * Initialiser for an array of attribute names indexed by adj_attr_t.
 */
#define ADJ_ATTR_NAMES {                                                   \
    [ADJ_ATTR_SYNC_WALK_ACTIVE] = "walk-active",                           \
    [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count",                    \
    [ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack",                    \
    [ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped",                        \
    [ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup",      \
}
197
/**
 * Loop header that iterates _attr over every adjacency attribute, from
 * ADJ_ATTR_SYNC_WALK_ACTIVE to ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR inclusive.
 * The upper bound is hard-coded, so it must be updated when an attribute
 * is appended to the enum.
 *
 * _attr is evaluated several times and must be a modifiable lvalue. It is
 * parenthesised so that an expression such as *p expands to (*p)++ and
 * not *p++.
 */
#define FOR_EACH_ADJ_ATTR(_attr)                        \
  for ((_attr) = ADJ_ATTR_SYNC_WALK_ACTIVE;             \
       (_attr) <= ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR;    \
       (_attr)++)
202
203/**
204 * @brief Flags on an IP adjacency
205 */
206typedef enum adj_flags_t_
207{
208 ADJ_FLAG_NONE = 0,
209 ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << ADJ_ATTR_SYNC_WALK_ACTIVE),
210 ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT),
211 ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK),
212 ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED),
Neale Ranns4ec36c52020-03-31 09:21:29 -0400213 ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR),
Neale Rannsfa5d1982017-02-20 14:19:51 -0800214} __attribute__ ((packed)) adj_flags_t;
215
216/**
Neale Ranns521a8d72018-12-06 13:46:49 +0000217 * @brief Format adjacency flags
218 */
219extern u8* format_adj_flags(u8 * s, va_list * args);
220
221/**
Neale Rannsfa5d1982017-02-20 14:19:51 -0800222 * @brief IP unicast adjacency.
223 * @note cache aligned.
224 *
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800225 * An adjacency is a representation of a peer on a particular link.
Neale Rannsfa5d1982017-02-20 14:19:51 -0800226 */
227typedef struct ip_adjacency_t_
228{
229 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
230
231 /**
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800232 * Linkage into the FIB node graph. First member since this type
Neale Rannsfa5d1982017-02-20 14:19:51 -0800233 * has 8 byte alignment requirements.
234 */
235 fib_node_t ia_node;
Neale Rannsfa5d1982017-02-20 14:19:51 -0800236 /**
Neale Ranns4ec36c52020-03-31 09:21:29 -0400237 * feature [arc] config index
Neale Rannsfa5d1982017-02-20 14:19:51 -0800238 */
Neale Ranns4ec36c52020-03-31 09:21:29 -0400239 u32 ia_cfg_index;
Neale Rannsfa5d1982017-02-20 14:19:51 -0800240
241 union
242 {
243 /**
244 * IP_LOOKUP_NEXT_ARP/IP_LOOKUP_NEXT_REWRITE
245 *
246 * neighbour adjacency sub-type;
247 */
248 struct
249 {
250 ip46_address_t next_hop;
251 } nbr;
252 /**
253 * IP_LOOKUP_NEXT_MIDCHAIN
254 *
255 * A nbr adj that is also recursive. Think tunnels.
Paul Vinciguerrad1e6b292020-01-30 12:49:20 -0500256 * A nbr adj can transition to be of type MIDCHAIN
Neale Rannsfa5d1982017-02-20 14:19:51 -0800257 * so be sure to leave the two structs with the next_hop
258 * fields aligned.
259 */
260 struct
261 {
262 /**
263 * The recursive next-hop.
264 * This field MUST be at the same memory location as
265 * sub_type.nbr.next_hop
266 */
267 ip46_address_t next_hop;
268 /**
269 * The next DPO to use
270 */
271 dpo_id_t next_dpo;
272 /**
273 * A function to perform the post-rewrite fixup
274 */
275 adj_midchain_fixup_t fixup_func;
Neale Rannsdb14f5a2018-01-29 10:43:33 -0800276 /**
277 * Fixup data passed back to the client in the fixup function
278 */
279 const void *fixup_data;
Neale Ranns521a8d72018-12-06 13:46:49 +0000280 /**
281 * the FIB entry this midchain resolves through. required for recursive
282 * loop detection.
283 */
284 fib_node_index_t fei;
Neale Ranns4ec36c52020-03-31 09:21:29 -0400285
286 /** spare space */
287 u8 __ia_midchain_pad[4];
288
Neale Rannsfa5d1982017-02-20 14:19:51 -0800289 } midchain;
290 /**
291 * IP_LOOKUP_NEXT_GLEAN
292 *
293 * Glean the address to ARP for from the packet's destination.
294 * Technically these aren't adjacencies, i.e. they are not a
295 * representation of a peer. One day we might untangle this coupling
296 * and use a new Glean DPO.
297 */
298 struct
299 {
300 ip46_address_t receive_addr;
301 } glean;
302 } sub_type;
303
304 CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
305
Neale Ranns4ec36c52020-03-31 09:21:29 -0400306 /** Rewrite in second and third cache lines */
Klement Sekera7dbf9a12019-11-21 10:31:03 +0000307 VNET_DECLARE_REWRITE;
Neale Ranns88fc83e2017-04-05 08:11:14 -0700308
309 /**
310 * more control plane members that do not fit on the first cacheline
311 */
Neale Ranns4ec36c52020-03-31 09:21:29 -0400312 CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
313
Neale Ranns88fc83e2017-04-05 08:11:14 -0700314 /**
315 * A sorted vector of delegates
316 */
Neale Ranns76447a72018-02-20 06:25:02 -0800317 struct adj_delegate_t_ *ia_delegates;
Neale Ranns88fc83e2017-04-05 08:11:14 -0700318
Neale Rannscbe25aa2019-09-30 10:53:31 +0000319 /**
320 * The VLIB node in which this adj is used to forward packets
321 */
322 u32 ia_node_index;
Neale Ranns4ec36c52020-03-31 09:21:29 -0400323
324 /**
325 * Next hop after ip4-lookup.
326 * This is not accessed in the rewrite nodes.
327 * 1-bytes
328 */
329 ip_lookup_next_t lookup_next_index;
330
331 /**
332 * link/ether-type
333 * 1 bytes
334 */
335 vnet_link_t ia_link;
336
337 /**
338 * The protocol of the neighbor/peer. i.e. the protocol with
339 * which to interpret the 'next-hop' attributes of the sub-types.
340 * 1-bytes
341 */
342 fib_protocol_t ia_nh_proto;
343
344 /**
345 * Flags on the adjacency
346 * 1-bytes
347 */
348 adj_flags_t ia_flags;
349
350 /**
351 * Free space on the fourth cacheline (not used in the DP)
352 */
353 u8 __ia_pad[48];
Neale Rannsfa5d1982017-02-20 14:19:51 -0800354} ip_adjacency_t;
355
356STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline0) == 0),
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800357 "IP adjacency cacheline 0 is not offset");
Neale Rannsfa5d1982017-02-20 14:19:51 -0800358STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline1) ==
359 CLIB_CACHE_LINE_BYTES),
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800360 "IP adjacency cacheline 1 is more than one cacheline size offset");
Neale Ranns4ec36c52020-03-31 09:21:29 -0400361#if defined __x86_64__
362STATIC_ASSERT ((STRUCT_OFFSET_OF (ip_adjacency_t, cacheline3) ==
363 3 * CLIB_CACHE_LINE_BYTES),
364 "IP adjacency cacheline 3 is more than one cacheline size offset");
365/* An adj fits into 4 cachelines on your average machine */
366STATIC_ASSERT_SIZEOF (ip_adjacency_t, 4 * 64);
367#endif
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100368
369/**
370 * @brief
371 * Take a reference counting lock on the adjacency
372 */
373extern void adj_lock(adj_index_t adj_index);
374/**
375 * @brief
376 * Release a reference counting lock on the adjacency
377 */
378extern void adj_unlock(adj_index_t adj_index);
379
380/**
381 * @brief
382 * Add a child dependent to an adjacency. The child will
Paul Vinciguerrad1e6b292020-01-30 12:49:20 -0500383 * thus be informed via its registered back-walk function
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100384 * when the adjacency state changes.
385 */
386extern u32 adj_child_add(adj_index_t adj_index,
387 fib_node_type_t type,
388 fib_node_index_t child_index);
389/**
390 * @brief
391 * Remove a child dependent
392 */
393extern void adj_child_remove(adj_index_t adj_index,
394 u32 sibling_index);
395
396/**
Neale Rannsb80c5362016-10-08 13:03:40 +0100397 * @brief Walk the Adjacencies on a given interface
398 */
399extern void adj_walk (u32 sw_if_index,
400 adj_walk_cb_t cb,
401 void *ctx);
402
403/**
404 * @brief Return the link type of the adjacency
405 */
406extern vnet_link_t adj_get_link_type (adj_index_t ai);
407
408/**
409 * @brief Return the sw interface index of the adjacency.
410 */
411extern u32 adj_get_sw_if_index (adj_index_t ai);
412
413/**
Neale Ranns88fc83e2017-04-05 08:11:14 -0700414 * @brief Return true if the adjacency is 'UP', i.e. can be used for forwarding.
415 * 0 is down, !0 is up.
416 */
417extern int adj_is_up (adj_index_t ai);
418
419/**
Neale Rannsb80c5362016-10-08 13:03:40 +0100420 * @brief Return the link type of the adjacency
421 */
422extern const u8* adj_get_rewrite (adj_index_t ai);
423
424/**
Neale Ranns521a8d72018-12-06 13:46:49 +0000425 * @brief descend the FIB graph looking for loops
426 *
427 * @param ai
428 * The adj index to traverse
429 *
430 * @param entry_indicies)
431 * A pointer to a vector of FIB entries already visited.
432 */
433extern int adj_recursive_loop_detect (adj_index_t ai,
434 fib_node_index_t **entry_indicies);
435
436/**
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100437 * @brief
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800438 * The global adjacency pool. Exposed for fast/inline data-plane access
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100439 */
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100440extern ip_adjacency_t *adj_pool;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100441
442/**
443 * @brief
444 * Adjacency packet counters
445 */
446extern vlib_combined_counter_main_t adjacency_counters;
447
/**
 * @brief Global Config for enabling per-adjacency counters
 * This is configurable because it comes with a non-negligible
 * performance cost.
 */
extern int adj_per_adj_counters;
453
454/**
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100455 * @brief
456 * Get a pointer to an adjacency object from its index
457 */
458static inline ip_adjacency_t *
459adj_get (adj_index_t adj_index)
460{
BenoƮt Ganne138c37a2019-07-18 17:34:28 +0200461 return (pool_elt_at_index(adj_pool, adj_index));
462}
463
464static inline int
465adj_is_valid(adj_index_t adj_index)
466{
467 return !(pool_is_free_index(adj_pool, adj_index));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100468}
469
Neale Ranns9c6a6132017-02-21 05:33:14 -0800470/**
471 * @brief Get the global configuration option for enabling per-adj counters
472 */
473static inline int
474adj_are_counters_enabled (void)
475{
476 return (adj_per_adj_counters);
477}
478
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100479#endif