blob: 17c8b1d84d0357d7e2a1ad9bd174a26c5436b8b1 [file] [log] [blame]
/*
 * l2/l2_arp_term.c: IP v4 ARP L2 BD termination
 *
 * Copyright (c) 2010 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#include <vlibmemory/api.h>
19
20#include <vnet/l2/l2_arp_term.h>
21#include <vnet/l2/l2_input.h>
22#include <vnet/l2/feat_bitmap.h>
23
24#include <vnet/ip/ip4_packet.h>
Neale Rannse4031132020-10-26 13:00:06 +000025#include <vnet/ip/ip6_packet.h>
26#include <vnet/ip/icmp6.h>
27#include <vnet/ip/ip6.h>
28#include <vnet/ip/format.h>
Neale Rannscbe25aa2019-09-30 10:53:31 +000029#include <vnet/ethernet/arp_packet.h>
30
31static const u8 vrrp_prefix[] = { 0x00, 0x00, 0x5E, 0x00, 0x01 };
32
33l2_arp_term_main_t l2_arp_term_main;
34
35/*
36 * ARP/ND Termination in a L2 Bridge Domain based on IP4/IP6 to MAC
37 * hash tables mac_by_ip4 and mac_by_ip6 for each BD.
38 */
39typedef enum
40{
41 ARP_TERM_NEXT_L2_OUTPUT,
42 ARP_TERM_NEXT_DROP,
43 ARP_TERM_N_NEXT,
44} arp_term_next_t;
45
46u32 arp_term_next_node_index[32];
47
48typedef struct
49{
50 u8 packet_data[64];
51} ethernet_arp_input_trace_t;
52
/*
 * X-macro table of the node's counters, one "_ (symbol, description)"
 * entry per counter. It is expanded once to build the counter enum and
 * once to build the matching description strings.
 */
#define foreach_ethernet_arp_error \
  _ (replies_sent, "ARP replies sent") \
  _ (l2_type_not_ethernet, "L2 type not ethernet") \
  _ (l3_type_not_ip4, "L3 type not IP4") \
  _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
  _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
  _ (l3_dst_address_unset, "IP4 destination address is unset") \
  _ (l3_src_address_is_local, "IP4 source address matches local interface") \
  _ (l3_src_address_learned, "ARP request IP4 source address learned") \
  _ (replies_received, "ARP replies received") \
  _ (opcode_not_request, "ARP opcode not request") \
  _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
  _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
  _ (gratuitous_arp, "ARP probe or announcement dropped") \
  _ (interface_no_table, "Interface is not mapped to an IP table") \
  _ (interface_not_ip_enabled, "Interface is not IP enabled") \
  _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet")

71typedef enum
72{
73#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
74 foreach_ethernet_arp_error
75#undef _
76 ETHERNET_ARP_N_ERROR,
77} ethernet_arp_reply_error_t;
78
79static char *ethernet_arp_error_strings[] = {
80#define _(sym,string) string,
81 foreach_ethernet_arp_error
82#undef _
83};
84
85static u8 *
86format_arp_term_input_trace (u8 * s, va_list * va)
87{
88 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
89 CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
90 ethernet_arp_input_trace_t *t = va_arg (*va, ethernet_arp_input_trace_t *);
91
92 /* arp-term trace data saved is either arp or ip6/icmp6 packet:
93 - for arp, the 1st 16-bit field is hw type of value of 0x0001.
94 - for ip6, the first nibble has value of 6. */
95 s = format (s, "%U", t->packet_data[0] == 0 ?
96 format_ethernet_arp_header : format_ip6_header,
97 t->packet_data, sizeof (t->packet_data));
98
99 return s;
100}
101
102void
103l2_arp_term_set_publisher_node (bool on)
104{
105 l2_arp_term_main_t *l2am = &l2_arp_term_main;
106
107 l2am->publish = on;
108}
109
110static int
111l2_arp_term_publish (l2_arp_term_publish_event_t * ctx)
112{
113 l2_arp_term_main_t *l2am = &l2_arp_term_main;
114
115 vec_add1 (l2am->publish_events, *ctx);
116
117 vlib_process_signal_event (vlib_get_main (),
118 l2_arp_term_process_node.index,
119 L2_ARP_TERM_EVENT_PUBLISH, 0);
120
121 return 0;
122}
123
124static inline void
125l2_arp_term_publish_v4_dp (u32 sw_if_index,
126 const ethernet_arp_ip4_over_ethernet_address_t * a)
127{
128 l2_arp_term_main_t *l2am = &l2_arp_term_main;
129
130 if (!l2am->publish)
131 return;
132
133 l2_arp_term_publish_event_t args = {
134 .sw_if_index = sw_if_index,
135 .type = IP46_TYPE_IP4,
136 .ip.ip4 = a->ip4,
137 .mac = a->mac,
138 };
139
140 vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
141 sizeof (args));
142}
143
144static inline void
145l2_arp_term_publish_v6_dp (u32 sw_if_index,
146 const ip6_address_t * addr,
147 const mac_address_t * mac)
148{
149 l2_arp_term_main_t *l2am = &l2_arp_term_main;
150
151 if (!l2am->publish)
152 return;
153
154 l2_arp_term_publish_event_t args = {
155 .sw_if_index = sw_if_index,
156 .type = IP46_TYPE_IP6,
157 .ip.ip6 = *addr,
158 .mac = *mac,
159 };
160
161 vl_api_rpc_call_main_thread (l2_arp_term_publish, (u8 *) & args,
162 sizeof (args));
163}
164
165static inline int
166vnet_ip6_nd_term (vlib_main_t * vm,
167 vlib_node_runtime_t * node,
168 vlib_buffer_t * p0,
169 ethernet_header_t * eth,
170 ip6_header_t * ip, u32 sw_if_index, u16 bd_index)
171{
172 icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
173 mac_address_t mac;
174
175 mac_address_from_bytes (&mac, eth->src_address);
176 ndh = ip6_next_header (ip);
177 if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
178 ndh->icmp.type != ICMP6_neighbor_advertisement)
179 return 0;
180
181 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
182 (p0->flags & VLIB_BUFFER_IS_TRACED)))
183 {
184 u8 *t0 = vlib_add_trace (vm, node, p0,
185 sizeof (icmp6_input_trace_t));
186 clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
187 }
188
189 /* Check if anyone want ND events for L2 BDs */
190 if (PREDICT_FALSE (!ip6_address_is_link_local_unicast (&ip->src_address)))
191 {
192 l2_arp_term_publish_v6_dp (sw_if_index, &ip->src_address, &mac);
193 }
194
195 /* Check if MAC entry exsist for solicited target IP */
196 if (ndh->icmp.type == ICMP6_neighbor_solicitation)
197 {
198 icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
199 l2_bridge_domain_t *bd_config;
200 u8 *macp;
201
202 opt = (void *) (ndh + 1);
203 if ((opt->header.type !=
204 ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
205 (opt->header.n_data_u64s != 1))
206 return 0; /* source link layer address option not present */
207
208 bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
209 macp =
210 (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
211 if (macp)
212 { /* found ip-mac entry, generate eighbor advertisement response */
213 int bogus_length;
214 vlib_node_runtime_t *error_node =
215 vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
216 ip->dst_address = ip->src_address;
217 ip->src_address = ndh->target_address;
218 ip->hop_limit = 255;
219 opt->header.type =
220 ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
221 clib_memcpy (opt->ethernet_address, macp, 6);
222 ndh->icmp.type = ICMP6_neighbor_advertisement;
223 ndh->advertisement_flags = clib_host_to_net_u32
224 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
225 ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
226 ndh->icmp.checksum = 0;
227 ndh->icmp.checksum =
228 ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
229 clib_memcpy (eth->dst_address, eth->src_address, 6);
230 clib_memcpy (eth->src_address, macp, 6);
231 vlib_error_count (vm, error_node->node_index,
232 ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
233 return 1;
234 }
235 }
236
237 return 0;
238
239}
240
241static uword
242arp_term_l2bd (vlib_main_t * vm,
243 vlib_node_runtime_t * node, vlib_frame_t * frame)
244{
245 l2input_main_t *l2im = &l2input_main;
246 u32 n_left_from, next_index, *from, *to_next;
247 u32 n_replies_sent = 0;
248 u16 last_bd_index = ~0;
249 l2_bridge_domain_t *last_bd_config = 0;
250 l2_input_config_t *cfg0;
251
252 from = vlib_frame_vector_args (frame);
253 n_left_from = frame->n_vectors;
254 next_index = node->cached_next_index;
255
256 while (n_left_from > 0)
257 {
258 u32 n_left_to_next;
259
260 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
261
262 while (n_left_from > 0 && n_left_to_next > 0)
263 {
264 vlib_buffer_t *p0;
265 ethernet_header_t *eth0;
266 ethernet_arp_header_t *arp0;
267 ip6_header_t *iph0;
268 u8 *l3h0;
269 u32 pi0, error0, next0, sw_if_index0;
270 u16 ethertype0;
271 u16 bd_index0;
272 u32 ip0;
273 u8 *macp0;
274
275 pi0 = from[0];
276 to_next[0] = pi0;
277 from += 1;
278 to_next += 1;
279 n_left_from -= 1;
280 n_left_to_next -= 1;
281
282 p0 = vlib_get_buffer (vm, pi0);
283 // Terminate only local (SHG == 0) ARP
284 if (vnet_buffer (p0)->l2.shg != 0)
285 goto next_l2_feature;
286
287 eth0 = vlib_buffer_get_current (p0);
288 l3h0 = (u8 *) eth0 + vnet_buffer (p0)->l2.l2_len;
289 ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
290 arp0 = (ethernet_arp_header_t *) l3h0;
291
292 if (ethertype0 != ETHERNET_TYPE_ARP)
293 goto check_ip6_nd;
294
295 if ((arp0->opcode !=
296 clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request)) &&
297 (arp0->opcode !=
298 clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply)))
299 goto check_ip6_nd;
300
301 /* Must be ARP request/reply packet here */
302 if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
303 (p0->flags & VLIB_BUFFER_IS_TRACED)))
304 {
305 u8 *t0 = vlib_add_trace (vm, node, p0,
306 sizeof (ethernet_arp_input_trace_t));
307 clib_memcpy_fast (t0, l3h0,
308 sizeof (ethernet_arp_input_trace_t));
309 }
310
311 error0 = 0;
312 error0 =
313 (arp0->l2_type !=
314 clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet)
315 ? ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
316 error0 =
317 (arp0->l3_type !=
318 clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
319 ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
320
321 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
322
323 if (error0)
324 goto drop;
325
326 /* Trash ARP packets whose ARP-level source addresses do not
327 match, or if requester address is mcast */
328 if (PREDICT_FALSE
329 (!ethernet_mac_address_equal (eth0->src_address,
330 arp0->ip4_over_ethernet[0].
331 mac.bytes))
332 || ethernet_address_cast (arp0->ip4_over_ethernet[0].mac.bytes))
333 {
334 /* VRRP virtual MAC may be different to SMAC in ARP reply */
Tianyu Li07d04f72021-05-26 14:20:57 +0800335 if (clib_memcmp (arp0->ip4_over_ethernet[0].mac.bytes,
336 vrrp_prefix, sizeof (vrrp_prefix)) != 0)
Neale Rannscbe25aa2019-09-30 10:53:31 +0000337 {
338 error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
339 goto drop;
340 }
341 }
342 if (PREDICT_FALSE
343 (ip4_address_is_multicast (&arp0->ip4_over_ethernet[0].ip4)))
344 {
345 error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
346 goto drop;
347 }
348
349 /* Check if anyone want ARP request events for L2 BDs */
350 l2_arp_term_publish_v4_dp (sw_if_index0,
351 &arp0->ip4_over_ethernet[0]);
352
353 /* lookup BD mac_by_ip4 hash table for MAC entry */
354 ip0 = arp0->ip4_over_ethernet[1].ip4.as_u32;
355 bd_index0 = vnet_buffer (p0)->l2.bd_index;
356 if (PREDICT_FALSE ((bd_index0 != last_bd_index)
357 || (last_bd_index == (u16) ~ 0)))
358 {
359 last_bd_index = bd_index0;
360 last_bd_config = vec_elt_at_index (l2im->bd_configs, bd_index0);
361 }
362 macp0 = (u8 *) hash_get (last_bd_config->mac_by_ip4, ip0);
363
364 if (PREDICT_FALSE (!macp0))
365 goto next_l2_feature; /* MAC not found */
366 if (PREDICT_FALSE (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
367 arp0->ip4_over_ethernet[1].ip4.as_u32))
368 goto next_l2_feature; /* GARP */
369
370 /* MAC found, send ARP reply -
371 Convert ARP request packet to ARP reply */
372 arp0->opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply);
373 arp0->ip4_over_ethernet[1] = arp0->ip4_over_ethernet[0];
374 arp0->ip4_over_ethernet[0].ip4.as_u32 = ip0;
375 mac_address_from_bytes (&arp0->ip4_over_ethernet[0].mac, macp0);
376 clib_memcpy_fast (eth0->dst_address, eth0->src_address, 6);
377 clib_memcpy_fast (eth0->src_address, macp0, 6);
378 n_replies_sent += 1;
379
380 output_response:
381 /* For BVI, need to use l2-fwd node to send ARP reply as
382 l2-output node cannot output packet to BVI properly */
383 cfg0 = vec_elt_at_index (l2im->configs, sw_if_index0);
Neale Ranns47a3d992020-09-29 15:38:51 +0000384 if (PREDICT_FALSE (l2_input_is_bvi (cfg0)))
Neale Rannscbe25aa2019-09-30 10:53:31 +0000385 {
386 vnet_buffer (p0)->l2.feature_bitmap |= L2INPUT_FEAT_FWD;
387 vnet_buffer (p0)->sw_if_index[VLIB_RX] = 0;
388 goto next_l2_feature;
389 }
390
391 /* Send ARP/ND reply back out input interface through l2-output */
392 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
393 next0 = ARP_TERM_NEXT_L2_OUTPUT;
394 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
395 to_next, n_left_to_next, pi0,
396 next0);
397 continue;
398
399 check_ip6_nd:
400 /* IP6 ND event notification or solicitation handling to generate
401 local response instead of flooding */
402 iph0 = (ip6_header_t *) l3h0;
403 if (PREDICT_FALSE (ethertype0 == ETHERNET_TYPE_IP6 &&
404 iph0->protocol == IP_PROTOCOL_ICMP6 &&
405 !ip6_address_is_unspecified
406 (&iph0->src_address)))
407 {
408 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
409 if (vnet_ip6_nd_term
410 (vm, node, p0, eth0, iph0, sw_if_index0,
411 vnet_buffer (p0)->l2.bd_index))
412 goto output_response;
413 }
414
415 next_l2_feature:
416 {
417 next0 = vnet_l2_feature_next (p0, arp_term_next_node_index,
418 L2INPUT_FEAT_ARP_TERM);
419 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
420 to_next, n_left_to_next,
421 pi0, next0);
422 continue;
423 }
424
425 drop:
426 if (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ||
427 (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
428 arp0->ip4_over_ethernet[1].ip4.as_u32))
429 {
430 error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
431 }
432 next0 = ARP_TERM_NEXT_DROP;
433 p0->error = node->errors[error0];
434
435 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
436 to_next, n_left_to_next, pi0,
437 next0);
438 }
439
440 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
441 }
442
443 vlib_error_count (vm, node->node_index,
444 ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
445 return frame->n_vectors;
446}
447
448/* *INDENT-OFF* */
449VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
450 .function = arp_term_l2bd,
451 .name = "arp-term-l2bd",
452 .vector_size = sizeof (u32),
453 .n_errors = ETHERNET_ARP_N_ERROR,
454 .error_strings = ethernet_arp_error_strings,
455 .n_next_nodes = ARP_TERM_N_NEXT,
456 .next_nodes = {
457 [ARP_TERM_NEXT_L2_OUTPUT] = "l2-output",
458 [ARP_TERM_NEXT_DROP] = "error-drop",
459 },
460 .format_buffer = format_ethernet_arp_header,
461 .format_trace = format_arp_term_input_trace,
462};
463/* *INDENT-ON* */
464
465clib_error_t *
466arp_term_init (vlib_main_t * vm)
467{
468 // Initialize the feature next-node indexes
469 feat_bitmap_init_next_nodes (vm,
470 arp_term_l2bd_node.index,
471 L2INPUT_N_FEAT,
472 l2input_get_feat_names (),
473 arp_term_next_node_index);
474 return 0;
475}
476
477VLIB_INIT_FUNCTION (arp_term_init);
478
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */