blob: abd7bad0ed899098b7b0532f72751f9572408f50 [file] [log] [blame]
/*
 * decap.c: vxlan tunnel decap packet processing
 *
 * Copyright (c) 2013 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#include <vlib/vlib.h>
19#include <vnet/pg/pg.h>
20#include <vnet/vxlan/vxlan.h>
21
John Lo13e3d452016-08-09 19:20:51 -040022vlib_node_registration_t vxlan4_input_node;
23vlib_node_registration_t vxlan6_input_node;
Ed Warnickecb9cada2015-12-08 15:45:58 -070024
25typedef struct {
26 u32 next_index;
27 u32 tunnel_index;
28 u32 error;
29 u32 vni;
30} vxlan_rx_trace_t;
31
32static u8 * format_vxlan_rx_trace (u8 * s, va_list * args)
33{
34 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
35 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
36 vxlan_rx_trace_t * t = va_arg (*args, vxlan_rx_trace_t *);
37
38 if (t->tunnel_index != ~0)
39 {
John Loc42912d2016-11-07 18:30:47 -050040 s = format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
Ed Warnickecb9cada2015-12-08 15:45:58 -070041 t->tunnel_index, t->vni, t->next_index, t->error);
42 }
43 else
44 {
Eyal Bari0f4b1842018-04-12 12:39:51 +030045 s = format (s, "VXLAN decap error - tunnel for vni %d does not exist",
John Loc42912d2016-11-07 18:30:47 -050046 t->vni);
Ed Warnickecb9cada2015-12-08 15:45:58 -070047 }
48 return s;
49}
50
John Lo2b81eb82017-01-30 13:12:10 -050051always_inline u32
52validate_vxlan_fib (vlib_buffer_t *b, vxlan_tunnel_t *t, u32 is_ip4)
53{
Eyal Barifb663012017-10-19 15:27:51 +030054 u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
John Lo2b81eb82017-01-30 13:12:10 -050055
Eyal Barifb663012017-10-19 15:27:51 +030056 u32 * fib_index_by_sw_if_index = is_ip4 ?
57 ip4_main.fib_index_by_sw_if_index : ip6_main.fib_index_by_sw_if_index;
58 u32 tx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
59 u32 fib_index = (tx_sw_if_index == (u32) ~ 0) ?
60 vec_elt (fib_index_by_sw_if_index, sw_if_index) : tx_sw_if_index;
John Lo2b81eb82017-01-30 13:12:10 -050061
62 return (fib_index == t->encap_fib_index);
63}
64
Eyal Bari0f4b1842018-04-12 12:39:51 +030065typedef struct
66{
67 vxlan4_tunnel_key_t key4;
68 u32 tunnel_index;
69}last_tunnel_cache4;
70
71typedef struct
72{
73 vxlan6_tunnel_key_t key6;
74 u32 tunnel_index;
75}last_tunnel_cache6;
76
77always_inline vxlan_tunnel_t *
78vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache,
79 ip4_header_t * ip4_0, vxlan_header_t * vxlan0,
80 vxlan_tunnel_t ** stats_t0)
81{
82 /* Make sure VXLAN tunnel exist according to packet SIP and VNI */
83 vxlan4_tunnel_key_t key4_0 = {
84 .src = ip4_0->src_address.as_u32,
85 .vni = vxlan0->vni_reserved,
86 };
87
88 if (PREDICT_FALSE (key4_0.as_u64 != cache->key4.as_u64))
89 {
90 uword * p = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
91 if (PREDICT_FALSE (p == 0))
92 return 0;
93
94 cache->key4 = key4_0;
95 cache->tunnel_index = p[0];
96 }
97 vxlan_tunnel_t * t0 = pool_elt_at_index (vxm->tunnels, cache->tunnel_index);
98
99 /* Validate VXLAN tunnel SIP against packet DIP */
100 if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
101 *stats_t0 = t0;
102 else
103 {
104 /* try multicast */
105 if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
106 return 0;
107
108 key4_0.src = ip4_0->dst_address.as_u32;
109 /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */
110 uword * p = hash_get (vxm->vxlan4_tunnel_by_key, key4_0.as_u64);
111 if (PREDICT_FALSE (p == NULL))
112 return 0;
113 *stats_t0 = pool_elt_at_index (vxm->tunnels, p[0]);
114 }
115
116 return t0;
117}
118
119always_inline vxlan_tunnel_t *
120vxlan6_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache6 * cache,
121 ip6_header_t * ip6_0, vxlan_header_t * vxlan0,
122 vxlan_tunnel_t ** stats_t0)
123{
124 /* Make sure VXLAN tunnel exist according to packet SIP and VNI */
125 vxlan6_tunnel_key_t key6_0 = {
126 .src = ip6_0->src_address,
127 .vni = vxlan0->vni_reserved,
128 };
129
130 if (PREDICT_FALSE (memcmp(&key6_0, &cache->key6, sizeof key6_0) != 0))
131 {
132 uword * p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
133 if (PREDICT_FALSE (p == NULL))
134 return 0;
135
136 cache->key6 = key6_0;
137 cache->tunnel_index = p[0];
138 }
139 vxlan_tunnel_t * t0 = pool_elt_at_index (vxm->tunnels, cache->tunnel_index);
140
141 /* Validate VXLAN tunnel SIP against packet DIP */
142 if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
143 *stats_t0 = t0;
144 else
145 {
146 /* try multicast */
147 if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
148 return 0;
149
150 key6_0.src = ip6_0->dst_address;
151 /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */
152 uword * p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6_0);
153 if (PREDICT_FALSE (p == NULL))
154 return 0;
155 *stats_t0 = pool_elt_at_index (vxm->tunnels, p[0]);
156 }
157
158 return t0;
159}
160
Chris Luke99cb3352016-04-26 10:49:53 -0400161always_inline uword
Ed Warnickecb9cada2015-12-08 15:45:58 -0700162vxlan_input (vlib_main_t * vm,
163 vlib_node_runtime_t * node,
Chris Luke99cb3352016-04-26 10:49:53 -0400164 vlib_frame_t * from_frame,
John Lo37682e12016-11-30 12:51:39 -0500165 u32 is_ip4)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700166{
Ed Warnickecb9cada2015-12-08 15:45:58 -0700167 vxlan_main_t * vxm = &vxlan_main;
168 vnet_main_t * vnm = vxm->vnet_main;
169 vnet_interface_main_t * im = &vnm->interface_main;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300170 vlib_combined_counter_main_t * rx_counter = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
171 vlib_combined_counter_main_t * drop_counter = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
172 last_tunnel_cache4 last4 = { .tunnel_index = ~0 };
173 last_tunnel_cache6 last6 = { .tunnel_index = ~0 };
Ed Warnickecb9cada2015-12-08 15:45:58 -0700174 u32 pkts_decapsulated = 0;
Damjan Marion586afd72017-04-05 19:18:20 +0200175 u32 thread_index = vlib_get_thread_index();
Ed Warnickecb9cada2015-12-08 15:45:58 -0700176
Dave Barachf9c231e2016-08-05 10:10:18 -0400177 if (is_ip4)
Eyal Bari0f4b1842018-04-12 12:39:51 +0300178 last4.key4.as_u64 = ~0;
Dave Barachf9c231e2016-08-05 10:10:18 -0400179 else
Eyal Bari0f4b1842018-04-12 12:39:51 +0300180 memset (&last6.key6, 0xff, sizeof last6.key6);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700181
Eyal Barifb663012017-10-19 15:27:51 +0300182 u32 next_index = node->cached_next_index;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700183
Eyal Barifb663012017-10-19 15:27:51 +0300184 u32 * from = vlib_frame_vector_args (from_frame);
185 u32 n_left_from = from_frame->n_vectors;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700186
187 while (n_left_from > 0)
188 {
Eyal Barifb663012017-10-19 15:27:51 +0300189 u32 * to_next, n_left_to_next;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300190 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
Eyal Barifb663012017-10-19 15:27:51 +0300191
Ed Warnickecb9cada2015-12-08 15:45:58 -0700192 while (n_left_from >= 4 && n_left_to_next >= 2)
193 {
Ed Warnickecb9cada2015-12-08 15:45:58 -0700194 /* Prefetch next iteration. */
195 {
196 vlib_buffer_t * p2, * p3;
197
198 p2 = vlib_get_buffer (vm, from[2]);
199 p3 = vlib_get_buffer (vm, from[3]);
200
201 vlib_prefetch_buffer_header (p2, LOAD);
202 vlib_prefetch_buffer_header (p3, LOAD);
203
204 CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
205 CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
206 }
207
Eyal Bari0f4b1842018-04-12 12:39:51 +0300208 u32 bi0 = to_next[0] = from[0];
209 u32 bi1 = to_next[1] = from[1];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700210 from += 2;
211 to_next += 2;
212 n_left_to_next -= 2;
213 n_left_from -= 2;
214
Eyal Bari0f4b1842018-04-12 12:39:51 +0300215 vlib_buffer_t * b0, * b1;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700216 b0 = vlib_get_buffer (vm, bi0);
217 b1 = vlib_get_buffer (vm, bi1);
218
219 /* udp leaves current_data pointing at the vxlan header */
Eyal Barifb663012017-10-19 15:27:51 +0300220 void * cur0 = vlib_buffer_get_current (b0);
221 void * cur1 = vlib_buffer_get_current (b1);
222 vxlan_header_t * vxlan0 = cur0;
223 vxlan_header_t * vxlan1 = cur1;
224
225 ip4_header_t * ip4_0, * ip4_1;
226 ip6_header_t * ip6_0, * ip6_1;
Chris Luke99cb3352016-04-26 10:49:53 -0400227 if (is_ip4) {
Eyal Barifb663012017-10-19 15:27:51 +0300228 ip4_0 = cur0 - sizeof(udp_header_t) - sizeof(ip4_header_t);
229 ip4_1 = cur1 - sizeof(udp_header_t) - sizeof(ip4_header_t);
Chris Luke99cb3352016-04-26 10:49:53 -0400230 } else {
Eyal Barifb663012017-10-19 15:27:51 +0300231 ip6_0 = cur0 - sizeof(udp_header_t) - sizeof(ip6_header_t);
232 ip6_1 = cur1 - sizeof(udp_header_t) - sizeof(ip6_header_t);
Chris Luke99cb3352016-04-26 10:49:53 -0400233 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700234
Eyal Bari0f4b1842018-04-12 12:39:51 +0300235 /* pop vxlan */
Eyal Barifb663012017-10-19 15:27:51 +0300236 vlib_buffer_advance (b0, sizeof *vxlan0);
237 vlib_buffer_advance (b1, sizeof *vxlan1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700238
Eyal Bari0f4b1842018-04-12 12:39:51 +0300239 vxlan_tunnel_t * t0, * stats_t0;
240 vxlan_tunnel_t * t1, * stats_t1;
241 if (is_ip4)
242 {
243 t0 = vxlan4_find_tunnel (vxm, &last4, ip4_0, vxlan0, &stats_t0);
244 t1 = vxlan4_find_tunnel (vxm, &last4, ip4_1, vxlan1, &stats_t1);
245 }
246 else
247 {
248 t0 = vxlan6_find_tunnel (vxm, &last6, ip6_0, vxlan0, &stats_t0);
249 t1 = vxlan6_find_tunnel (vxm, &last6, ip6_1, vxlan1, &stats_t1);
Chris Luke99cb3352016-04-26 10:49:53 -0400250 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700251
Eyal Barifb663012017-10-19 15:27:51 +0300252 u32 len0 = vlib_buffer_length_in_chain (vm, b0);
Eyal Barifb663012017-10-19 15:27:51 +0300253 u32 len1 = vlib_buffer_length_in_chain (vm, b1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700254
Eyal Bari0f4b1842018-04-12 12:39:51 +0300255 u32 next0, next1;
256 u8 error0 = 0, error1 = 0;
257 /* Validate VXLAN tunnel encap-fib index agaist packet */
258 if (PREDICT_FALSE (t0 == 0 || validate_vxlan_fib (b0, t0, is_ip4) == 0 ||
259 vxlan0->flags != VXLAN_FLAGS_I))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700260 {
Eyal Bari0f4b1842018-04-12 12:39:51 +0300261 next0 = VXLAN_INPUT_NEXT_DROP;
262
263 if (t0 != 0 && vxlan0->flags != VXLAN_FLAGS_I)
264 {
265 error0 = VXLAN_ERROR_BAD_FLAGS;
266 vlib_increment_combined_counter
267 (drop_counter, thread_index, stats_t0->sw_if_index, 1, len0);
268 }
269 else
270 error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
271 b0->error = node->errors[error0];
272 }
273 else
274 {
275 next0 = t0->decap_next_index;
276
277 /* Required to make the l2 tag push / pop code work on l2 subifs */
278 if (PREDICT_TRUE(next0 == VXLAN_INPUT_NEXT_L2_INPUT))
279 vnet_update_l2_len (b0);
280
281 /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
282 vnet_buffer(b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
283 vlib_increment_combined_counter
284 (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0);
285 pkts_decapsulated++;
286 }
287
288 /* Validate VXLAN tunnel encap-fib index agaist packet */
289 if (PREDICT_FALSE (t1 == 0 || validate_vxlan_fib (b1, t1, is_ip4) == 0 ||
290 vxlan1->flags != VXLAN_FLAGS_I))
291 {
292 next1 = VXLAN_INPUT_NEXT_DROP;
293
294 if (t1 != 0 && vxlan1->flags != VXLAN_FLAGS_I)
295 {
296 error1 = VXLAN_ERROR_BAD_FLAGS;
297 vlib_increment_combined_counter
298 (drop_counter, thread_index, stats_t1->sw_if_index, 1, len1);
299 }
300 else
301 error1 = VXLAN_ERROR_NO_SUCH_TUNNEL;
302 b1->error = node->errors[error1];
303 }
304 else
305 {
306 next1 = t1->decap_next_index;
307
308 /* Required to make the l2 tag push / pop code work on l2 subifs */
309 if (PREDICT_TRUE(next1 == VXLAN_INPUT_NEXT_L2_INPUT))
310 vnet_update_l2_len (b1);
311
312 /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
313 vnet_buffer(b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;
314 pkts_decapsulated++;
315
316 vlib_increment_combined_counter
317 (rx_counter, thread_index, stats_t1->sw_if_index, 1, len1);
318 }
319
320 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
321 {
322 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
323 tr->next_index = next0;
324 tr->error = error0;
325 tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
326 tr->vni = vnet_get_vni (vxlan0);
327 }
328 if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
329 {
330 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof (*tr));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700331 tr->next_index = next1;
332 tr->error = error1;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300333 tr->tunnel_index = t1 == 0 ? ~0 : t1 - vxm->tunnels;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700334 tr->vni = vnet_get_vni (vxlan1);
335 }
336
337 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
338 to_next, n_left_to_next,
339 bi0, bi1, next0, next1);
340 }
341
342 while (n_left_from > 0 && n_left_to_next > 0)
343 {
Eyal Bari0f4b1842018-04-12 12:39:51 +0300344 u32 bi0 = to_next[0] = from[0];
Ed Warnickecb9cada2015-12-08 15:45:58 -0700345 from += 1;
346 to_next += 1;
347 n_left_from -= 1;
348 n_left_to_next -= 1;
349
Eyal Bari0f4b1842018-04-12 12:39:51 +0300350 vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700351
352 /* udp leaves current_data pointing at the vxlan header */
Eyal Barifb663012017-10-19 15:27:51 +0300353 void * cur0 = vlib_buffer_get_current (b0);
354 vxlan_header_t * vxlan0 = cur0;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300355 ip4_header_t * ip4_0;
356 ip6_header_t * ip6_0;
Eyal Barifb663012017-10-19 15:27:51 +0300357 if (is_ip4)
358 ip4_0 = cur0 -sizeof(udp_header_t) - sizeof(ip4_header_t);
359 else
360 ip6_0 = cur0 -sizeof(udp_header_t) - sizeof(ip6_header_t);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700361
362 /* pop (ip, udp, vxlan) */
Eyal Barifb663012017-10-19 15:27:51 +0300363 vlib_buffer_advance (b0, sizeof(*vxlan0));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700364
Eyal Bari0f4b1842018-04-12 12:39:51 +0300365 vxlan_tunnel_t * t0, * stats_t0;
366 if (is_ip4)
367 t0 = vxlan4_find_tunnel (vxm, &last4, ip4_0, vxlan0, &stats_t0);
368 else
369 t0 = vxlan6_find_tunnel (vxm, &last6, ip6_0, vxlan0, &stats_t0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700370
Eyal Bari0f4b1842018-04-12 12:39:51 +0300371 uword len0 = vlib_buffer_length_in_chain (vm, b0);
John Loc42912d2016-11-07 18:30:47 -0500372
Eyal Bari0f4b1842018-04-12 12:39:51 +0300373 u32 next0;
374 u8 error0 = 0;
375 /* Validate VXLAN tunnel encap-fib index agaist packet */
376 if (PREDICT_FALSE (t0 == 0 || validate_vxlan_fib (b0, t0, is_ip4) == 0 ||
377 vxlan0->flags != VXLAN_FLAGS_I))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700378 {
Eyal Bari0f4b1842018-04-12 12:39:51 +0300379 next0 = VXLAN_INPUT_NEXT_DROP;
380
381 if (t0 != 0 && vxlan0->flags != VXLAN_FLAGS_I)
382 {
383 error0 = VXLAN_ERROR_BAD_FLAGS;
384 vlib_increment_combined_counter
385 (drop_counter, thread_index, stats_t0->sw_if_index, 1, len0);
386 }
387 else
388 error0 = VXLAN_ERROR_NO_SUCH_TUNNEL;
389 b0->error = node->errors[error0];
390 }
391 else
392 {
393 next0 = t0->decap_next_index;
394
395 /* Required to make the l2 tag push / pop code work on l2 subifs */
396 if (PREDICT_TRUE(next0 == VXLAN_INPUT_NEXT_L2_INPUT))
397 vnet_update_l2_len (b0);
398
399 /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
400 vnet_buffer(b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
401 pkts_decapsulated++;
402
403 vlib_increment_combined_counter
404 (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0);
405 }
406
407 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
408 {
409 vxlan_rx_trace_t *tr
Ed Warnickecb9cada2015-12-08 15:45:58 -0700410 = vlib_add_trace (vm, node, b0, sizeof (*tr));
411 tr->next_index = next0;
412 tr->error = error0;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300413 tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700414 tr->vni = vnet_get_vni (vxlan0);
415 }
416 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
417 to_next, n_left_to_next,
418 bi0, next0);
419 }
420
421 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
422 }
Eyal Barifb663012017-10-19 15:27:51 +0300423 /* Do we still need this now that tunnel tx stats is kept? */
424 u32 node_idx = is_ip4 ? vxlan4_input_node.index : vxlan6_input_node.index;
425 vlib_node_increment_counter (vm, node_idx, VXLAN_ERROR_DECAPSULATED,
426 pkts_decapsulated);
427
Ed Warnickecb9cada2015-12-08 15:45:58 -0700428 return from_frame->n_vectors;
429}
430
Chris Luke99cb3352016-04-26 10:49:53 -0400431static uword
432vxlan4_input (vlib_main_t * vm,
433 vlib_node_runtime_t * node,
434 vlib_frame_t * from_frame)
435{
436 return vxlan_input(vm, node, from_frame, /* is_ip4 */ 1);
437}
438
439static uword
440vxlan6_input (vlib_main_t * vm,
441 vlib_node_runtime_t * node,
442 vlib_frame_t * from_frame)
443{
444 return vxlan_input(vm, node, from_frame, /* is_ip4 */ 0);
445}
446
Ed Warnickecb9cada2015-12-08 15:45:58 -0700447static char * vxlan_error_strings[] = {
448#define vxlan_error(n,s) s,
449#include <vnet/vxlan/vxlan_error.def>
450#undef vxlan_error
451#undef _
452};
453
Chris Luke99cb3352016-04-26 10:49:53 -0400454VLIB_REGISTER_NODE (vxlan4_input_node) = {
455 .function = vxlan4_input,
456 .name = "vxlan4-input",
457 /* Takes a vector of packets. */
458 .vector_size = sizeof (u32),
459
460 .n_errors = VXLAN_N_ERROR,
461 .error_strings = vxlan_error_strings,
462
463 .n_next_nodes = VXLAN_INPUT_N_NEXT,
464 .next_nodes = {
465#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
466 foreach_vxlan_input_next
467#undef _
468 },
469
470//temp .format_buffer = format_vxlan_header,
471 .format_trace = format_vxlan_rx_trace,
472 // $$$$ .unformat_buffer = unformat_vxlan_header,
473};
474
Damjan Marion1c80e832016-05-11 23:07:18 +0200475VLIB_NODE_FUNCTION_MULTIARCH (vxlan4_input_node, vxlan4_input)
476
Chris Luke99cb3352016-04-26 10:49:53 -0400477VLIB_REGISTER_NODE (vxlan6_input_node) = {
478 .function = vxlan6_input,
479 .name = "vxlan6-input",
Ed Warnickecb9cada2015-12-08 15:45:58 -0700480 /* Takes a vector of packets. */
481 .vector_size = sizeof (u32),
482
483 .n_errors = VXLAN_N_ERROR,
484 .error_strings = vxlan_error_strings,
485
486 .n_next_nodes = VXLAN_INPUT_N_NEXT,
487 .next_nodes = {
488#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
489 foreach_vxlan_input_next
490#undef _
491 },
492
493//temp .format_buffer = format_vxlan_header,
494 .format_trace = format_vxlan_rx_trace,
495 // $$$$ .unformat_buffer = unformat_vxlan_header,
496};
Damjan Marion1c80e832016-05-11 23:07:18 +0200497
498VLIB_NODE_FUNCTION_MULTIARCH (vxlan6_input_node, vxlan6_input)
499
John Lo37682e12016-11-30 12:51:39 -0500500
/* Next-node choices made by the ip-vxlan-bypass packet loop. */
typedef enum
{
  /* packet failed the UDP length / checksum validation */
  IP_VXLAN_BYPASS_NEXT_DROP,
  /* packet passed validation and is handed on for VXLAN decap */
  IP_VXLAN_BYPASS_NEXT_VXLAN,
  /* number of entries above */
  IP_VXLAN_BYPASS_N_NEXT,
} ip_vxan_bypass_next_t;
506
507always_inline uword
508ip_vxlan_bypass_inline (vlib_main_t * vm,
John Lo2b81eb82017-01-30 13:12:10 -0500509 vlib_node_runtime_t * node,
510 vlib_frame_t * frame,
511 u32 is_ip4)
John Lo37682e12016-11-30 12:51:39 -0500512{
513 vxlan_main_t * vxm = &vxlan_main;
514 u32 * from, * to_next, n_left_from, n_left_to_next, next_index;
515 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
516 ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
517 ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
518
519 from = vlib_frame_vector_args (frame);
520 n_left_from = frame->n_vectors;
521 next_index = node->cached_next_index;
522
523 if (node->flags & VLIB_NODE_FLAG_TRACE)
524 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
525
526 if (is_ip4) addr4.data_u32 = ~0;
527 else ip6_address_set_zero (&addr6);
528
529 while (n_left_from > 0)
530 {
531 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
532
533 while (n_left_from >= 4 && n_left_to_next >= 2)
534 {
535 vlib_buffer_t * b0, * b1;
John Lo2b81eb82017-01-30 13:12:10 -0500536 ip4_header_t * ip40, * ip41;
537 ip6_header_t * ip60, * ip61;
John Lo37682e12016-11-30 12:51:39 -0500538 udp_header_t * udp0, * udp1;
539 u32 bi0, ip_len0, udp_len0, flags0, next0;
540 u32 bi1, ip_len1, udp_len1, flags1, next1;
541 i32 len_diff0, len_diff1;
542 u8 error0, good_udp0, proto0;
543 u8 error1, good_udp1, proto1;
544
545 /* Prefetch next iteration. */
546 {
547 vlib_buffer_t * p2, * p3;
548
549 p2 = vlib_get_buffer (vm, from[2]);
550 p3 = vlib_get_buffer (vm, from[3]);
551
552 vlib_prefetch_buffer_header (p2, LOAD);
553 vlib_prefetch_buffer_header (p3, LOAD);
554
555 CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
556 CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
557 }
558
559 bi0 = to_next[0] = from[0];
560 bi1 = to_next[1] = from[1];
561 from += 2;
562 n_left_from -= 2;
563 to_next += 2;
564 n_left_to_next -= 2;
565
566 b0 = vlib_get_buffer (vm, bi0);
567 b1 = vlib_get_buffer (vm, bi1);
John Lo2b81eb82017-01-30 13:12:10 -0500568 if (is_ip4)
569 {
570 ip40 = vlib_buffer_get_current (b0);
571 ip41 = vlib_buffer_get_current (b1);
572 }
573 else
574 {
575 ip60 = vlib_buffer_get_current (b0);
576 ip61 = vlib_buffer_get_current (b1);
577 }
John Lo37682e12016-11-30 12:51:39 -0500578
579 /* Setup packet for next IP feature */
580 vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
581 vnet_feature_next(vnet_buffer(b1)->sw_if_index[VLIB_RX], &next1, b1);
582
John Lo2b81eb82017-01-30 13:12:10 -0500583 if (is_ip4)
584 {
585 /* Treat IP frag packets as "experimental" protocol for now
586 until support of IP frag reassembly is implemented */
587 proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
588 proto1 = ip4_is_fragment(ip41) ? 0xfe : ip41->protocol;
589 }
590 else
591 {
592 proto0 = ip60->protocol;
593 proto1 = ip61->protocol;
594 }
John Lo37682e12016-11-30 12:51:39 -0500595
596 /* Process packet 0 */
597 if (proto0 != IP_PROTOCOL_UDP)
598 goto exit0; /* not UDP packet */
599
John Lo2b81eb82017-01-30 13:12:10 -0500600 if (is_ip4)
601 udp0 = ip4_next_header (ip40);
602 else
603 udp0 = ip6_next_header (ip60);
604
John Lo37682e12016-11-30 12:51:39 -0500605 if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
606 goto exit0; /* not VXLAN packet */
607
John Lo2b81eb82017-01-30 13:12:10 -0500608 /* Validate DIP against VTEPs*/
Eyal Bari0f4b1842018-04-12 12:39:51 +0300609 if (is_ip4)
John Lo37682e12016-11-30 12:51:39 -0500610 {
John Lo2b81eb82017-01-30 13:12:10 -0500611 if (addr4.as_u32 != ip40->dst_address.as_u32)
John Lo37682e12016-11-30 12:51:39 -0500612 {
John Lo2b81eb82017-01-30 13:12:10 -0500613 if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
614 goto exit0; /* no local VTEP for VXLAN packet */
615 addr4 = ip40->dst_address;
John Lo37682e12016-11-30 12:51:39 -0500616 }
617 }
John Lo2b81eb82017-01-30 13:12:10 -0500618 else
619 {
620 if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
621 {
622 if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
623 goto exit0; /* no local VTEP for VXLAN packet */
624 addr6 = ip60->dst_address;
625 }
626 }
John Lo37682e12016-11-30 12:51:39 -0500627
628 flags0 = b0->flags;
Damjan Marion213b5aa2017-07-13 21:19:27 +0200629 good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo37682e12016-11-30 12:51:39 -0500630
631 /* Don't verify UDP checksum for packets with explicit zero checksum. */
632 good_udp0 |= udp0->checksum == 0;
633
634 /* Verify UDP length */
John Lo2b81eb82017-01-30 13:12:10 -0500635 if (is_ip4)
636 ip_len0 = clib_net_to_host_u16 (ip40->length);
637 else
638 ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
John Lo37682e12016-11-30 12:51:39 -0500639 udp_len0 = clib_net_to_host_u16 (udp0->length);
John Lo37682e12016-11-30 12:51:39 -0500640 len_diff0 = ip_len0 - udp_len0;
641
642 /* Verify UDP checksum */
643 if (PREDICT_FALSE (!good_udp0))
644 {
Damjan Marion213b5aa2017-07-13 21:19:27 +0200645 if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
John Lo2b81eb82017-01-30 13:12:10 -0500646 {
647 if (is_ip4)
648 flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
649 else
650 flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
651 good_udp0 =
Damjan Marion213b5aa2017-07-13 21:19:27 +0200652 (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo2b81eb82017-01-30 13:12:10 -0500653 }
John Lo37682e12016-11-30 12:51:39 -0500654 }
655
John Lo2b81eb82017-01-30 13:12:10 -0500656 if (is_ip4)
657 {
658 error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
659 error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
660 }
661 else
662 {
663 error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
664 error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
665 }
John Lo37682e12016-11-30 12:51:39 -0500666
Eyal Bari0f4b1842018-04-12 12:39:51 +0300667 next0 = error0 ?
John Lo37682e12016-11-30 12:51:39 -0500668 IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
669 b0->error = error0 ? error_node->errors[error0] : 0;
670
John Lo2b81eb82017-01-30 13:12:10 -0500671 /* vxlan-input node expect current at VXLAN header */
672 if (is_ip4)
673 vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
674 else
675 vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
676
John Lo37682e12016-11-30 12:51:39 -0500677 exit0:
678 /* Process packet 1 */
679 if (proto1 != IP_PROTOCOL_UDP)
680 goto exit1; /* not UDP packet */
681
John Lo2b81eb82017-01-30 13:12:10 -0500682 if (is_ip4)
683 udp1 = ip4_next_header (ip41);
684 else
685 udp1 = ip6_next_header (ip61);
686
John Lo37682e12016-11-30 12:51:39 -0500687 if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
688 goto exit1; /* not VXLAN packet */
689
John Lo2b81eb82017-01-30 13:12:10 -0500690 /* Validate DIP against VTEPs*/
Eyal Bari0f4b1842018-04-12 12:39:51 +0300691 if (is_ip4)
John Lo37682e12016-11-30 12:51:39 -0500692 {
John Lo2b81eb82017-01-30 13:12:10 -0500693 if (addr4.as_u32 != ip41->dst_address.as_u32)
John Lo37682e12016-11-30 12:51:39 -0500694 {
John Lo2b81eb82017-01-30 13:12:10 -0500695 if (!hash_get (vxm->vtep4, ip41->dst_address.as_u32))
696 goto exit1; /* no local VTEP for VXLAN packet */
697 addr4 = ip41->dst_address;
698 }
John Lo37682e12016-11-30 12:51:39 -0500699 }
John Lo2b81eb82017-01-30 13:12:10 -0500700 else
701 {
702 if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
703 {
704 if (!hash_get_mem (vxm->vtep6, &ip61->dst_address))
705 goto exit1; /* no local VTEP for VXLAN packet */
706 addr6 = ip61->dst_address;
707 }
708 }
John Lo37682e12016-11-30 12:51:39 -0500709
710 flags1 = b1->flags;
Damjan Marion213b5aa2017-07-13 21:19:27 +0200711 good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo37682e12016-11-30 12:51:39 -0500712
713 /* Don't verify UDP checksum for packets with explicit zero checksum. */
714 good_udp1 |= udp1->checksum == 0;
715
716 /* Verify UDP length */
John Lo2b81eb82017-01-30 13:12:10 -0500717 if (is_ip4)
718 ip_len1 = clib_net_to_host_u16 (ip41->length);
719 else
720 ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
John Lo37682e12016-11-30 12:51:39 -0500721 udp_len1 = clib_net_to_host_u16 (udp1->length);
John Lo37682e12016-11-30 12:51:39 -0500722 len_diff1 = ip_len1 - udp_len1;
723
724 /* Verify UDP checksum */
725 if (PREDICT_FALSE (!good_udp1))
726 {
Damjan Marion213b5aa2017-07-13 21:19:27 +0200727 if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
John Lo2b81eb82017-01-30 13:12:10 -0500728 {
729 if (is_ip4)
730 flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
731 else
732 flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
733 good_udp1 =
Damjan Marion213b5aa2017-07-13 21:19:27 +0200734 (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo2b81eb82017-01-30 13:12:10 -0500735 }
John Lo37682e12016-11-30 12:51:39 -0500736 }
737
John Lo2b81eb82017-01-30 13:12:10 -0500738 if (is_ip4)
739 {
740 error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
741 error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
742 }
743 else
744 {
Eyal Baria93ea422017-02-01 13:36:15 +0200745 error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
746 error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
John Lo2b81eb82017-01-30 13:12:10 -0500747 }
John Lo37682e12016-11-30 12:51:39 -0500748
Eyal Bari0f4b1842018-04-12 12:39:51 +0300749 next1 = error1 ?
John Lo37682e12016-11-30 12:51:39 -0500750 IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
751 b1->error = error1 ? error_node->errors[error1] : 0;
Eyal Bari0f4b1842018-04-12 12:39:51 +0300752
John Lo2b81eb82017-01-30 13:12:10 -0500753 /* vxlan-input node expect current at VXLAN header */
754 if (is_ip4)
755 vlib_buffer_advance (b1, sizeof(ip4_header_t)+sizeof(udp_header_t));
756 else
757 vlib_buffer_advance (b1, sizeof(ip6_header_t)+sizeof(udp_header_t));
758
John Lo37682e12016-11-30 12:51:39 -0500759 exit1:
760 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
761 to_next, n_left_to_next,
762 bi0, bi1, next0, next1);
763 }
764
765 while (n_left_from > 0 && n_left_to_next > 0)
766 {
767 vlib_buffer_t * b0;
John Lo2b81eb82017-01-30 13:12:10 -0500768 ip4_header_t * ip40;
769 ip6_header_t * ip60;
John Lo37682e12016-11-30 12:51:39 -0500770 udp_header_t * udp0;
771 u32 bi0, ip_len0, udp_len0, flags0, next0;
772 i32 len_diff0;
773 u8 error0, good_udp0, proto0;
774
775 bi0 = to_next[0] = from[0];
776 from += 1;
777 n_left_from -= 1;
778 to_next += 1;
779 n_left_to_next -= 1;
780
781 b0 = vlib_get_buffer (vm, bi0);
John Lo2b81eb82017-01-30 13:12:10 -0500782 if (is_ip4)
783 ip40 = vlib_buffer_get_current (b0);
784 else
785 ip60 = vlib_buffer_get_current (b0);
John Lo37682e12016-11-30 12:51:39 -0500786
787 /* Setup packet for next IP feature */
788 vnet_feature_next(vnet_buffer(b0)->sw_if_index[VLIB_RX], &next0, b0);
789
John Lo2b81eb82017-01-30 13:12:10 -0500790 if (is_ip4)
791 /* Treat IP4 frag packets as "experimental" protocol for now
792 until support of IP frag reassembly is implemented */
793 proto0 = ip4_is_fragment(ip40) ? 0xfe : ip40->protocol;
794 else
795 proto0 = ip60->protocol;
John Lo37682e12016-11-30 12:51:39 -0500796
797 if (proto0 != IP_PROTOCOL_UDP)
798 goto exit; /* not UDP packet */
799
John Lo2b81eb82017-01-30 13:12:10 -0500800 if (is_ip4)
801 udp0 = ip4_next_header (ip40);
802 else
803 udp0 = ip6_next_header (ip60);
804
John Lo37682e12016-11-30 12:51:39 -0500805 if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan))
806 goto exit; /* not VXLAN packet */
807
John Lo2b81eb82017-01-30 13:12:10 -0500808 /* Validate DIP against VTEPs*/
Eyal Bari0f4b1842018-04-12 12:39:51 +0300809 if (is_ip4)
John Lo37682e12016-11-30 12:51:39 -0500810 {
John Lo2b81eb82017-01-30 13:12:10 -0500811 if (addr4.as_u32 != ip40->dst_address.as_u32)
John Lo37682e12016-11-30 12:51:39 -0500812 {
John Lo2b81eb82017-01-30 13:12:10 -0500813 if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
814 goto exit; /* no local VTEP for VXLAN packet */
815 addr4 = ip40->dst_address;
816 }
John Lo37682e12016-11-30 12:51:39 -0500817 }
John Lo2b81eb82017-01-30 13:12:10 -0500818 else
819 {
820 if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
821 {
822 if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
823 goto exit; /* no local VTEP for VXLAN packet */
824 addr6 = ip60->dst_address;
825 }
826 }
John Lo37682e12016-11-30 12:51:39 -0500827
828 flags0 = b0->flags;
Damjan Marion213b5aa2017-07-13 21:19:27 +0200829 good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo37682e12016-11-30 12:51:39 -0500830
831 /* Don't verify UDP checksum for packets with explicit zero checksum. */
832 good_udp0 |= udp0->checksum == 0;
833
834 /* Verify UDP length */
John Lo2b81eb82017-01-30 13:12:10 -0500835 if (is_ip4)
836 ip_len0 = clib_net_to_host_u16 (ip40->length);
837 else
838 ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
John Lo37682e12016-11-30 12:51:39 -0500839 udp_len0 = clib_net_to_host_u16 (udp0->length);
John Lo37682e12016-11-30 12:51:39 -0500840 len_diff0 = ip_len0 - udp_len0;
841
842 /* Verify UDP checksum */
843 if (PREDICT_FALSE (!good_udp0))
844 {
Damjan Marion213b5aa2017-07-13 21:19:27 +0200845 if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
John Lo2b81eb82017-01-30 13:12:10 -0500846 {
847 if (is_ip4)
848 flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
849 else
850 flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
851 good_udp0 =
Damjan Marion213b5aa2017-07-13 21:19:27 +0200852 (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
John Lo2b81eb82017-01-30 13:12:10 -0500853 }
John Lo37682e12016-11-30 12:51:39 -0500854 }
855
John Lo2b81eb82017-01-30 13:12:10 -0500856 if (is_ip4)
857 {
858 error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
859 error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
860 }
861 else
862 {
863 error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
864 error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
865 }
John Lo37682e12016-11-30 12:51:39 -0500866
Eyal Bari0f4b1842018-04-12 12:39:51 +0300867 next0 = error0 ?
John Lo37682e12016-11-30 12:51:39 -0500868 IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
869 b0->error = error0 ? error_node->errors[error0] : 0;
870
John Lo2b81eb82017-01-30 13:12:10 -0500871 /* vxlan-input node expect current at VXLAN header */
872 if (is_ip4)
873 vlib_buffer_advance (b0, sizeof(ip4_header_t)+sizeof(udp_header_t));
874 else
875 vlib_buffer_advance (b0, sizeof(ip6_header_t)+sizeof(udp_header_t));
876
John Lo37682e12016-11-30 12:51:39 -0500877 exit:
878 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
879 to_next, n_left_to_next,
880 bi0, next0);
881 }
882
883 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
884 }
885
886 return frame->n_vectors;
887}
888
889static uword
890ip4_vxlan_bypass (vlib_main_t * vm,
891 vlib_node_runtime_t * node,
892 vlib_frame_t * frame)
893{
894 return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
895}
896
897VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) = {
898 .function = ip4_vxlan_bypass,
899 .name = "ip4-vxlan-bypass",
900 .vector_size = sizeof (u32),
901
902 .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
903 .next_nodes = {
904 [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
905 [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input",
906 },
907
908 .format_buffer = format_ip4_header,
909 .format_trace = format_ip4_forward_next_trace,
910};
911
912VLIB_NODE_FUNCTION_MULTIARCH (ip4_vxlan_bypass_node,ip4_vxlan_bypass)
913
John Lo37682e12016-11-30 12:51:39 -0500914/* Dummy init function to get us linked in. */
915clib_error_t * ip4_vxlan_bypass_init (vlib_main_t * vm)
916{ return 0; }
917
918VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init);
John Lo2b81eb82017-01-30 13:12:10 -0500919
920static uword
921ip6_vxlan_bypass (vlib_main_t * vm,
922 vlib_node_runtime_t * node,
923 vlib_frame_t * frame)
924{
925 return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
926}
927
928VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) = {
929 .function = ip6_vxlan_bypass,
930 .name = "ip6-vxlan-bypass",
931 .vector_size = sizeof (u32),
932
933 .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
934 .next_nodes = {
935 [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
936 [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input",
937 },
938
939 .format_buffer = format_ip6_header,
940 .format_trace = format_ip6_forward_next_trace,
941};
942
943VLIB_NODE_FUNCTION_MULTIARCH (ip6_vxlan_bypass_node,ip6_vxlan_bypass)
944
945/* Dummy init function to get us linked in. */
946clib_error_t * ip6_vxlan_bypass_init (vlib_main_t * vm)
947{ return 0; }
948
949VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init);
eyal bariaf86a482018-04-17 11:20:27 +0300950
/*
 * Next nodes of the flow-input node, one entry per disposition:
 * _(enum suffix, name of the graph node).
 */
#define foreach_vxlan_flow_input_next \
  _(DROP, "error-drop") \
  _(L2_INPUT, "l2-input")

/*
 * Next-node indices generated from foreach_vxlan_flow_input_next;
 * VXLAN_FLOW_N_NEXT is the number of entries.
 */
typedef enum
{
#define _(sym, node_name) VXLAN_FLOW_NEXT_##sym,
  foreach_vxlan_flow_input_next
#undef _
    VXLAN_FLOW_N_NEXT,
} vxlan_flow_input_next_t;
962
/*
 * Error list of the flow-input node:
 * _(enum suffix, human readable counter description).
 */
#define foreach_vxlan_flow_error \
  _(NONE, "no error") \
  _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
  _(IP_HEADER_ERROR, "Rx ip header errors") \
  _(UDP_CHECKSUM_ERROR, "Rx udp checksum errors") \
  _(UDP_LENGTH_ERROR, "Rx udp length errors")

/*
 * Error codes generated from foreach_vxlan_flow_error;
 * VXLAN_FLOW_N_ERROR is the number of entries.
 */
typedef enum
{
#define _(sym, text) VXLAN_FLOW_ERROR_##sym,
  foreach_vxlan_flow_error
#undef _
    VXLAN_FLOW_N_ERROR,
} vxlan_flow_error_t;
977
978static char *vxlan_flow_error_strings[] = {
979#define _(n,s) s,
980 foreach_vxlan_flow_error
981#undef _
982};
983
984
985static_always_inline u8
986vxlan_validate_udp_csum (vlib_main_t * vm, vlib_buffer_t *b)
987{
988 u32 flags = b->flags;
989 enum { offset = sizeof(ip4_header_t) + sizeof(udp_header_t) + sizeof(vxlan_header_t), };
990
991 /* Verify UDP checksum */
992 if ((flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
993 {
994 vlib_buffer_advance (b, -offset);
995 flags = ip4_tcp_udp_validate_checksum (vm, b);
996 vlib_buffer_advance (b, offset);
997 }
998
999 return (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1000}
1001
1002static_always_inline u8
1003vxlan_check_udp_csum (vlib_main_t * vm, vlib_buffer_t *b)
1004{
1005 ip4_vxlan_header_t * hdr = vlib_buffer_get_current(b) - sizeof *hdr;
1006 udp_header_t * udp = &hdr->udp;
1007 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1008 u8 good_csum = (b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0 ||
1009 udp->checksum == 0;
1010
1011 return !good_csum;
1012}
1013
1014static_always_inline u8
1015vxlan_check_ip (vlib_buffer_t *b, u16 payload_len)
1016{
1017 ip4_vxlan_header_t * hdr = vlib_buffer_get_current(b) - sizeof *hdr;
1018 u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
1019 u16 expected = payload_len + sizeof *hdr;
1020 return ip_len > expected || hdr->ip4.ttl == 0 || hdr->ip4.ip_version_and_header_length != 0x45;
1021}
1022
1023static_always_inline u8
1024vxlan_check_ip_udp_len (vlib_buffer_t *b)
1025{
1026 ip4_vxlan_header_t * hdr = vlib_buffer_get_current(b) - sizeof *hdr;
1027 u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
1028 u16 udp_len = clib_net_to_host_u16 (hdr->udp.length);
1029 return udp_len > ip_len;
1030}
1031
1032static_always_inline u8
1033vxlan_err_code (u8 ip_err0, u8 udp_err0, u8 csum_err0)
1034{
1035 u8 error0 = VXLAN_FLOW_ERROR_NONE;
1036 if (ip_err0)
1037 error0 = VXLAN_FLOW_ERROR_IP_HEADER_ERROR;
1038 if (udp_err0)
1039 error0 = VXLAN_FLOW_ERROR_UDP_LENGTH_ERROR;
1040 if (csum_err0)
1041 error0 = VXLAN_FLOW_ERROR_UDP_CHECKSUM_ERROR;
1042 return error0;
1043}
1044
Eyal Bari93a6f252018-06-14 08:57:39 +03001045VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
eyal bariaf86a482018-04-17 11:20:27 +03001046 vlib_node_runtime_t * node,
1047 vlib_frame_t * f)
1048{
1049 enum { payload_offset = sizeof(ip4_vxlan_header_t) };
1050
1051 vxlan_main_t * vxm = &vxlan_main;
1052 vnet_interface_main_t * im = &vnet_main.interface_main;
1053 vlib_combined_counter_main_t * rx_counter[VXLAN_FLOW_N_NEXT] = {
1054 [VXLAN_FLOW_NEXT_DROP] = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
1055 [VXLAN_FLOW_NEXT_L2_INPUT] = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
1056 };
1057 u32 thread_index = vlib_get_thread_index();
1058
1059 u32 * from = vlib_frame_vector_args (f);
1060 u32 n_left_from = f->n_vectors;
1061 u32 next_index = VXLAN_FLOW_NEXT_L2_INPUT;
1062
1063 while (n_left_from > 0)
1064 {
1065 u32 n_left_to_next, *to_next;
1066
1067 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1068
1069 while (n_left_from > 3 && n_left_to_next > 3)
1070 {
1071 u32 bi0 = to_next[0] = from[0];
1072 u32 bi1 = to_next[1] = from[1];
1073 u32 bi2 = to_next[2] = from[2];
1074 u32 bi3 = to_next[3] = from[3];
1075 from+=4;
1076 n_left_from-=4;
1077 to_next+=4;
1078 n_left_to_next-=4;
1079
1080 vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0);
1081 vlib_buffer_t * b1 = vlib_get_buffer (vm, bi1);
1082 vlib_buffer_t * b2 = vlib_get_buffer (vm, bi2);
1083 vlib_buffer_t * b3 = vlib_get_buffer (vm, bi3);
1084
1085 vlib_buffer_advance (b0, payload_offset);
1086 vlib_buffer_advance (b1, payload_offset);
1087 vlib_buffer_advance (b2, payload_offset);
1088 vlib_buffer_advance (b3, payload_offset);
1089
1090 u16 len0 = vlib_buffer_length_in_chain (vm, b0);
1091 u16 len1 = vlib_buffer_length_in_chain (vm, b1);
1092 u16 len2 = vlib_buffer_length_in_chain (vm, b2);
1093 u16 len3 = vlib_buffer_length_in_chain (vm, b3);
1094
1095 u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT, next1 = VXLAN_FLOW_NEXT_L2_INPUT,
1096 next2 = VXLAN_FLOW_NEXT_L2_INPUT, next3 = VXLAN_FLOW_NEXT_L2_INPUT;
1097
1098 u8 ip_err0 = vxlan_check_ip (b0, len0);
1099 u8 ip_err1 = vxlan_check_ip (b1, len1);
1100 u8 ip_err2 = vxlan_check_ip (b2, len2);
1101 u8 ip_err3 = vxlan_check_ip (b3, len3);
1102 u8 ip_err = ip_err0 | ip_err1 | ip_err2 | ip_err3;
1103
1104 u8 udp_err0 = vxlan_check_ip_udp_len (b0);
1105 u8 udp_err1 = vxlan_check_ip_udp_len (b1);
1106 u8 udp_err2 = vxlan_check_ip_udp_len (b2);
1107 u8 udp_err3 = vxlan_check_ip_udp_len (b3);
1108 u8 udp_err = udp_err0 | udp_err1 | udp_err2 | udp_err3;
1109
1110 u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
1111 u8 csum_err1 = vxlan_check_udp_csum (vm, b1);
1112 u8 csum_err2 = vxlan_check_udp_csum (vm, b2);
1113 u8 csum_err3 = vxlan_check_udp_csum (vm, b3);
1114 u8 csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
1115
1116 if (PREDICT_FALSE(csum_err))
1117 {
1118 if (csum_err0)
1119 csum_err0 = !vxlan_validate_udp_csum (vm, b0);
1120 if (csum_err1)
1121 csum_err1 = !vxlan_validate_udp_csum (vm, b1);
1122 if (csum_err2)
1123 csum_err2 = !vxlan_validate_udp_csum (vm, b2);
1124 if (csum_err3)
1125 csum_err3 = !vxlan_validate_udp_csum (vm, b3);
1126 csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
1127 }
1128
1129 if (PREDICT_FALSE(ip_err || udp_err || csum_err))
1130 {
1131 if (ip_err0 || udp_err0 || csum_err0)
1132 {
1133 next0 = VXLAN_FLOW_NEXT_DROP;
1134 u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
1135 b0->error = node->errors[error0];
1136 }
1137 if (ip_err1 || udp_err1 || csum_err1)
1138 {
1139 next1 = VXLAN_FLOW_NEXT_DROP;
1140 u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
1141 b1->error = node->errors[error1];
1142 }
1143 if (ip_err2 || udp_err2 || csum_err2)
1144 {
1145 next2 = VXLAN_FLOW_NEXT_DROP;
1146 u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
1147 b2->error = node->errors[error2];
1148 }
1149 if (ip_err3 || udp_err3 || csum_err3)
1150 {
1151 next3 = VXLAN_FLOW_NEXT_DROP;
1152 u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
1153 b3->error = node->errors[error3];
1154 }
1155 }
1156
1157 vnet_update_l2_len (b0);
1158 vnet_update_l2_len (b1);
1159 vnet_update_l2_len (b2);
1160 vnet_update_l2_len (b3);
1161
1162 ASSERT (b0->flow_id != 0);
1163 ASSERT (b1->flow_id != 0);
1164 ASSERT (b2->flow_id != 0);
1165 ASSERT (b3->flow_id != 0);
1166
1167 u32 t_index0 = b0->flow_id - vxm->flow_id_start;
1168 u32 t_index1 = b1->flow_id - vxm->flow_id_start;
1169 u32 t_index2 = b2->flow_id - vxm->flow_id_start;
1170 u32 t_index3 = b3->flow_id - vxm->flow_id_start;
1171
1172 vxlan_tunnel_t * t0 = &vxm->tunnels[t_index0];
1173 vxlan_tunnel_t * t1 = &vxm->tunnels[t_index1];
1174 vxlan_tunnel_t * t2 = &vxm->tunnels[t_index2];
1175 vxlan_tunnel_t * t3 = &vxm->tunnels[t_index3];
1176
1177 /* flow id consumed */
1178 b0->flow_id = 0;
1179 b1->flow_id = 0;
1180 b2->flow_id = 0;
1181 b3->flow_id = 0;
1182
1183 u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
1184 u32 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;
1185 u32 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX] = t2->sw_if_index;
1186 u32 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX] = t3->sw_if_index;
1187
1188 vlib_increment_combined_counter (rx_counter[next0], thread_index, sw_if_index0, 1, len0);
1189 vlib_increment_combined_counter (rx_counter[next1], thread_index, sw_if_index1, 1, len1);
1190 vlib_increment_combined_counter (rx_counter[next2], thread_index, sw_if_index2, 1, len2);
1191 vlib_increment_combined_counter (rx_counter[next3], thread_index, sw_if_index3, 1, len3);
1192
1193 u32 flags = b0->flags | b1->flags | b2->flags | b3->flags;
1194
1195 if (PREDICT_FALSE(flags & VLIB_BUFFER_IS_TRACED))
1196 {
1197 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1198 {
1199 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof *tr);
1200 u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
1201 *tr = (vxlan_rx_trace_t) {
1202 .next_index = next0, .error = error0, .tunnel_index = t_index0, .vni = t0->vni };
1203 }
1204 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1205 {
1206 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b1, sizeof *tr);
1207 u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
1208 *tr = (vxlan_rx_trace_t) {
1209 .next_index = next1, .error = error1, .tunnel_index = t_index1, .vni = t1->vni };
1210 }
1211 if (b2->flags & VLIB_BUFFER_IS_TRACED)
1212 {
1213 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b2, sizeof *tr);
1214 u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
1215 *tr = (vxlan_rx_trace_t) {
1216 .next_index = next2, .error = error2, .tunnel_index = t_index2, .vni = t2->vni };
1217 }
1218 if (b3->flags & VLIB_BUFFER_IS_TRACED)
1219 {
1220 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b3, sizeof *tr);
1221 u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
1222 *tr = (vxlan_rx_trace_t) {
1223 .next_index = next3, .error = error3, .tunnel_index = t_index3, .vni = t3->vni };
1224 }
1225 }
1226 vlib_validate_buffer_enqueue_x4
1227 (vm, node, next_index, to_next, n_left_to_next,
1228 bi0, bi1, bi2, bi3, next0, next1, next2, next3);
1229 }
1230 while (n_left_from > 0 && n_left_to_next > 0)
1231 {
1232 u32 bi0 = to_next[0] = from[0];
1233 from++;
1234 n_left_from--;
1235 to_next++;
1236 n_left_to_next--;
1237
1238 vlib_buffer_t * b0 = vlib_get_buffer (vm, bi0);
1239 vlib_buffer_advance (b0, payload_offset);
1240
1241 u16 len0 = vlib_buffer_length_in_chain (vm, b0);
1242 u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
1243
1244 u8 ip_err0 = vxlan_check_ip (b0, len0);
1245 u8 udp_err0 = vxlan_check_ip_udp_len (b0);
1246 u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
1247
1248 if (csum_err0)
1249 csum_err0 = !vxlan_validate_udp_csum (vm, b0);
1250 if (ip_err0 || udp_err0 || csum_err0)
1251 {
1252 next0 = VXLAN_FLOW_NEXT_DROP;
1253 u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
1254 b0->error = node->errors[error0];
1255 }
1256
1257 vnet_update_l2_len (b0);
1258
1259 ASSERT (b0->flow_id != 0);
1260 u32 t_index0 = b0->flow_id - vxm->flow_id_start;
1261 vxlan_tunnel_t * t0 = &vxm->tunnels[t_index0];
1262 b0->flow_id = 0;
1263
1264 u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
1265 vlib_increment_combined_counter (rx_counter[next0], thread_index, sw_if_index0, 1, len0);
1266
1267 if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
1268 {
1269 vxlan_rx_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof *tr);
1270 u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
1271 *tr = (vxlan_rx_trace_t) {
1272 .next_index = next0, .error = error0, .tunnel_index = t_index0, .vni = t0->vni };
1273 }
1274 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
1275 to_next, n_left_to_next,
1276 bi0, next0);
1277 }
1278
1279 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1280 }
1281
1282 return f->n_vectors;
1283}
1284
1285/* *INDENT-OFF* */
1286#ifndef CLIB_MULTIARCH_VARIANT
1287VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
1288 .name = "vxlan-flow-input",
eyal bariaf86a482018-04-17 11:20:27 +03001289 .type = VLIB_NODE_TYPE_INTERNAL,
1290 .vector_size = sizeof (u32),
1291
1292 .format_trace = format_vxlan_rx_trace,
1293
1294 .n_errors = VXLAN_FLOW_N_ERROR,
1295 .error_strings = vxlan_flow_error_strings,
1296
1297 .n_next_nodes = VXLAN_FLOW_N_NEXT,
1298 .next_nodes = {
1299#define _(s,n) [VXLAN_FLOW_NEXT_##s] = n,
1300 foreach_vxlan_flow_input_next
1301#undef _
1302 },
1303};
1304#endif
1305/* *INDENT-ON* */