/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
17
18#define _GNU_SOURCE
19#include <stdint.h>
20#include <vnet/llc/llc.h>
21#include <vnet/snap/snap.h>
22#include <vnet/bonding/node.h>
23
/*
 * Global bonding state.  Within this file it is read by the interface
 * admin/link callbacks to obtain the vlib_main_t pointer
 * (bond_main.vlib_main).
 */
bond_main_t bond_main;
25
/*
 * X-macro table of the bond-input node counters.  Each entry is
 * _(SYMBOL, "description"); the table is expanded once to build the
 * bond_input_error_t enum and once to build the matching string array,
 * so the two always stay in the same order.
 */
#define foreach_bond_input_error          \
  _(NONE, "no error")                     \
  _(IF_DOWN, "interface down")            \
  _(NO_SLAVE, "no slave")                 \
  _(NO_BOND, "no bond interface")         \
  _(PASS_THRU, "pass through")

/* Counter indices: BOND_INPUT_ERROR_<SYMBOL>, terminated by the count. */
typedef enum
{
#define _(sym, str) BOND_INPUT_ERROR_##sym,
  foreach_bond_input_error
#undef _
    BOND_INPUT_N_ERROR,
} bond_input_error_t;

/* Human readable counter descriptions, indexed by bond_input_error_t. */
static char *bond_input_error_strings[] = {
#define _(sym, str) str,
  foreach_bond_input_error
#undef _
};
46
47static u8 *
48format_bond_input_trace (u8 * s, va_list * args)
49{
50 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
51 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
52 bond_packet_trace_t *t = va_arg (*args, bond_packet_trace_t *);
53 vnet_hw_interface_t *hw, *hw1;
54 vnet_main_t *vnm = vnet_get_main ();
55
56 hw = vnet_get_sup_hw_interface (vnm, t->sw_if_index);
57 hw1 = vnet_get_sup_hw_interface (vnm, t->bond_sw_if_index);
58 s = format (s, "src %U, dst %U, %s -> %s",
59 format_ethernet_address, t->ethernet.src_address,
60 format_ethernet_address, t->ethernet.dst_address,
61 hw->name, hw1->name);
62
63 return s;
64}
65
66static_always_inline u8
67packet_is_cdp (ethernet_header_t * eth)
68{
69 llc_header_t *llc;
70 snap_header_t *snap;
71
72 llc = (llc_header_t *) (eth + 1);
73 snap = (snap_header_t *) (llc + 1);
74
75 return ((eth->type == htons (ETHERNET_TYPE_CDP)) ||
76 ((llc->src_sap == 0xAA) && (llc->control == 0x03) &&
77 (snap->protocol == htons (0x2000)) &&
78 (snap->oui[0] == 0) && (snap->oui[1] == 0) &&
79 (snap->oui[2] == 0x0C)));
80}
81
82static inline void
83bond_sw_if_index_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node,
84 slave_if_t * sif, ethernet_header_t * eth,
85 vlib_buffer_t * b0)
86{
87 bond_if_t *bif;
88 u16 thread_index = vlib_get_thread_index ();
89 u16 *ethertype_p, ethertype;
90 ethernet_vlan_header_t *vlan;
91
92 if (PREDICT_TRUE (sif != 0))
93 {
94 bif = bond_get_master_by_sw_if_index (sif->group);
95 if (PREDICT_TRUE (bif != 0))
96 {
97 if (PREDICT_TRUE (vec_len (bif->slaves) >= 1))
98 {
99 if (PREDICT_TRUE (bif->admin_up == 1))
100 {
101 if (!ethernet_frame_is_tagged (ntohs (eth->type)))
102 {
103 // Let some layer2 packets pass through.
104 if (PREDICT_TRUE ((eth->type !=
105 htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
106 && !packet_is_cdp (eth)
107 && (eth->type !=
108 htons
109 (ETHERNET_TYPE_802_1_LLDP))))
110 {
111 // Change the physical interface to
112 // bond interface
113 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
114 bif->sw_if_index;
115
116 /* increase rx counters */
117 vlib_increment_simple_counter
118 (vnet_main.interface_main.sw_if_counters +
119 VNET_INTERFACE_COUNTER_RX, thread_index,
120 bif->sw_if_index, 1);
121 }
122 else
123 {
124 vlib_error_count (vm, node->node_index,
125 BOND_INPUT_ERROR_PASS_THRU, 1);
126 }
127 }
128 else
129 {
130 vlan = (void *) (eth + 1);
131 ethertype_p = &vlan->type;
132 if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
133 {
134 vlan++;
135 ethertype_p = &vlan->type;
136 }
137 ethertype = *ethertype_p;
138 if (PREDICT_TRUE ((ethertype !=
139 htons (ETHERNET_TYPE_SLOW_PROTOCOLS))
140 && (ethertype !=
141 htons (ETHERNET_TYPE_CDP))
142 && (ethertype !=
143 htons
144 (ETHERNET_TYPE_802_1_LLDP))))
145 {
146 // Change the physical interface to
147 // bond interface
148 vnet_buffer (b0)->sw_if_index[VLIB_RX] =
149 bif->sw_if_index;
150
151 /* increase rx counters */
152 vlib_increment_simple_counter
153 (vnet_main.interface_main.sw_if_counters +
154 VNET_INTERFACE_COUNTER_RX, thread_index,
155 bif->sw_if_index, 1);
156 }
157 else
158 {
159 vlib_error_count (vm, node->node_index,
160 BOND_INPUT_ERROR_PASS_THRU, 1);
161 }
162 }
163 }
164 else
165 {
166 vlib_error_count (vm, node->node_index,
167 BOND_INPUT_ERROR_IF_DOWN, 1);
168 }
169 }
170 else
171 {
172 vlib_error_count (vm, node->node_index,
173 BOND_INPUT_ERROR_NO_SLAVE, 1);
174 }
175 }
176 else
177 {
178 vlib_error_count (vm, node->node_index,
179 BOND_INPUT_ERROR_NO_BOND, 1);
180 }
181 }
182 else
183 {
184 vlib_error_count (vm, node->node_index, BOND_INPUT_ERROR_NO_SLAVE, 1);
185 }
186
187}
188
189static uword
190bond_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
191 vlib_frame_t * frame)
192{
193 u32 bi0, bi1, bi2, bi3;
194 vlib_buffer_t *b0, *b1, *b2, *b3;
195 u32 next_index;
196 u32 *from, *to_next, n_left_from, n_left_to_next;
197 ethernet_header_t *eth, *eth1, *eth2, *eth3;
198 u32 next0, next1, next2, next3;
199 bond_packet_trace_t *t0;
200 uword n_trace = vlib_get_trace_count (vm, node);
201 u32 sw_if_index, sw_if_index1, sw_if_index2, sw_if_index3;
202 slave_if_t *sif, *sif1, *sif2, *sif3;
203 u16 thread_index = vlib_get_thread_index ();
204
205 /* Vector of buffer / pkt indices we're supposed to process */
206 from = vlib_frame_vector_args (frame);
207
208 /* Number of buffers / pkts */
209 n_left_from = frame->n_vectors;
210
211 /* Speculatively send the first buffer to the last disposition we used */
212 next_index = node->cached_next_index;
213
214 while (n_left_from > 0)
215 {
216 /* set up to enqueue to our disposition with index = next_index */
217 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
218
219 while (n_left_from >= 12 && n_left_to_next >= 4)
220 {
221 // Prefetch next iteration
222 {
223 vlib_buffer_t *b4, *b5, *b6, *b7;
224
225 b4 = vlib_get_buffer (vm, from[4]);
226 b5 = vlib_get_buffer (vm, from[5]);
227 b6 = vlib_get_buffer (vm, from[6]);
228 b7 = vlib_get_buffer (vm, from[7]);
229
230 vlib_prefetch_buffer_header (b4, STORE);
231 vlib_prefetch_buffer_header (b5, STORE);
232 vlib_prefetch_buffer_header (b6, STORE);
233 vlib_prefetch_buffer_header (b7, STORE);
234
235 CLIB_PREFETCH (b4->data, CLIB_CACHE_LINE_BYTES, LOAD);
236 CLIB_PREFETCH (b5->data, CLIB_CACHE_LINE_BYTES, LOAD);
237 CLIB_PREFETCH (b6->data, CLIB_CACHE_LINE_BYTES, LOAD);
238 CLIB_PREFETCH (b7->data, CLIB_CACHE_LINE_BYTES, LOAD);
239 }
240
241 next0 = 0;
242 next1 = 0;
243 next2 = 0;
244 next3 = 0;
245
246 bi0 = from[0];
247 bi1 = from[1];
248 bi2 = from[2];
249 bi3 = from[3];
250
251 to_next[0] = bi0;
252 to_next[1] = bi1;
253 to_next[2] = bi2;
254 to_next[3] = bi3;
255
256 from += 4;
257 to_next += 4;
258 n_left_from -= 4;
259 n_left_to_next -= 4;
260
261 b0 = vlib_get_buffer (vm, bi0);
262 b1 = vlib_get_buffer (vm, bi1);
263 b2 = vlib_get_buffer (vm, bi2);
264 b3 = vlib_get_buffer (vm, bi3);
265
266 vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
267 b0);
268 vnet_feature_next (vnet_buffer (b1)->sw_if_index[VLIB_RX], &next1,
269 b1);
270 vnet_feature_next (vnet_buffer (b2)->sw_if_index[VLIB_RX], &next2,
271 b2);
272 vnet_feature_next (vnet_buffer (b3)->sw_if_index[VLIB_RX], &next3,
273 b3);
274
275 eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
276 eth1 = (ethernet_header_t *) vlib_buffer_get_current (b1);
277 eth2 = (ethernet_header_t *) vlib_buffer_get_current (b2);
278 eth3 = (ethernet_header_t *) vlib_buffer_get_current (b3);
279
280 sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
281 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
282 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX];
283 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX];
284
285 // sw_if_index points to the physical interface
286 sif = bond_get_slave_by_sw_if_index (sw_if_index);
287 sif1 = bond_get_slave_by_sw_if_index (sw_if_index1);
288 sif2 = bond_get_slave_by_sw_if_index (sw_if_index2);
289 sif3 = bond_get_slave_by_sw_if_index (sw_if_index3);
290
291 bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
292 bond_sw_if_index_rewrite (vm, node, sif1, eth1, b1);
293 bond_sw_if_index_rewrite (vm, node, sif2, eth2, b2);
294 bond_sw_if_index_rewrite (vm, node, sif3, eth3, b3);
295
296 if (PREDICT_FALSE (n_trace > 0))
297 {
298 vlib_trace_buffer (vm, node, next0, b0, 0 /* follow_chain */ );
299 vlib_set_trace_count (vm, node, --n_trace);
300 t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
301 t0->ethernet = *eth;
302 t0->sw_if_index = sw_if_index;
303 t0->bond_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
304
305 if (PREDICT_TRUE (n_trace > 0))
306 {
307 vlib_trace_buffer (vm, node, next1, b1,
308 0 /* follow_chain */ );
309 vlib_set_trace_count (vm, node, --n_trace);
310 t0 = vlib_add_trace (vm, node, b1, sizeof (*t0));
311 t0->ethernet = *eth1;
312 t0->sw_if_index = sw_if_index1;
313 t0->bond_sw_if_index =
314 vnet_buffer (b1)->sw_if_index[VLIB_RX];
315
316 if (PREDICT_TRUE (n_trace > 0))
317 {
318 vlib_trace_buffer (vm, node, next1, b2,
319 0 /* follow_chain */ );
320 vlib_set_trace_count (vm, node, --n_trace);
321 t0 = vlib_add_trace (vm, node, b2, sizeof (*t0));
322 t0->ethernet = *eth2;
323 t0->sw_if_index = sw_if_index2;
324 t0->bond_sw_if_index =
325 vnet_buffer (b2)->sw_if_index[VLIB_RX];
326
327 if (PREDICT_TRUE (n_trace > 0))
328 {
329 vlib_trace_buffer (vm, node, next1, b2,
330 0 /* follow_chain */ );
331 vlib_set_trace_count (vm, node, --n_trace);
332 t0 = vlib_add_trace (vm, node, b3, sizeof (*t0));
333 t0->ethernet = *eth3;
334 t0->sw_if_index = sw_if_index3;
335 t0->bond_sw_if_index =
336 vnet_buffer (b3)->sw_if_index[VLIB_RX];
337 }
338 }
339 }
340 }
341
342 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
343 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
344 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
345 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
346
347 /* verify speculative enqueue, maybe switch current next frame */
348 vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
349 to_next, n_left_to_next,
350 bi0, bi1, bi2, bi3, next0, next1,
351 next2, next3);
352 }
353
354 while (n_left_from > 0 && n_left_to_next > 0)
355 {
356 // Prefetch next iteration
357 if (n_left_from > 1)
358 {
359 vlib_buffer_t *p2;
360
361 p2 = vlib_get_buffer (vm, from[1]);
362 vlib_prefetch_buffer_header (p2, STORE);
363 CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, LOAD);
364 }
365
366 next0 = 0;
367 bi0 = from[0];
368 to_next[0] = bi0;
369 from += 1;
370 to_next += 1;
371 n_left_from -= 1;
372 n_left_to_next -= 1;
373
374 b0 = vlib_get_buffer (vm, bi0);
375 vnet_feature_next (vnet_buffer (b0)->sw_if_index[VLIB_RX], &next0,
376 b0);
377
378 eth = (ethernet_header_t *) vlib_buffer_get_current (b0);
379
380 sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
381 // sw_if_index points to the physical interface
382 sif = bond_get_slave_by_sw_if_index (sw_if_index);
383 bond_sw_if_index_rewrite (vm, node, sif, eth, b0);
384
385 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
386
387 /* verify speculative enqueue, maybe switch current next frame */
388 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
389 to_next, n_left_to_next,
390 bi0, next0);
391 }
392 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
393 }
394
395 vlib_node_increment_counter (vm, bond_input_node.index,
396 BOND_INPUT_ERROR_NONE, frame->n_vectors);
397
398 vnet_device_increment_rx_packets (thread_index, frame->n_vectors);
399
400 return frame->n_vectors;
401}
402
403static clib_error_t *
404bond_input_init (vlib_main_t * vm)
405{
406 return 0;
407}
408
409/* *INDENT-OFF* */
410VLIB_REGISTER_NODE (bond_input_node) = {
411 .function = bond_input_fn,
412 .name = "bond-input",
413 .vector_size = sizeof (u32),
414 .format_buffer = format_ethernet_header_with_length,
415 .format_trace = format_bond_input_trace,
416 .type = VLIB_NODE_TYPE_INTERNAL,
417 .n_errors = BOND_INPUT_N_ERROR,
418 .error_strings = bond_input_error_strings,
419 .n_next_nodes = 0,
420 .next_nodes =
421 {
422 [0] = "error-drop"
423 }
424};
425
426VLIB_INIT_FUNCTION (bond_input_init);
427
428VNET_FEATURE_INIT (bond_input, static) =
429{
430 .arc_name = "device-input",
431 .node_name = "bond-input",
432 .runs_before = VNET_FEATURES ("ethernet-input"),
433};
434VLIB_NODE_FUNCTION_MULTIARCH (bond_input_node, bond_input_fn)
435/* *INDENT-ON* */
436
437static clib_error_t *
438bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
439{
440 bond_main_t *bm = &bond_main;
441 slave_if_t *sif;
442 vlib_main_t *vm = bm->vlib_main;
443
444 sif = bond_get_slave_by_sw_if_index (sw_if_index);
445 if (sif)
446 {
447 sif->port_enabled = flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
448 if (sif->port_enabled == 0)
449 {
450 if (sif->lacp_enabled == 0)
451 {
452 bond_disable_collecting_distributing (vm, sif);
453 }
454 }
455 else
456 {
457 if (sif->lacp_enabled == 0)
458 {
459 bond_enable_collecting_distributing (vm, sif);
460 }
461 }
462 }
463
464 return 0;
465}
466
467VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bond_sw_interface_up_down);
468
469static clib_error_t *
470bond_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
471{
472 bond_main_t *bm = &bond_main;
473 slave_if_t *sif;
474 vnet_sw_interface_t *sw;
475 vlib_main_t *vm = bm->vlib_main;
476 vnet_interface_main_t *im = &vnm->interface_main;
477
478 sw = pool_elt_at_index (im->sw_interfaces, hw_if_index);
479 sif = bond_get_slave_by_sw_if_index (sw->sw_if_index);
480 if (sif)
481 {
482 if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
483 {
484 if (sif->lacp_enabled == 0)
485 {
486 bond_disable_collecting_distributing (vm, sif);
487 }
488 }
489 else
490 {
491 if (sif->lacp_enabled == 0)
492 {
493 bond_enable_collecting_distributing (vm, sif);
494 }
495 }
496 }
497
498 return 0;
499}
500
501VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (bond_hw_interface_up_down);
502
503/*
504 * fd.io coding-style-patch-verification: ON
505 *
506 * Local Variables:
507 * eval: (c-set-style "gnu")
508 * End:
509 */