/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __included_vnet_bonding_node_h__
#define __included_vnet_bonding_node_h__

#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vppinfra/format.h>
#include <vppinfra/hash.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface.h>
24
/*
 * LACP periodic-transmission intervals and the matching timeouts.
 * Each timeout is three times its periodic interval, so the values are
 * 1.0 / 3.0 for the fast pair and 30.0 / 90.0 for the slow pair.
 * NOTE(review): units are presumably seconds -- confirm against the users
 * of these macros.  The "TIMOUT" spelling is kept because the names are
 * part of the header's interface.
 */
#define LACP_FAST_PERIODIC_TIMER        1.0
#define LACP_SHORT_TIMOUT_TIME          (LACP_FAST_PERIODIC_TIMER * 3)
#define LACP_SLOW_PERIODIC_TIMER        30.0
#define LACP_LONG_TIMOUT_TIME           (LACP_SLOW_PERIODIC_TIMER * 3)

/*
 * Smaller of two values; only defined when no MIN is already in scope.
 * The selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef MIN
#define MIN(x,y) (((x)<(y))?(x):(y))
#endif
33
/*
 * True when `a` is a power of two, as reported by is_pow2 ().
 * NOTE(review): judging by the name, callers presumably use this to
 * replace a modulo by a mask -- confirm at the call sites.
 */
#define BOND_MODULO_SHORTCUT(a) \
  (is_pow2 (a))
Steven0d883012018-05-11 11:06:23 -070036
/*
 * Bond modes.  Each entry is _ (numeric value, enum suffix, keyword string).
 * The list is expanded into bond_mode_t right below and is re-used by the
 * bond mode format/unformat helpers in this header.
 */
#define foreach_bond_mode                 \
  _ (1, ROUND_ROBIN, "round-robin")       \
  _ (2, ACTIVE_BACKUP, "active-backup")   \
  _ (3, XOR, "xor")                       \
  _ (4, BROADCAST, "broadcast")           \
  _ (5, LACP, "lacp")

/* BOND_MODE_<suffix> = <value> for every entry of foreach_bond_mode. */
typedef enum
{
#define _(v, f, s) BOND_MODE_##f = v,
  foreach_bond_mode
#undef _
} bond_mode_t;
50
/*
 * Configurable load-balances: the subset of algorithms that
 * unformat_bond_load_balance () accepts as a keyword.
 * Each entry is _ (value, enum suffix, keyword string, function suffix).
 * NOTE(review): "l23" is listed ahead of "l2", presumably so that the
 * parser tries the longer keyword before its prefix -- confirm before
 * reordering.
 */
#define foreach_bond_lb     \
  _ (2, L23, "l23", l23)    \
  _ (1, L34, "l34", l34)    \
  _ (0, L2, "l2", l2)

/* load-balance functions implemented in bond-output */
#define foreach_bond_lb_algo                    \
  _ (0, L2, "l2", l2)                           \
  _ (1, L34, "l34", l34)                        \
  _ (2, L23, "l23", l23)                        \
  _ (3, RR, "round-robin", round_robin)         \
  _ (4, BC, "broadcast", broadcast)             \
  _ (5, AB, "active-backup", active_backup)

/* BOND_LB_<suffix> = <value> for every entry of foreach_bond_lb_algo. */
typedef enum
{
#define _(v, f, s, p) BOND_LB_##f = v,
  foreach_bond_lb_algo
#undef _
} bond_load_balance_t;
72
/*
 * Event codes for the bond process node.
 * NOTE(review): by its name BOND_SEND_GARP_NA presumably requests sending
 * gratuitous ARP / neighbor advertisement -- confirm in the process node.
 */
typedef enum
{
  BOND_SEND_GARP_NA = 1,
} bond_send_garp_na_process_event_t;
77
Steven9cd2d7a2017-12-20 12:43:01 -080078typedef struct
79{
Alexander Chernavinad9d5282018-12-13 09:08:09 -050080 u32 id;
Steven9cd2d7a2017-12-20 12:43:01 -080081 u8 hw_addr_set;
82 u8 hw_addr[6];
83 u8 mode;
84 u8 lb;
Zhiyong Yang751e3f32019-06-26 05:49:14 -040085 u8 numa_only;
Steven Luong2e1fa542020-01-06 15:14:46 -080086 u8 gso;
Steven9cd2d7a2017-12-20 12:43:01 -080087 /* return */
88 u32 sw_if_index;
89 int rv;
90 clib_error_t *error;
91} bond_create_if_args_t;
92
93typedef struct
94{
Steven Luong4c4223e2020-07-15 08:44:54 -070095 /* member's sw_if_index */
96 u32 member;
Steven9cd2d7a2017-12-20 12:43:01 -080097 /* bond's sw_if_index */
98 u32 group;
99 u8 is_passive;
100 u8 is_long_timeout;
101 /* return */
102 int rv;
103 clib_error_t *error;
Steven Luong4c4223e2020-07-15 08:44:54 -0700104} bond_add_member_args_t;
Steven9cd2d7a2017-12-20 12:43:01 -0800105
106typedef struct
107{
Steven Luong4c4223e2020-07-15 08:44:54 -0700108 u32 member;
Steven9cd2d7a2017-12-20 12:43:01 -0800109 /* return */
110 int rv;
111 clib_error_t *error;
Steven Luong4c4223e2020-07-15 08:44:54 -0700112} bond_detach_member_args_t;
Steven9cd2d7a2017-12-20 12:43:01 -0800113
Steven Luonga1876b82019-08-20 16:58:00 -0700114typedef struct
115{
116 u32 sw_if_index;
117 u32 weight;
118 /* return */
119 int rv;
120 clib_error_t *error;
121} bond_set_intf_weight_args_t;
122
Steven9cd2d7a2017-12-20 12:43:01 -0800123/** BOND interface details struct */
124typedef struct
125{
126 u32 sw_if_index;
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500127 u32 id;
Steven9cd2d7a2017-12-20 12:43:01 -0800128 u8 interface_name[64];
Steven Luong4c4223e2020-07-15 08:44:54 -0700129 u32 mode;
130 u32 lb;
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400131 u8 numa_only;
Steven Luong4c4223e2020-07-15 08:44:54 -0700132 u32 active_members;
133 u32 members;
Steven9cd2d7a2017-12-20 12:43:01 -0800134} bond_interface_details_t;
135
Steven Luong4c4223e2020-07-15 08:44:54 -0700136/** member interface details struct */
Steven9cd2d7a2017-12-20 12:43:01 -0800137typedef struct
138{
139 u32 sw_if_index;
140 u8 interface_name[64];
141 u8 is_passive;
142 u8 is_long_timeout;
Steven Luonga1876b82019-08-20 16:58:00 -0700143 u8 is_local_numa;
144 u32 weight;
Steven Luong4c4223e2020-07-15 08:44:54 -0700145 u32 active_members;
146} member_interface_details_t;
Steven9cd2d7a2017-12-20 12:43:01 -0800147
148typedef CLIB_PACKED (struct
149 {
150 u16 system_priority;
151 u8 system[6];
152 u16 key; u16 port_priority; u16 port_number;
153 u8 state;
154 }) lacp_port_info_t;
155
156typedef struct
157{
Stevenc4e99c52018-09-27 20:06:26 -0700158 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
159 u32 buffers[VLIB_FRAME_SIZE];
160 u32 n_buffers;
161} bond_per_port_queue_t;
Stevena005e7f2018-03-22 17:46:58 -0700162
Stevenc4e99c52018-09-27 20:06:26 -0700163typedef struct
164{
165 bond_per_port_queue_t *per_port_queue;
166} bond_per_thread_data_t;
Stevena005e7f2018-03-22 17:46:58 -0700167
168typedef struct
169{
Steven9cd2d7a2017-12-20 12:43:01 -0800170 u8 admin_up;
171 u8 mode;
172 u8 lb;
173
Steven Luong4c4223e2020-07-15 08:44:54 -0700174 /* the last member index for the rr lb */
Steven9cd2d7a2017-12-20 12:43:01 -0800175 u32 lb_rr_last_index;
176
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500177 /* Real device instance in interface vector */
Steven9cd2d7a2017-12-20 12:43:01 -0800178 u32 dev_instance;
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500179
180 /* Interface ID being shown to user */
181 u32 id;
182
Steven9cd2d7a2017-12-20 12:43:01 -0800183 u32 hw_if_index;
184 u32 sw_if_index;
185
Steven Luong4c4223e2020-07-15 08:44:54 -0700186 /* Configured members */
187 u32 *members;
Steven9cd2d7a2017-12-20 12:43:01 -0800188
Steven Luong4c4223e2020-07-15 08:44:54 -0700189 /* Members that are in DISTRIBUTING state */
190 u32 *active_members;
Steven9cd2d7a2017-12-20 12:43:01 -0800191
Steven9cd2d7a2017-12-20 12:43:01 -0800192 lacp_port_info_t partner;
193 lacp_port_info_t actor;
194 u8 individual_aggregator;
195
Steven Luong4c4223e2020-07-15 08:44:54 -0700196 /* If the flag numa_only is set, it means that only members
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400197 on local numa node works for lacp mode if have at least one,
198 otherwise it works as usual. */
199 u8 numa_only;
Steven Luong2e1fa542020-01-06 15:14:46 -0800200 u8 gso;
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400201
Steven Luong4c4223e2020-07-15 08:44:54 -0700202 /* How many members on local numa node are there in lacp mode? */
203 word n_numa_members;
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400204
Steven9cd2d7a2017-12-20 12:43:01 -0800205 u32 group;
206 uword *port_number_bitmap;
207 u8 use_custom_mac;
208 u8 hw_address[6];
Stevena005e7f2018-03-22 17:46:58 -0700209
210 clib_spinlock_t lockp;
Steven9cd2d7a2017-12-20 12:43:01 -0800211} bond_if_t;
212
213typedef struct
214{
215 u8 persistent_hw_address[6];
216
217 /* neighbor's vlib software interface index */
218 u32 sw_if_index;
219
220 /* Neighbor time-to-live (usually 3s) */
221 f32 ttl_in_seconds;
222
223 /* 1 = interface is configured with long timeout (60s) */
224 u8 is_long_timeout;
225
226 /* 1 = debug is on; 0 = debug is off */
227 u8 debug;
228
229 /* tx packet template id for this neighbor */
230 u8 packet_template_index;
231
232 /* Info we actually keep about each neighbor */
233
234 /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
235 u8 last_packet_signature_valid;
236 uword last_packet_signature;
237
238 /* last received lacp packet, for the J-hash optimization */
239 u8 *last_rx_pkt;
240
241 /* last marker packet */
242 u8 *last_marker_pkt;
243
244 /* neighbor vlib hw_if_index */
245 u32 hw_if_index;
246
Steven Luonga1876b82019-08-20 16:58:00 -0700247 /* weight -- valid only for active backup */
248 u32 weight;
249
Steven9cd2d7a2017-12-20 12:43:01 -0800250 /* actor does not initiate the protocol exchange */
251 u8 is_passive;
252
253 /* Partner port information */
254 lacp_port_info_t partner;
255 lacp_port_info_t partner_admin;;
256
Zhiyong Yang52c5f262019-06-13 21:14:33 -0400257 /* Actor port information */
Steven9cd2d7a2017-12-20 12:43:01 -0800258 lacp_port_info_t actor;
259 lacp_port_info_t actor_admin;
260
261 /* Need To Transmit flag */
262 u8 ntt;
263
264 /* Link has been established and Aggregate Port is operable */
265 u8 port_enabled;
266
267 /* Initialization or reinitialization of the lacp protocol entity */
268 u8 begin;
269
270 /* Aggregation Port is operating the lacp */
271 u8 lacp_enabled;
272
273 /* MUX to indicate to the Selection Logic wait_while_timer expired */
274 u8 ready_n;
275
276 /* Selection Logic indicates al Aggregation Ports attached */
277 u8 ready;
278
279 /* Selection Logic selected an Aggregator */
280 int selected;
281
282 /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
283 u8 port_moved;
284
285 /* timer used to detect whether received protocol information has expired */
286 f64 current_while_timer;
287
288 /* timer used to detect actor churn states */
289 f64 actor_churn_timer;
290
291 /* time last lacpdu was sent */
Steven Luong82c5dda2019-03-05 09:38:33 -0800292 f64 last_lacpdu_sent_time;
293
294 /* time last lacpdu was received */
295 f64 last_lacpdu_recd_time;
296
297 /* time last marker pdu was sent */
298 f64 last_marker_pdu_sent_time;
299
300 /* time last marker pdu was received */
301 f64 last_marker_pdu_recd_time;
Steven9cd2d7a2017-12-20 12:43:01 -0800302
303 /* timer used to generate periodic transmission */
304 f64 periodic_timer;
305
306 /* timer used to detect partner churn states */
307 f64 partner_churn_timer;
308
309 /* provides hysteresis before performing an aggregation change */
310 f64 wait_while_timer;
311
312 /* Implemention variables, not in the spec */
313 int rx_state;
314 int tx_state;
315 int mux_state;
316 int ptx_state;
317
318 /* actor admin key */
319 u32 group;
320
321 u32 marker_tx_id;
322
323 u32 bif_dev_instance;
324
325 u8 loopback_port;
326
327 /* bond mode */
328 u8 mode;
Steven Luong82c5dda2019-03-05 09:38:33 -0800329
330 /* good lacp pdu received */
331 u64 pdu_received;
332
333 /* bad lacp pdu received */
334 u64 bad_pdu_received;
335
336 /* pdu sent */
337 u64 pdu_sent;
338
339 /* good marker pdu received */
340 u64 marker_pdu_received;
341
342 /* bad marker pdu received */
343 u64 marker_bad_pdu_received;
344
345 /* pdu sent */
346 u64 marker_pdu_sent;
Steven Luonga1876b82019-08-20 16:58:00 -0700347
Steven Luong4c4223e2020-07-15 08:44:54 -0700348 /* member is numa node */
Steven Luonga1876b82019-08-20 16:58:00 -0700349 u8 is_local_numa;
Steven Luong4c4223e2020-07-15 08:44:54 -0700350} member_if_t;
Steven9cd2d7a2017-12-20 12:43:01 -0800351
352typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
Steven Luong4c4223e2020-07-15 08:44:54 -0700353 member_if_t * mif, u8 enable);
Steven9cd2d7a2017-12-20 12:43:01 -0800354
355typedef struct
356{
Steven Luongaa725782019-11-12 19:45:49 -0800357 u32 partner_state;
358 u32 actor_state;
359} lacp_stats_t;
360
361typedef struct
362{
Steven9cd2d7a2017-12-20 12:43:01 -0800363 /* pool of bonding interfaces */
364 bond_if_t *interfaces;
365
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500366 /* record used interface IDs */
367 uword *id_used;
368
Steven Luong4c4223e2020-07-15 08:44:54 -0700369 /* pool of member interfaces */
370 member_if_t *neighbors;
Steven9cd2d7a2017-12-20 12:43:01 -0800371
Steven9cd2d7a2017-12-20 12:43:01 -0800372 /* rapidly find a bond by vlib software interface index */
373 uword *bond_by_sw_if_index;
374
375 /* convenience variables */
376 vlib_main_t *vlib_main;
377 vnet_main_t *vnet_main;
378
379 /* lacp plugin is loaded */
380 u8 lacp_plugin_loaded;
381
382 lacp_enable_disable_func lacp_enable_disable;
Steven0d883012018-05-11 11:06:23 -0700383
Steven Luong4c4223e2020-07-15 08:44:54 -0700384 uword *member_by_sw_if_index;
Stevenc4e99c52018-09-27 20:06:26 -0700385
386 bond_per_thread_data_t *per_thread_data;
Steven Luong0f09a822019-08-07 12:20:22 -0700387
Steven Luongaa725782019-11-12 19:45:49 -0800388 lacp_stats_t **stats;
Steven9cd2d7a2017-12-20 12:43:01 -0800389} bond_main_t;
390
391/* bond packet trace capture */
392typedef struct
393{
394 ethernet_header_t ethernet;
395 u32 sw_if_index;
396 u32 bond_sw_if_index;
397} bond_packet_trace_t;
398
399typedef u32 (*load_balance_func) (vlib_main_t * vm,
400 vlib_node_runtime_t * node, bond_if_t * bif,
Steven Luong4c4223e2020-07-15 08:44:54 -0700401 vlib_buffer_t * b0, uword member_count);
Steven9cd2d7a2017-12-20 12:43:01 -0800402
403typedef struct
404{
405 load_balance_func load_balance;
406} bond_load_balance_func_t;
407
408extern vlib_node_registration_t bond_input_node;
Steven9f781d82018-06-05 11:09:32 -0700409extern vlib_node_registration_t bond_process_node;
Steven9cd2d7a2017-12-20 12:43:01 -0800410extern vnet_device_class_t bond_dev_class;
411extern bond_main_t bond_main;
412
413void bond_disable_collecting_distributing (vlib_main_t * vm,
Steven Luong4c4223e2020-07-15 08:44:54 -0700414 member_if_t * mif);
415void bond_enable_collecting_distributing (vlib_main_t * vm,
416 member_if_t * mif);
Steven9cd2d7a2017-12-20 12:43:01 -0800417u8 *format_bond_interface_name (u8 * s, va_list * args);
418
Steven Luonga1876b82019-08-20 16:58:00 -0700419void bond_set_intf_weight (vlib_main_t * vm,
420 bond_set_intf_weight_args_t * args);
Steven9cd2d7a2017-12-20 12:43:01 -0800421void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
422int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
Steven Luong4c4223e2020-07-15 08:44:54 -0700423void bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args);
424void bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args);
Steven9cd2d7a2017-12-20 12:43:01 -0800425int bond_dump_ifs (bond_interface_details_t ** out_bondids);
Steven Luong4c4223e2020-07-15 08:44:54 -0700426int bond_dump_member_ifs (member_interface_details_t ** out_memberids,
427 u32 bond_sw_if_index);
Steven9cd2d7a2017-12-20 12:43:01 -0800428
429static inline uword
430unformat_bond_mode (unformat_input_t * input, va_list * args)
431{
432 u8 *r = va_arg (*args, u8 *);
433
434 if (0);
435#define _(v, f, s) else if (unformat (input, s)) *r = BOND_MODE_##f;
436 foreach_bond_mode
437#undef _
438 else
439 return 0;
440
441 return 1;
442}
443
444static inline u8 *
445format_bond_mode (u8 * s, va_list * args)
446{
447 u32 i = va_arg (*args, u32);
448 u8 *t = 0;
449
450 switch (i)
451 {
452#define _(v, f, s) case BOND_MODE_##f: t = (u8 *) s; break;
453 foreach_bond_mode
454#undef _
455 default:
456 return format (s, "unknown");
457 }
458 return format (s, "%s", t);
459}
460
461static inline uword
462unformat_bond_load_balance (unformat_input_t * input, va_list * args)
463{
464 u8 *r = va_arg (*args, u8 *);
465
466 if (0);
467#define _(v, f, s, p) else if (unformat (input, s)) *r = BOND_LB_##f;
468 foreach_bond_lb
469#undef _
470 else
471 return 0;
472
473 return 1;
474}
475
476static inline u8 *
477format_bond_load_balance (u8 * s, va_list * args)
478{
479 u32 i = va_arg (*args, u32);
480 u8 *t = 0;
481
482 switch (i)
483 {
484#define _(v, f, s, p) case BOND_LB_##f: t = (u8 *) s; break;
485 foreach_bond_lb_algo
486#undef _
487 default:
488 return format (s, "unknown");
489 }
490 return format (s, "%s", t);
491}
492
493static inline void
494bond_register_callback (lacp_enable_disable_func func)
495{
496 bond_main_t *bm = &bond_main;
497
498 bm->lacp_plugin_loaded = 1;
499 bm->lacp_enable_disable = func;
500}
501
502static inline bond_if_t *
Steven Luong4c4223e2020-07-15 08:44:54 -0700503bond_get_bond_if_by_sw_if_index (u32 sw_if_index)
Steven9cd2d7a2017-12-20 12:43:01 -0800504{
505 bond_main_t *bm = &bond_main;
506 uword *p;
507
508 p = hash_get (bm->bond_by_sw_if_index, sw_if_index);
509 if (!p)
510 {
511 return 0;
512 }
513 return pool_elt_at_index (bm->interfaces, p[0]);
514}
515
516static inline bond_if_t *
Steven Luong4c4223e2020-07-15 08:44:54 -0700517bond_get_bond_if_by_dev_instance (u32 dev_instance)
Steven9cd2d7a2017-12-20 12:43:01 -0800518{
519 bond_main_t *bm = &bond_main;
520
521 return pool_elt_at_index (bm->interfaces, dev_instance);
522}
523
Steven Luong4c4223e2020-07-15 08:44:54 -0700524static inline member_if_t *
525bond_get_member_by_sw_if_index (u32 sw_if_index)
Steven9cd2d7a2017-12-20 12:43:01 -0800526{
527 bond_main_t *bm = &bond_main;
Steven Luong4c4223e2020-07-15 08:44:54 -0700528 member_if_t *mif = 0;
Steven0d883012018-05-11 11:06:23 -0700529 uword p;
Steven9cd2d7a2017-12-20 12:43:01 -0800530
Steven Luong4c4223e2020-07-15 08:44:54 -0700531 if (sw_if_index < vec_len (bm->member_by_sw_if_index))
Steven9cd2d7a2017-12-20 12:43:01 -0800532 {
Steven Luong4c4223e2020-07-15 08:44:54 -0700533 p = bm->member_by_sw_if_index[sw_if_index];
Steven0d883012018-05-11 11:06:23 -0700534 if (p)
Steven Luong4c4223e2020-07-15 08:44:54 -0700535 mif = pool_elt_at_index (bm->neighbors, p >> 1);
Steven9cd2d7a2017-12-20 12:43:01 -0800536 }
Steven0d883012018-05-11 11:06:23 -0700537
Steven Luong4c4223e2020-07-15 08:44:54 -0700538 return mif;
Steven9cd2d7a2017-12-20 12:43:01 -0800539}
540
#endif /* __included_vnet_bonding_node_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */