/*
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15#ifndef __included_vnet_bonding_node_h__
16#define __included_vnet_bonding_node_h__
17
18#include <vlib/vlib.h>
19#include <vlib/unix/unix.h>
20#include <vppinfra/format.h>
21#include <vppinfra/hash.h>
22#include <vnet/ethernet/ethernet.h>
23#include <vnet/interface.h>
24
/*
 * LACP timer constants.  Each timeout is defined as three times the
 * matching periodic timer (3.0 and 90.0 respectively).  The unit is
 * presumably seconds -- TODO confirm against the code that consumes them.
 * "TIMOUT" is the spelling the macro names actually use.
 */
#define LACP_FAST_PERIODIC_TIMER        1.0
#define LACP_SHORT_TIMOUT_TIME          (LACP_FAST_PERIODIC_TIMER * 3)
#define LACP_SLOW_PERIODIC_TIMER        30.0
#define LACP_LONG_TIMOUT_TIME           (LACP_SLOW_PERIODIC_TIMER * 3)
29
/*
 * Smaller of two values; only defined when no MIN macro exists yet.
 * The selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef MIN
#define MIN(x,y) ((x) < (y) ? (x) : (y))
#endif
33
/*
 * Evaluates to is_pow2 (a).  Judging by the name, callers use the result to
 * decide whether a modulo can be replaced by a cheaper operation -- TODO
 * confirm at the call sites.
 */
#define BOND_MODULO_SHORTCUT(a) \
  (is_pow2 (a))

/*
 * Bonding modes as X-macro entries: _ (numeric value, enum suffix, name).
 * The name string is what unformat_bond_mode () accepts and what
 * format_bond_mode () prints.
 */
#define foreach_bond_mode \
  _ (1, ROUND_ROBIN, "round-robin") \
  _ (2, ACTIVE_BACKUP, "active-backup") \
  _ (3, XOR, "xor") \
  _ (4, BROADCAST, "broadcast") \
  _ (5, LACP, "lacp")

/* BOND_MODE_<suffix> = <value>, generated from foreach_bond_mode. */
typedef enum
{
#define _(v, f, s) BOND_MODE_##f = v,
  foreach_bond_mode
#undef _
} bond_mode_t;
50
/*
 * Configurable load-balances, as X-macro entries:
 * _ (numeric value, enum suffix, name string, identifier).
 * unformat_bond_load_balance () tries the names in the order listed here,
 * so "l23" is tried before "l2"; presumably this keeps the shorter name
 * from matching the start of the longer one -- TODO confirm against
 * unformat () semantics.
 */
#define foreach_bond_lb \
  _ (2, L23, "l23", l23) \
  _ (1, L34, "l34", l34) \
  _ (0, L2, "l2", l2)
56
/*
 * Load-balance functions implemented in bond-output, as X-macro entries:
 * _ (numeric value, enum suffix, name string, identifier).
 * The name string is what format_bond_load_balance () prints.
 */
#define foreach_bond_lb_algo \
  _ (0, L2, "l2", l2) \
  _ (1, L34, "l34", l34) \
  _ (2, L23, "l23", l23) \
  _ (3, RR, "round-robin", round_robin) \
  _ (4, BC, "broadcast", broadcast) \
  _ (5, AB, "active-backup", active_backup)

/* BOND_LB_<suffix> = <value>, generated from foreach_bond_lb_algo. */
typedef enum
{
#define _(v, f, s, p) BOND_LB_##f = v,
  foreach_bond_lb_algo
#undef _
} bond_load_balance_t;
72
/*
 * Process event codes.  Going by the names, BOND_SEND_GARP_NA asks a
 * process node to send gratuitous ARP / NA -- TODO confirm in the node
 * implementation.
 */
typedef enum
{
  BOND_SEND_GARP_NA = 1,
} bond_send_garp_na_process_event_t;
77
Steven9cd2d7a2017-12-20 12:43:01 -080078typedef struct
79{
Alexander Chernavinad9d5282018-12-13 09:08:09 -050080 u32 id;
Steven9cd2d7a2017-12-20 12:43:01 -080081 u8 hw_addr_set;
82 u8 hw_addr[6];
83 u8 mode;
84 u8 lb;
Zhiyong Yang751e3f32019-06-26 05:49:14 -040085 u8 numa_only;
Steven9cd2d7a2017-12-20 12:43:01 -080086 /* return */
87 u32 sw_if_index;
88 int rv;
89 clib_error_t *error;
90} bond_create_if_args_t;
91
/* Argument/result block passed to bond_enslave (). */
typedef struct
{
  /* slave's sw_if_index */
  u32 slave;
  /* bond's sw_if_index */
  u32 group;
  u8 is_passive;
  u8 is_long_timeout;
  /* return */
  int rv;
  clib_error_t *error;
} bond_enslave_args_t;
104
/* Argument/result block passed to bond_detach_slave (). */
typedef struct
{
  /* presumably the slave's sw_if_index, as in bond_enslave_args_t */
  u32 slave;
  /* return */
  int rv;
  clib_error_t *error;
} bond_detach_slave_args_t;
112
113/** BOND interface details struct */
114typedef struct
115{
116 u32 sw_if_index;
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500117 u32 id;
Steven9cd2d7a2017-12-20 12:43:01 -0800118 u8 interface_name[64];
119 u8 mode;
120 u8 lb;
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400121 u8 numa_only;
Steven9cd2d7a2017-12-20 12:43:01 -0800122 u32 active_slaves;
123 u32 slaves;
124} bond_interface_details_t;
125
/** slave interface details struct (element type produced by
    bond_dump_slave_ifs ()) */
typedef struct
{
  u32 sw_if_index;
  u8 interface_name[64];
  u8 is_passive;
  u8 is_long_timeout;
  u32 active_slaves;
} slave_interface_details_t;
135
/*
 * LACP port information; used for the actor/partner fields of bond_if_t
 * and slave_if_t.  The struct is wrapped in CLIB_PACKED (presumably so its
 * layout has no padding -- TODO confirm the macro's definition).
 */
typedef CLIB_PACKED (struct
		     {
		     u16 system_priority;
		     u8 system[6];
		     u16 key; u16 port_priority; u16 port_number;
		     u8 state;
		     }) lacp_port_info_t;
143
144typedef struct
145{
Stevenc4e99c52018-09-27 20:06:26 -0700146 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
147 u32 buffers[VLIB_FRAME_SIZE];
148 u32 n_buffers;
149} bond_per_port_queue_t;
Stevena005e7f2018-03-22 17:46:58 -0700150
Stevenc4e99c52018-09-27 20:06:26 -0700151typedef struct
152{
153 bond_per_port_queue_t *per_port_queue;
154} bond_per_thread_data_t;
Stevena005e7f2018-03-22 17:46:58 -0700155
156typedef struct
157{
Steven9cd2d7a2017-12-20 12:43:01 -0800158 u8 admin_up;
159 u8 mode;
160 u8 lb;
161
Zhiyong Yang6865d3c2019-05-15 04:25:20 -0400162 /* This flag works for active-backup mode only
163 and marks if the working port is local numa. */
164 u8 is_local_numa;
165 /* current working sw_if_index in active-bakeup mode. */
166 u32 sw_if_index_working;
Steven9cd2d7a2017-12-20 12:43:01 -0800167 /* the last slave index for the rr lb */
168 u32 lb_rr_last_index;
169
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500170 /* Real device instance in interface vector */
Steven9cd2d7a2017-12-20 12:43:01 -0800171 u32 dev_instance;
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500172
173 /* Interface ID being shown to user */
174 u32 id;
175
Steven9cd2d7a2017-12-20 12:43:01 -0800176 u32 hw_if_index;
177 u32 sw_if_index;
178
179 /* Configured slaves */
180 u32 *slaves;
181
182 /* Slaves that are in DISTRIBUTING state */
183 u32 *active_slaves;
184
185 /* rapidly find an active slave */
186 uword *active_slave_by_sw_if_index;
187
188 lacp_port_info_t partner;
189 lacp_port_info_t actor;
190 u8 individual_aggregator;
191
Zhiyong Yang751e3f32019-06-26 05:49:14 -0400192 /* If the flag numa_only is set, it means that only slaves
193 on local numa node works for lacp mode if have at least one,
194 otherwise it works as usual. */
195 u8 numa_only;
196
197 /* How many slaves on local numa node are there in lacp mode? */
198 word n_numa_slaves;
199
Steven9cd2d7a2017-12-20 12:43:01 -0800200 u32 group;
201 uword *port_number_bitmap;
202 u8 use_custom_mac;
203 u8 hw_address[6];
Stevena005e7f2018-03-22 17:46:58 -0700204
205 clib_spinlock_t lockp;
Steven9cd2d7a2017-12-20 12:43:01 -0800206} bond_if_t;
207
208typedef struct
209{
210 u8 persistent_hw_address[6];
211
212 /* neighbor's vlib software interface index */
213 u32 sw_if_index;
214
215 /* Neighbor time-to-live (usually 3s) */
216 f32 ttl_in_seconds;
217
218 /* 1 = interface is configured with long timeout (60s) */
219 u8 is_long_timeout;
220
221 /* 1 = debug is on; 0 = debug is off */
222 u8 debug;
223
224 /* tx packet template id for this neighbor */
225 u8 packet_template_index;
226
227 /* Info we actually keep about each neighbor */
228
229 /* Jenkins hash optimization: avoid tlv scan, send short keepalive msg */
230 u8 last_packet_signature_valid;
231 uword last_packet_signature;
232
233 /* last received lacp packet, for the J-hash optimization */
234 u8 *last_rx_pkt;
235
236 /* last marker packet */
237 u8 *last_marker_pkt;
238
239 /* neighbor vlib hw_if_index */
240 u32 hw_if_index;
241
242 /* actor does not initiate the protocol exchange */
243 u8 is_passive;
244
245 /* Partner port information */
246 lacp_port_info_t partner;
247 lacp_port_info_t partner_admin;;
248
Zhiyong Yang52c5f262019-06-13 21:14:33 -0400249 /* Actor port information */
Steven9cd2d7a2017-12-20 12:43:01 -0800250 lacp_port_info_t actor;
251 lacp_port_info_t actor_admin;
252
253 /* Need To Transmit flag */
254 u8 ntt;
255
256 /* Link has been established and Aggregate Port is operable */
257 u8 port_enabled;
258
259 /* Initialization or reinitialization of the lacp protocol entity */
260 u8 begin;
261
262 /* Aggregation Port is operating the lacp */
263 u8 lacp_enabled;
264
265 /* MUX to indicate to the Selection Logic wait_while_timer expired */
266 u8 ready_n;
267
268 /* Selection Logic indicates al Aggregation Ports attached */
269 u8 ready;
270
271 /* Selection Logic selected an Aggregator */
272 int selected;
273
274 /* RX machine indicates an Aggregation Port in PORT_DISABLED state */
275 u8 port_moved;
276
277 /* timer used to detect whether received protocol information has expired */
278 f64 current_while_timer;
279
280 /* timer used to detect actor churn states */
281 f64 actor_churn_timer;
282
283 /* time last lacpdu was sent */
Steven Luong82c5dda2019-03-05 09:38:33 -0800284 f64 last_lacpdu_sent_time;
285
286 /* time last lacpdu was received */
287 f64 last_lacpdu_recd_time;
288
289 /* time last marker pdu was sent */
290 f64 last_marker_pdu_sent_time;
291
292 /* time last marker pdu was received */
293 f64 last_marker_pdu_recd_time;
Steven9cd2d7a2017-12-20 12:43:01 -0800294
295 /* timer used to generate periodic transmission */
296 f64 periodic_timer;
297
298 /* timer used to detect partner churn states */
299 f64 partner_churn_timer;
300
301 /* provides hysteresis before performing an aggregation change */
302 f64 wait_while_timer;
303
304 /* Implemention variables, not in the spec */
305 int rx_state;
306 int tx_state;
307 int mux_state;
308 int ptx_state;
309
310 /* actor admin key */
311 u32 group;
312
313 u32 marker_tx_id;
314
315 u32 bif_dev_instance;
316
317 u8 loopback_port;
318
319 /* bond mode */
320 u8 mode;
Steven Luong82c5dda2019-03-05 09:38:33 -0800321
322 /* good lacp pdu received */
323 u64 pdu_received;
324
325 /* bad lacp pdu received */
326 u64 bad_pdu_received;
327
328 /* pdu sent */
329 u64 pdu_sent;
330
331 /* good marker pdu received */
332 u64 marker_pdu_received;
333
334 /* bad marker pdu received */
335 u64 marker_bad_pdu_received;
336
337 /* pdu sent */
338 u64 marker_pdu_sent;
Steven9cd2d7a2017-12-20 12:43:01 -0800339} slave_if_t;
340
/*
 * Callback type stored in bond_main_t.lacp_enable_disable by
 * bond_register_callback ().  Receives the vlib main, the bond interface,
 * the slave interface and an enable flag.
 */
typedef void (*lacp_enable_disable_func) (vlib_main_t * vm, bond_if_t * bif,
					  slave_if_t * sif, u8 enable);
343
344typedef struct
345{
346 /* pool of bonding interfaces */
347 bond_if_t *interfaces;
348
Alexander Chernavinad9d5282018-12-13 09:08:09 -0500349 /* record used interface IDs */
350 uword *id_used;
351
Stevenc4e99c52018-09-27 20:06:26 -0700352 /* pool of slave interfaces */
Steven9cd2d7a2017-12-20 12:43:01 -0800353 slave_if_t *neighbors;
354
Steven9cd2d7a2017-12-20 12:43:01 -0800355 /* rapidly find a bond by vlib software interface index */
356 uword *bond_by_sw_if_index;
357
358 /* convenience variables */
359 vlib_main_t *vlib_main;
360 vnet_main_t *vnet_main;
361
362 /* lacp plugin is loaded */
363 u8 lacp_plugin_loaded;
364
365 lacp_enable_disable_func lacp_enable_disable;
Steven0d883012018-05-11 11:06:23 -0700366
367 uword *slave_by_sw_if_index;
Stevenc4e99c52018-09-27 20:06:26 -0700368
369 bond_per_thread_data_t *per_thread_data;
Steven9cd2d7a2017-12-20 12:43:01 -0800370} bond_main_t;
371
/* bond packet trace capture: the ethernet header plus two interface
   indices (sw_if_index and the bond's sw_if_index) */
typedef struct
{
  ethernet_header_t ethernet;
  u32 sw_if_index;
  u32 bond_sw_if_index;
} bond_packet_trace_t;
379
380typedef u32 (*load_balance_func) (vlib_main_t * vm,
381 vlib_node_runtime_t * node, bond_if_t * bif,
Steven18c0f222018-03-26 21:52:11 -0700382 vlib_buffer_t * b0, uword slave_count);
Steven9cd2d7a2017-12-20 12:43:01 -0800383
/* Wrapper holding a single load_balance_func pointer. */
typedef struct
{
  load_balance_func load_balance;
} bond_load_balance_func_t;
388
389extern vlib_node_registration_t bond_input_node;
Steven9f781d82018-06-05 11:09:32 -0700390extern vlib_node_registration_t bond_process_node;
Steven9cd2d7a2017-12-20 12:43:01 -0800391extern vnet_device_class_t bond_dev_class;
392extern bond_main_t bond_main;
393
394void bond_disable_collecting_distributing (vlib_main_t * vm,
395 slave_if_t * sif);
396void bond_enable_collecting_distributing (vlib_main_t * vm, slave_if_t * sif);
397u8 *format_bond_interface_name (u8 * s, va_list * args);
398
399void bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args);
400int bond_delete_if (vlib_main_t * vm, u32 sw_if_index);
401void bond_enslave (vlib_main_t * vm, bond_enslave_args_t * args);
402void bond_detach_slave (vlib_main_t * vm, bond_detach_slave_args_t * args);
403int bond_dump_ifs (bond_interface_details_t ** out_bondids);
404int bond_dump_slave_ifs (slave_interface_details_t ** out_slaveids,
405 u32 bond_sw_if_index);
406
407static inline uword
408unformat_bond_mode (unformat_input_t * input, va_list * args)
409{
410 u8 *r = va_arg (*args, u8 *);
411
412 if (0);
413#define _(v, f, s) else if (unformat (input, s)) *r = BOND_MODE_##f;
414 foreach_bond_mode
415#undef _
416 else
417 return 0;
418
419 return 1;
420}
421
422static inline u8 *
423format_bond_mode (u8 * s, va_list * args)
424{
425 u32 i = va_arg (*args, u32);
426 u8 *t = 0;
427
428 switch (i)
429 {
430#define _(v, f, s) case BOND_MODE_##f: t = (u8 *) s; break;
431 foreach_bond_mode
432#undef _
433 default:
434 return format (s, "unknown");
435 }
436 return format (s, "%s", t);
437}
438
439static inline uword
440unformat_bond_load_balance (unformat_input_t * input, va_list * args)
441{
442 u8 *r = va_arg (*args, u8 *);
443
444 if (0);
445#define _(v, f, s, p) else if (unformat (input, s)) *r = BOND_LB_##f;
446 foreach_bond_lb
447#undef _
448 else
449 return 0;
450
451 return 1;
452}
453
454static inline u8 *
455format_bond_load_balance (u8 * s, va_list * args)
456{
457 u32 i = va_arg (*args, u32);
458 u8 *t = 0;
459
460 switch (i)
461 {
462#define _(v, f, s, p) case BOND_LB_##f: t = (u8 *) s; break;
463 foreach_bond_lb_algo
464#undef _
465 default:
466 return format (s, "unknown");
467 }
468 return format (s, "%s", t);
469}
470
471static inline void
472bond_register_callback (lacp_enable_disable_func func)
473{
474 bond_main_t *bm = &bond_main;
475
476 bm->lacp_plugin_loaded = 1;
477 bm->lacp_enable_disable = func;
478}
479
480static inline bond_if_t *
481bond_get_master_by_sw_if_index (u32 sw_if_index)
482{
483 bond_main_t *bm = &bond_main;
484 uword *p;
485
486 p = hash_get (bm->bond_by_sw_if_index, sw_if_index);
487 if (!p)
488 {
489 return 0;
490 }
491 return pool_elt_at_index (bm->interfaces, p[0]);
492}
493
494static inline bond_if_t *
495bond_get_master_by_dev_instance (u32 dev_instance)
496{
497 bond_main_t *bm = &bond_main;
498
499 return pool_elt_at_index (bm->interfaces, dev_instance);
500}
501
502static inline slave_if_t *
503bond_get_slave_by_sw_if_index (u32 sw_if_index)
504{
505 bond_main_t *bm = &bond_main;
506 slave_if_t *sif = 0;
Steven0d883012018-05-11 11:06:23 -0700507 uword p;
Steven9cd2d7a2017-12-20 12:43:01 -0800508
Steven0d883012018-05-11 11:06:23 -0700509 if (sw_if_index < vec_len (bm->slave_by_sw_if_index))
Steven9cd2d7a2017-12-20 12:43:01 -0800510 {
Steven0d883012018-05-11 11:06:23 -0700511 p = bm->slave_by_sw_if_index[sw_if_index];
512 if (p)
513 sif = pool_elt_at_index (bm->neighbors, p >> 1);
Steven9cd2d7a2017-12-20 12:43:01 -0800514 }
Steven0d883012018-05-11 11:06:23 -0700515
Steven9cd2d7a2017-12-20 12:43:01 -0800516 return sif;
517}
518
519#endif /* __included_vnet_bonding_node_h__ */
520
521/*
522 * fd.io coding-style-patch-verification: ON
523 *
524 * Local Variables:
525 * eval: (c-set-style "gnu")
526 * End:
527 */