/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/drop_dpo.h>
#include <vppinfra/math.h>              /* for fabs */
#include <vnet/adj/adj.h>
#include <vnet/adj/adj_internal.h>
#include <vnet/fib/fib_urpf_list.h>
#include <vnet/bier/bier_fwd.h>
#include <vnet/fib/mpls_fib.h>
#include <vnet/ip/ip4_inlines.h>
#include <vnet/ip/ip6_inlines.h>

// clang-format off

/*
 * distribution error tolerance for load-balancing
 */
const f64 multipath_next_hop_error_tolerance = 0.1;
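/*
 * A sketch of what the tolerance means (see ip_multipath_normalize_next_hops()
 * below): it bounds the average per-bucket rounding error, so with 0.1 the
 * bucket allocation may deviate from the requested weight ratios by, on
 * average, up to a tenth of a bucket per bucket before a larger bucket count
 * is tried.
 */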

static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;

/**
 * the logger
 */
vlib_log_class_t load_balance_logger;

#define LB_DBG(_lb, _fmt, _args...)                                     \
{                                                                       \
    vlib_log_debug(load_balance_logger,                                 \
                   "lb:[%U]:" _fmt,                                     \
                   format_load_balance, load_balance_get_index(_lb),    \
                   LOAD_BALANCE_FORMAT_NONE,                            \
                   ##_args);                                            \
}

/**
 * Pool of all DPOs. It's not static so the DP can have fast access
 */
load_balance_t *load_balance_pool;

/**
 * The one instance of load-balance main
 */
load_balance_main_t load_balance_main = {
    .lbm_to_counters = {
        .name = "route-to",
        .stat_segment_name = "/net/route/to",
    },
    .lbm_via_counters = {
        .name = "route-via",
        .stat_segment_name = "/net/route/via",
    }
};

f64
load_balance_get_multipath_tolerance (void)
{
    return (multipath_next_hop_error_tolerance);
}

static inline index_t
load_balance_get_index (const load_balance_t *lb)
{
    return (lb - load_balance_pool);
}

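/**
 * Return the bucket vector for a load-balance: small LBs keep their buckets
 * inline in the object itself (LB_NUM_INLINE_BUCKETS of them); larger ones
 * spill to the heap-allocated lb_buckets vector.
 */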
static inline dpo_id_t*
load_balance_get_buckets (load_balance_t *lb)
{
    if (LB_HAS_INLINE_BUCKETS(lb))
    {
        return (lb->lb_buckets_inline);
    }
    else
    {
        return (lb->lb_buckets);
    }
}

static load_balance_t *
load_balance_alloc_i (void)
{
    load_balance_t *lb;
    u8 need_barrier_sync = 0;
    vlib_main_t *vm = vlib_get_main();
    ASSERT (vm->thread_index == 0);

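    /*
     * Growing the pool or the counter vectors can move them in memory while
     * workers are reading them, so if either is about to expand we hold the
     * worker threads at a barrier for the duration of the allocation.
     */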
    need_barrier_sync = pool_get_will_expand (load_balance_pool);

    if (need_barrier_sync)
        vlib_worker_thread_barrier_sync (vm);

    pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
    clib_memset(lb, 0, sizeof(*lb));

    lb->lb_map = INDEX_INVALID;
    lb->lb_urpf = INDEX_INVALID;

    if (need_barrier_sync == 0)
    {
        need_barrier_sync += vlib_validate_combined_counter_will_expand
            (&(load_balance_main.lbm_to_counters),
             load_balance_get_index(lb));
        need_barrier_sync += vlib_validate_combined_counter_will_expand
            (&(load_balance_main.lbm_via_counters),
             load_balance_get_index(lb));
        if (need_barrier_sync)
            vlib_worker_thread_barrier_sync (vm);
    }

    vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
                                   load_balance_get_index(lb));
    vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
                                   load_balance_get_index(lb));
    vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
                               load_balance_get_index(lb));
    vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
                               load_balance_get_index(lb));

    if (need_barrier_sync)
        vlib_worker_thread_barrier_release (vm);

    return (lb);
}

static u8*
load_balance_format (index_t lbi,
                     load_balance_format_flags_t flags,
                     u32 indent,
                     u8 *s)
{
    vlib_counter_t to, via;
    load_balance_t *lb;
    dpo_id_t *buckets;
    u32 i;

    lb = load_balance_get_or_null(lbi);
    if (lb == NULL)
    {
        s = format(s, "DELETED lb:%u", lbi);
        return (s);
    }

    vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
    vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
    buckets = load_balance_get_buckets(lb);

    s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
    s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
    s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
    s = format(s, "uRPF:%d ", lb->lb_urpf);
    if (lb->lb_flags)
    {
        load_balance_attr_t attr;

        s = format(s, "flags:[");

        FOR_EACH_LOAD_BALANCE_ATTR(attr)
        {
            if (lb->lb_flags & (1 << attr))
            {
                s = format (s, "%s", load_balance_attr_names[attr]);
            }
        }
        s = format(s, "] ");
    }
    s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
    if (0 != via.packets)
    {
        s = format(s, " via:[%Ld:%Ld]",
                   via.packets, via.bytes);
    }
    s = format(s, "]");

    if (INDEX_INVALID != lb->lb_map)
    {
        s = format(s, "\n%U%U",
                   format_white_space, indent+4,
                   format_load_balance_map, lb->lb_map, indent+4);
    }
    for (i = 0; i < lb->lb_n_buckets; i++)
    {
        s = format(s, "\n%U[%d] %U",
                   format_white_space, indent+2,
                   i,
                   format_dpo_id,
                   &buckets[i], indent+6);
    }
    return (s);
}

u8*
format_load_balance (u8 * s, va_list * args)
{
    index_t lbi = va_arg(*args, index_t);
    load_balance_format_flags_t flags = va_arg(*args, load_balance_format_flags_t);

    return (load_balance_format(lbi, flags, 0, s));
}

static u8*
format_load_balance_dpo (u8 * s, va_list * args)
{
    index_t lbi = va_arg(*args, index_t);
    u32 indent = va_arg(*args, u32);

    return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
}

flow_hash_config_t
load_balance_get_default_flow_hash (dpo_proto_t lb_proto)
{
    switch (lb_proto)
    {
    case DPO_PROTO_IP4:
    case DPO_PROTO_IP6:
        return (IP_FLOW_HASH_DEFAULT);

    case DPO_PROTO_MPLS:
        return (MPLS_FLOW_HASH_DEFAULT);

    case DPO_PROTO_ETHERNET:
    case DPO_PROTO_BIER:
    case DPO_PROTO_NSH:
        break;
    }

    return (0);
}

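/*
 * Usage sketch (illustrative only - the bucket count and DPO names below
 * are made up): create a 2-bucket IP4 load-balance and point its buckets
 * at two resolved DPOs.
 *
 *    index_t lbi;
 *
 *    lbi = load_balance_create(2, DPO_PROTO_IP4,
 *                              load_balance_get_default_flow_hash(DPO_PROTO_IP4));
 *    load_balance_set_bucket(lbi, 0, &dpo_of_path_0);
 *    load_balance_set_bucket(lbi, 1, &dpo_of_path_1);
 */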
static load_balance_t *
load_balance_create_i (u32 num_buckets,
                       dpo_proto_t lb_proto,
                       flow_hash_config_t fhc)
{
    load_balance_t *lb;

    ASSERT (num_buckets <= LB_MAX_BUCKETS);

    lb = load_balance_alloc_i();
    lb->lb_hash_config = fhc;
    lb->lb_n_buckets = num_buckets;
    lb->lb_n_buckets_minus_1 = num_buckets-1;
    lb->lb_proto = lb_proto;

    if (!LB_HAS_INLINE_BUCKETS(lb))
    {
        vec_validate_aligned(lb->lb_buckets,
                             lb->lb_n_buckets - 1,
                             CLIB_CACHE_LINE_BYTES);
    }

    LB_DBG(lb, "create");

    return (lb);
}

index_t
load_balance_create (u32 n_buckets,
                     dpo_proto_t lb_proto,
                     flow_hash_config_t fhc)
{
    return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
}

static inline void
load_balance_set_bucket_i (load_balance_t *lb,
                           u32 bucket,
                           dpo_id_t *buckets,
                           const dpo_id_t *next)
{
    dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
}

void
load_balance_set_bucket (index_t lbi,
                         u32 bucket,
                         const dpo_id_t *next)
{
    load_balance_t *lb;
    dpo_id_t *buckets;

    lb = load_balance_get(lbi);
    buckets = load_balance_get_buckets(lb);

    ASSERT(bucket < lb->lb_n_buckets);

    load_balance_set_bucket_i(lb, bucket, buckets, next);
}

int
load_balance_is_drop (const dpo_id_t *dpo)
{
    load_balance_t *lb;

    if (DPO_LOAD_BALANCE != dpo->dpoi_type)
        return (0);

    lb = load_balance_get(dpo->dpoi_index);

    if (1 == lb->lb_n_buckets)
    {
        return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
    }
    return (0);
}

u16
load_balance_n_buckets (index_t lbi)
{
    load_balance_t *lb;

    lb = load_balance_get(lbi);

    return (lb->lb_n_buckets);
}

void
load_balance_set_fib_entry_flags (index_t lbi,
                                  fib_entry_flag_t flags)
{
    load_balance_t *lb;

    lb = load_balance_get(lbi);
    lb->lb_fib_entry_flags = flags;
}


void
load_balance_set_urpf (index_t lbi,
                       index_t urpf)
{
    load_balance_t *lb;
    index_t old;

    lb = load_balance_get(lbi);

    /*
     * packets in flight may see this change, but the write is atomic,
     * so they read either the old or the new uRPF list index.
     */
    old = lb->lb_urpf;
    lb->lb_urpf = urpf;

    fib_urpf_list_unlock(old);
    fib_urpf_list_lock(urpf);
}

index_t
load_balance_get_urpf (index_t lbi)
{
    load_balance_t *lb;

    lb = load_balance_get(lbi);

    return (lb->lb_urpf);
}

const dpo_id_t *
load_balance_get_bucket (index_t lbi,
                         u32 bucket)
{
    load_balance_t *lb;

    lb = load_balance_get(lbi);

    return (load_balance_get_bucket_i(lb, bucket));
}

static int
next_hop_sort_by_weight (const load_balance_path_t * n1,
                         const load_balance_path_t * n2)
{
    return ((int) n1->path_weight - (int) n2->path_weight);
}

/* The given next-hop vector is over-written with a normalized one: the
   entries are sorted by weight and each weight is scaled to the number of
   adjacencies (buckets) assigned to that next hop.
   Returns the number of adjacencies in the block. */
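/*
 * Worked example (numbers for illustration): two paths with weights 1 and 4,
 * so sum_weight = 5. With n_adj = 2: norm = 0.4 and the low-weight path
 * rounds to 0 buckets, so the error is forced to n_adj and 2 is rejected.
 * With n_adj = 4: norm = 0.8, the paths get round(0.8) = 1 and round(3.2) = 3
 * buckets, error = 0.2 + 0.2 = 0.4 <= 0.1 * 4, so the result is 4 buckets
 * split 1:3.
 */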
u32
ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
                                  load_balance_path_t ** normalized_next_hops,
                                  u32 *sum_weight_in,
                                  f64 multipath_next_hop_error_tolerance)
{
    load_balance_path_t * nhs;
    uword n_nhs, n_adj, n_adj_left, i, sum_weight;
    f64 norm, error;

    n_nhs = vec_len (raw_next_hops);
    ASSERT (n_nhs > 0);
    if (n_nhs == 0)
        return 0;

    /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
    nhs = *normalized_next_hops;
    vec_validate (nhs, 2*n_nhs - 1);

    /* Fast path: 1 next hop in block. */
    n_adj = n_nhs;
    if (n_nhs == 1)
    {
        nhs[0] = raw_next_hops[0];
        nhs[0].path_weight = 1;
        vec_set_len (nhs, 1);
        sum_weight = 1;
        goto done;
    }

    else if (n_nhs == 2)
    {
        int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;

        /* Fast sort. */
        nhs[0] = raw_next_hops[cmp];
        nhs[1] = raw_next_hops[cmp ^ 1];

        /* Fast path: equal cost multipath with 2 next hops. */
        if (nhs[0].path_weight == nhs[1].path_weight)
        {
            nhs[0].path_weight = nhs[1].path_weight = 1;
            vec_set_len (nhs, 2);
            sum_weight = 2;
            goto done;
        }
    }
    else
    {
        clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
        qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
    }

    /* Find total weight to normalize weights. */
    sum_weight = 0;
    for (i = 0; i < n_nhs; i++)
        sum_weight += nhs[i].path_weight;

    /* In the unlikely case that all weights are given as 0, set them all to 1. */
    if (sum_weight == 0)
    {
        for (i = 0; i < n_nhs; i++)
            nhs[i].path_weight = 1;
        sum_weight = n_nhs;
    }

    /* Save copies of all next hop weights to avoid being overwritten in loop below. */
    for (i = 0; i < n_nhs; i++)
        nhs[n_nhs + i].path_weight = nhs[i].path_weight;

    /* Try larger and larger power of 2 sized adjacency blocks until we
       find one where traffic flows to within the multipath error tolerance
       of the specified weights. */
    for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
    {
        ASSERT (n_adj <= LB_MAX_BUCKETS);
        error = 0;

        norm = n_adj / ((f64) sum_weight);
        n_adj_left = n_adj;
        for (i = 0; i < n_nhs; i++)
        {
            f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
            word n = flt_round_nearest (nf);

            n = n > n_adj_left ? n_adj_left : n;
            n_adj_left -= n;
            error += fabs (nf - n);
            nhs[i].path_weight = n;

            if (0 == nhs[i].path_weight)
            {
                /*
                 * This happens when the weight skew is high (norm is small)
                 * and nf rounds down to zero. Without this correction the
                 * path with a low weight would have no representation in the
                 * load-balance - we don't want that. Forcing the error up
                 * makes the loop retry with more buckets, so a high weight
                 * skew simply costs a load-balance with many buckets.
                 * Pays ya money, takes ya choice.
                 */
                error = n_adj;
                break;
            }
        }

        nhs[0].path_weight += n_adj_left;

        /* Is the average error per adjacency within tolerance for this size
         * adjacency block, or have we reached the maximum number of buckets
         * we support? */
        if (error <= multipath_next_hop_error_tolerance*n_adj ||
            n_adj >= LB_MAX_BUCKETS)
        {
            if (i < n_nhs)
            {
                /* Truncate any next hops in excess */
                vlib_log_err(load_balance_logger,
                             "Too many paths for load-balance, truncating %d -> %d",
                             n_nhs, i);
                for (int j = i; j < n_nhs; j++)
                    dpo_reset (&vec_elt(nhs, j).path_dpo);
            }
            vec_set_len (nhs, i);
            break;
        }
    }

done:
    /* Save vector for next call. */
    *normalized_next_hops = nhs;
    *sum_weight_in = sum_weight;
    return n_adj;
}

static load_balance_path_t *
load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
                                       dpo_proto_t drop_proto)
{
    if (0 == vec_len(nhs))
    {
        load_balance_path_t *new_nhs = NULL, *nh;

        /*
         * we need something for the load-balance. so use the drop
         */
        vec_add2(new_nhs, nh, 1);

        nh->path_weight = 1;
        dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));

        return (new_nhs);
    }

    return (NULL);
}

/*
 * Fill in adjacencies in block based on corresponding
 * next hop adjacencies.
 */
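/*
 * For example (illustrative): normalized weights [1, 3] over 4 buckets
 * produce the bucket vector [nh0, nh1, nh1, nh1].
 */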
static void
load_balance_fill_buckets_norm (load_balance_t *lb,
                                load_balance_path_t *nhs,
                                dpo_id_t *buckets,
                                u32 n_buckets)
{
    load_balance_path_t *nh;
    u16 ii, bucket;

    bucket = 0;

    /*
     * the next-hops have normalised weights. that means their sum is the number
     * of buckets we need to fill.
     */
    vec_foreach (nh, nhs)
    {
        for (ii = 0; ii < nh->path_weight; ii++)
        {
            ASSERT(bucket < n_buckets);
            load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
        }
    }
}
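/*
 * As above, but 'sticky': buckets that would belong to paths whose DPO is a
 * drop are instead filled, round-robin, from the remaining forwarding paths,
 * so flows hashed to a healthy path keep their bucket when another path goes
 * down. If every path is a drop, the buckets are filled from the original set.
 */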
static void
load_balance_fill_buckets_sticky (load_balance_t *lb,
                                  load_balance_path_t *nhs,
                                  dpo_id_t *buckets,
                                  u32 n_buckets)
{
    load_balance_path_t *nh, *fwding_paths;
    u16 ii, bucket, fpath;

    fpath = bucket = 0;
    fwding_paths = NULL;

    vec_foreach (nh, nhs)
    {
        if (!dpo_is_drop(&nh->path_dpo))
        {
            vec_add1(fwding_paths, *nh);
        }
    }
    if (vec_len(fwding_paths) == 0)
        fwding_paths = vec_dup(nhs);

    /*
     * the next-hops have normalised weights. that means their sum is the number
     * of buckets we need to fill.
     */
    vec_foreach (nh, nhs)
    {
        for (ii = 0; ii < nh->path_weight; ii++)
        {
            ASSERT(bucket < n_buckets);
            if (!dpo_is_drop(&nh->path_dpo))
            {
                load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
            }
            else
            {
                /* fill the bucket from the next up (forwarding) path */
                load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
                ASSERT(vec_len(fwding_paths) > 0);
                fpath = (fpath + 1) % vec_len(fwding_paths);
            }
        }
    }

    vec_free(fwding_paths);
}

static void
load_balance_fill_buckets (load_balance_t *lb,
                           load_balance_path_t *nhs,
                           dpo_id_t *buckets,
                           u32 n_buckets,
                           load_balance_flags_t flags)
{
    if (flags & LOAD_BALANCE_FLAG_STICKY)
    {
        load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
    }
    else
    {
        load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
    }
}

static inline void
load_balance_set_n_buckets (load_balance_t *lb,
                            u32 n_buckets)
{
    ASSERT (n_buckets <= LB_MAX_BUCKETS);
    lb->lb_n_buckets = n_buckets;
    lb->lb_n_buckets_minus_1 = n_buckets-1;
}

void
load_balance_multipath_update (const dpo_id_t *dpo,
                               const load_balance_path_t * raw_nhs,
                               load_balance_flags_t flags)
{
    load_balance_path_t *nh, *nhs, *fixed_nhs;
    u32 sum_of_weights, n_buckets, ii;
    index_t lbmi, old_lbmi;
    load_balance_t *lb;
    dpo_id_t *tmp_dpo;

    nhs = NULL;

    ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
    lb = load_balance_get(dpo->dpoi_index);
    lb->lb_flags = flags;
    fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
    n_buckets =
        ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
                                          raw_nhs :
                                          fixed_nhs),
                                         &nhs,
                                         &sum_of_weights,
                                         multipath_next_hop_error_tolerance);

    /*
     * Save the old load-balance map used, and get a new one if required.
     */
    old_lbmi = lb->lb_map;
    if (flags & LOAD_BALANCE_FLAG_USES_MAP)
    {
        lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
    }
    else
    {
        lbmi = INDEX_INVALID;
    }

    if (0 == lb->lb_n_buckets)
    {
        /*
         * first time initialisation. no packets inflight, so we can write
         * at leisure.
         */
        load_balance_set_n_buckets(lb, n_buckets);

        if (!LB_HAS_INLINE_BUCKETS(lb))
            vec_validate_aligned(lb->lb_buckets,
                                 lb->lb_n_buckets - 1,
                                 CLIB_CACHE_LINE_BYTES);

        load_balance_fill_buckets(lb, nhs,
                                  load_balance_get_buckets(lb),
                                  n_buckets, flags);
        lb->lb_map = lbmi;
    }
    else
    {
        /*
         * This is a modification of an existing load-balance.
         * We need to ensure that packets inflight see a consistent state, that
         * is the number of reported buckets the LB has (read from
         * lb_n_buckets_minus_1) is not more than it actually has. So if the
         * number of buckets is increasing, we must update the bucket array first,
         * then the reported number. vice-versa if the number of buckets goes down.
         */
        if (n_buckets == lb->lb_n_buckets)
        {
            /*
             * no change in the number of buckets. we can simply fill what
             * is new over what is old.
             */
            load_balance_fill_buckets(lb, nhs,
                                      load_balance_get_buckets(lb),
                                      n_buckets, flags);
            lb->lb_map = lbmi;
        }
        else if (n_buckets > lb->lb_n_buckets)
        {
            /*
             * we have more buckets. the old load-balance map (if there is one)
             * will remain valid, i.e. mapping to indices within range, so we
             * update it last.
             */
            if (n_buckets > LB_NUM_INLINE_BUCKETS &&
                lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
            {
                /*
                 * the new increased number of buckets is crossing the threshold
                 * from the inline storage to out-line. Alloc the outline buckets
                 * first, then fixup the number. then reset the inlines.
                 */
                ASSERT(NULL == lb->lb_buckets);
                vec_validate_aligned(lb->lb_buckets,
                                     n_buckets - 1,
                                     CLIB_CACHE_LINE_BYTES);

                load_balance_fill_buckets(lb, nhs,
                                          lb->lb_buckets,
                                          n_buckets, flags);
                CLIB_MEMORY_BARRIER();
                load_balance_set_n_buckets(lb, n_buckets);

                CLIB_MEMORY_BARRIER();

                for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
                {
                    dpo_reset(&lb->lb_buckets_inline[ii]);
                }
            }
            else
            {
                if (n_buckets <= LB_NUM_INLINE_BUCKETS)
                {
                    /*
                     * we are not crossing the threshold and it's still inline
                     * buckets. we can write the new over the old.
                     */
                    load_balance_fill_buckets(lb, nhs,
                                              load_balance_get_buckets(lb),
                                              n_buckets, flags);
                    CLIB_MEMORY_BARRIER();
                    load_balance_set_n_buckets(lb, n_buckets);
                }
                else
                {
                    /*
                     * we are not crossing the threshold. We need a new bucket array to
                     * hold the increased number of choices.
                     */
                    dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;

                    new_buckets = NULL;
                    old_buckets = load_balance_get_buckets(lb);

                    vec_validate_aligned(new_buckets,
                                         n_buckets - 1,
                                         CLIB_CACHE_LINE_BYTES);

                    load_balance_fill_buckets(lb, nhs, new_buckets,
                                              n_buckets, flags);
                    CLIB_MEMORY_BARRIER();
                    lb->lb_buckets = new_buckets;
                    CLIB_MEMORY_BARRIER();
                    load_balance_set_n_buckets(lb, n_buckets);

                    vec_foreach(tmp_dpo, old_buckets)
                    {
                        dpo_reset(tmp_dpo);
                    }
                    vec_free(old_buckets);
                }
            }

            /*
             * buckets fixed. ready for the MAP update.
             */
            lb->lb_map = lbmi;
        }
        else
        {
            /*
             * bucket size shrinkage.
             * Any map we have will be based on the old
             * larger number of buckets, so will be translating to indices
             * out of range. So the new MAP must be installed first.
             */
            lb->lb_map = lbmi;
            CLIB_MEMORY_BARRIER();


            if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
                lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
            {
                /*
                 * the new decreased number of buckets is crossing the threshold
                 * from out-line storage to inline:
                 *   1 - Fill the inline buckets,
                 *   2 - fixup the number (at this point the inline buckets are
                 *       used).
                 *   3 - free the outline buckets
                 */
                load_balance_fill_buckets(lb, nhs,
                                          lb->lb_buckets_inline,
                                          n_buckets, flags);
                CLIB_MEMORY_BARRIER();
                load_balance_set_n_buckets(lb, n_buckets);
                CLIB_MEMORY_BARRIER();

                vec_foreach(tmp_dpo, lb->lb_buckets)
                {
                    dpo_reset(tmp_dpo);
                }
                vec_free(lb->lb_buckets);
            }
            else
            {
                /*
                 * not crossing the threshold.
                 *   1 - update the number to the smaller size
                 *   2 - write the new buckets
                 *   3 - reset those no longer used.
                 */
                dpo_id_t *buckets;
                u32 old_n_buckets;

                old_n_buckets = lb->lb_n_buckets;
                buckets = load_balance_get_buckets(lb);

                load_balance_set_n_buckets(lb, n_buckets);
                CLIB_MEMORY_BARRIER();

                load_balance_fill_buckets(lb, nhs, buckets,
                                          n_buckets, flags);

                for (ii = n_buckets; ii < old_n_buckets; ii++)
                {
                    dpo_reset(&buckets[ii]);
                }
            }
        }
    }

    vec_foreach (nh, nhs)
    {
        dpo_reset(&nh->path_dpo);
    }
    vec_free(nhs);
    vec_free(fixed_nhs);

    load_balance_map_unlock(old_lbmi);
}
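
/*
 * Usage sketch (illustrative only): replace the paths of an existing
 * load-balance DPO with a single weighted path. The DPO names below are
 * made up; real callers in the FIB build the path vector from the resolved
 * paths.
 *
 *    load_balance_path_t *paths = NULL, *path;
 *
 *    vec_add2(paths, path, 1);
 *    path->path_weight = 2;
 *    dpo_copy(&path->path_dpo, &resolved_dpo);
 *    load_balance_multipath_update(&lb_dpo, paths, LOAD_BALANCE_FLAG_NONE);
 *    dpo_reset(&path->path_dpo);
 *    vec_free(paths);
 */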

static void
load_balance_lock (dpo_id_t *dpo)
{
    load_balance_t *lb;

    lb = load_balance_get(dpo->dpoi_index);

    lb->lb_locks++;
}

static void
load_balance_destroy (load_balance_t *lb)
{
    dpo_id_t *buckets;
    int i;

    buckets = load_balance_get_buckets(lb);

    for (i = 0; i < lb->lb_n_buckets; i++)
    {
        dpo_reset(&buckets[i]);
    }

    LB_DBG(lb, "destroy");
    if (!LB_HAS_INLINE_BUCKETS(lb))
    {
        vec_free(lb->lb_buckets);
    }

    fib_urpf_list_unlock(lb->lb_urpf);
    load_balance_map_unlock(lb->lb_map);

    pool_put(load_balance_pool, lb);
}

static void
load_balance_unlock (dpo_id_t *dpo)
{
    load_balance_t *lb;

    lb = load_balance_get(dpo->dpoi_index);

    lb->lb_locks--;

    if (0 == lb->lb_locks)
    {
        load_balance_destroy(lb);
    }
}

static void
load_balance_mem_show (void)
{
    fib_show_memory_usage("load-balance",
                          pool_elts(load_balance_pool),
                          pool_len(load_balance_pool),
                          sizeof(load_balance_t));
    load_balance_map_show_mem();
}

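/**
 * The MTU of a load-balance is the minimum MTU of its buckets' DPOs - a
 * packet must fit whichever path the flow hash selects.
 */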
static u16
load_balance_dpo_get_mtu (const dpo_id_t *dpo)
{
    const dpo_id_t *buckets;
    load_balance_t *lb;
    u16 i, mtu = 0xffff;

    lb = load_balance_get(dpo->dpoi_index);
    buckets = load_balance_get_buckets(lb);

    for (i = 0; i < lb->lb_n_buckets; i++)
    {
        mtu = clib_min (mtu, dpo_get_mtu (&buckets[i]));
    }

    return (mtu);
}

const static dpo_vft_t lb_vft = {
    .dv_lock = load_balance_lock,
    .dv_unlock = load_balance_unlock,
    .dv_format = format_load_balance_dpo,
    .dv_mem_show = load_balance_mem_show,
    .dv_get_mtu = load_balance_dpo_get_mtu,
};

/**
 * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
 *        object.
 *
 * this means that these graph nodes are ones from which a load-balance is the
 * parent object in the DPO-graph.
 *
 * We do not list all the load-balance nodes, such as the *-lookup. instead
 * we are relying on the correct use of the .sibling_of field when setting
 * up these sibling nodes.
 */
const static char* const load_balance_ip4_nodes[] =
{
    "ip4-load-balance",
    NULL,
};
const static char* const load_balance_ip6_nodes[] =
{
    "ip6-load-balance",
    NULL,
};
const static char* const load_balance_mpls_nodes[] =
{
    "mpls-load-balance",
    NULL,
};
const static char* const load_balance_l2_nodes[] =
{
    "l2-load-balance",
    NULL,
};
const static char* const load_balance_nsh_nodes[] =
{
    "nsh-load-balance",
    NULL
};
const static char* const load_balance_bier_nodes[] =
{
    "bier-load-balance",
    NULL,
};
const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
{
    [DPO_PROTO_IP4] = load_balance_ip4_nodes,
    [DPO_PROTO_IP6] = load_balance_ip6_nodes,
    [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
    [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
    [DPO_PROTO_NSH] = load_balance_nsh_nodes,
    [DPO_PROTO_BIER] = load_balance_bier_nodes,
};

void
load_balance_module_init (void)
{
    index_t lbi;

    dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);

    /*
     * Special LB with index zero. we need to define this since the v4 mtrie
     * assumes an index of 0 implies the ply is empty. therefore all 'real'
     * adjs need a non-zero index.
     * This should never be used, but just in case, stack it on a drop.
     */
    lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
    load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));

    load_balance_logger =
        vlib_log_register_class("dpo", "load-balance");

    load_balance_map_module_init();
}

static clib_error_t *
load_balance_show (vlib_main_t * vm,
                   unformat_input_t * input,
                   vlib_cli_command_t * cmd)
{
    index_t lbi = INDEX_INVALID;

    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
        if (unformat (input, "%d", &lbi))
            ;
        else
            break;
    }

    if (INDEX_INVALID != lbi)
    {
        if (pool_is_free_index(load_balance_pool, lbi))
        {
            vlib_cli_output (vm, "no such load-balance:%d", lbi);
        }
        else
        {
            vlib_cli_output (vm, "%U", format_load_balance, lbi,
                             LOAD_BALANCE_FORMAT_DETAIL);
        }
    }
    else
    {
        load_balance_t *lb;

        pool_foreach (lb, load_balance_pool)
        {
            vlib_cli_output (vm, "%U", format_load_balance,
                             load_balance_get_index(lb),
                             LOAD_BALANCE_FORMAT_NONE);
        }
    }

    return 0;
}

VLIB_CLI_COMMAND (load_balance_show_command, static) = {
    .path = "show load-balance",
    .short_help = "show load-balance [<index>]",
    .function = load_balance_show,
};


always_inline u32
ip_flow_hash (void *data)
{
    ip4_header_t *iph = (ip4_header_t *) data;

    if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
        return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
    else
        return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
}

always_inline u64
mac_to_u64 (u8 * m)
{
    return (*((u64 *) m) & 0xffffffffffff);
}

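/**
 * Compute a flow hash for an L2 frame: mix the src/dst MAC addresses with
 * the IP flow hash of the payload (or just the ethertype for non-IP frames).
 */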
always_inline u32
l2_flow_hash (vlib_buffer_t * b0)
{
    ethernet_header_t *eh;
    u64 a, b, c;
    uword is_ip, eh_size;
    u16 eh_type;

    eh = vlib_buffer_get_current (b0);
    eh_type = clib_net_to_host_u16 (eh->type);
    eh_size = ethernet_buffer_header_size (b0);

    is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);

    /* since we have 2 cache lines, use them */
    if (is_ip)
        a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
    else
        a = eh->type;

    b = mac_to_u64 ((u8 *) eh->dst_address);
    c = mac_to_u64 ((u8 *) eh->src_address);
    hash_mix64 (a, b, c);

    return (u32) c;
}

typedef struct load_balance_trace_t_
{
    index_t lb_index;
} load_balance_trace_t;

always_inline uword
load_balance_inline (vlib_main_t * vm,
                     vlib_node_runtime_t * node,
                     vlib_frame_t * frame,
                     int is_l2)
{
    u32 n_left_from, next_index, *from, *to_next;

    from = vlib_frame_vector_args (frame);
    n_left_from = frame->n_vectors;

    next_index = node->cached_next_index;

    while (n_left_from > 0)
    {
        u32 n_left_to_next;

        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

        while (n_left_from > 0 && n_left_to_next > 0)
        {
            vlib_buffer_t *b0;
            u32 bi0, lbi0, next0;
            const dpo_id_t *dpo0;
            const load_balance_t *lb0;

            bi0 = from[0];
            to_next[0] = bi0;
            from += 1;
            to_next += 1;
            n_left_from -= 1;
            n_left_to_next -= 1;

            b0 = vlib_get_buffer (vm, bi0);

            /* lookup dst + src mac */
            lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
            lb0 = load_balance_get(lbi0);

            if (is_l2)
            {
                vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
            }
            else
            {
                /* it's BIER */
                const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
                vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
            }

            dpo0 = load_balance_get_bucket_i(lb0,
                                             vnet_buffer(b0)->ip.flow_hash &
                                             (lb0->lb_n_buckets_minus_1));

            next0 = dpo0->dpoi_next_node;
            vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

            if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
                load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
                                                           sizeof (*tr));
                tr->lb_index = lbi0;
            }
            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, next0);
        }

        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

    return frame->n_vectors;
}

static uword
l2_load_balance (vlib_main_t * vm,
                 vlib_node_runtime_t * node,
                 vlib_frame_t * frame)
{
    return (load_balance_inline(vm, node, frame, 1));
}

static u8 *
format_l2_load_balance_trace (u8 * s, va_list * args)
{
    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
    load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);

    s = format (s, "L2-load-balance: index %d", t->lb_index);
    return s;
}

/**
 * @brief
 */
VLIB_REGISTER_NODE (l2_load_balance_node) = {
    .function = l2_load_balance,
    .name = "l2-load-balance",
    .vector_size = sizeof (u32),

    .format_trace = format_l2_load_balance_trace,
    .n_next_nodes = 1,
    .next_nodes = {
        [0] = "error-drop",
    },
};

static uword
nsh_load_balance (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
{
    u32 n_left_from, next_index, *from, *to_next;

    from = vlib_frame_vector_args (frame);
    n_left_from = frame->n_vectors;

    next_index = node->cached_next_index;

    while (n_left_from > 0)
    {
        u32 n_left_to_next;

        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

        while (n_left_from > 0 && n_left_to_next > 0)
        {
            vlib_buffer_t *b0;
            u32 bi0, lbi0, next0, *nsh0;
            const dpo_id_t *dpo0;
            const load_balance_t *lb0;

            bi0 = from[0];
            to_next[0] = bi0;
            from += 1;
            to_next += 1;
            n_left_from -= 1;
            n_left_to_next -= 1;

            b0 = vlib_get_buffer (vm, bi0);

            lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
            lb0 = load_balance_get(lbi0);

            /* SPI + SI are the second word of the NSH header */
            nsh0 = vlib_buffer_get_current (b0);
            vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;

            dpo0 = load_balance_get_bucket_i(lb0,
                                             vnet_buffer(b0)->ip.flow_hash &
                                             (lb0->lb_n_buckets_minus_1));

            next0 = dpo0->dpoi_next_node;
            vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

            if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
            {
                load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
                                                           sizeof (*tr));
                tr->lb_index = lbi0;
            }
            vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                             n_left_to_next, bi0, next0);
        }

        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

    return frame->n_vectors;
}

static u8 *
format_nsh_load_balance_trace (u8 * s, va_list * args)
{
    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
    load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);

    s = format (s, "NSH-load-balance: index %d", t->lb_index);
    return s;
}

/**
 * @brief
 */
VLIB_REGISTER_NODE (nsh_load_balance_node) = {
    .function = nsh_load_balance,
    .name = "nsh-load-balance",
    .vector_size = sizeof (u32),

    .format_trace = format_nsh_load_balance_trace,
    .n_next_nodes = 1,
    .next_nodes = {
        [0] = "error-drop",
    },
};

static u8 *
format_bier_load_balance_trace (u8 * s, va_list * args)
{
    CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
    CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
    load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);

    s = format (s, "BIER-load-balance: index %d", t->lb_index);
    return s;
}

static uword
bier_load_balance (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_frame_t * frame)
{
    return (load_balance_inline(vm, node, frame, 0));
}

/**
 * @brief
 */
VLIB_REGISTER_NODE (bier_load_balance_node) = {
    .function = bier_load_balance,
    .name = "bier-load-balance",
    .vector_size = sizeof (u32),

    .format_trace = format_bier_load_balance_trace,
    .sibling_of = "mpls-load-balance",
};

// clang-format on