blob: fb876a09ec2af04e2c6e4b181b2459727b7e47a9 [file] [log] [blame]
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001/*
2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
Neale Ranns0bfe5d82016-08-25 15:29:12 +010016#include <vnet/dpo/load_balance.h>
17#include <vnet/dpo/load_balance_map.h>
18#include <vnet/dpo/drop_dpo.h>
19#include <vppinfra/math.h> /* for fabs */
20#include <vnet/adj/adj.h>
Neale Ranns0bfe5d82016-08-25 15:29:12 +010021#include <vnet/adj/adj_internal.h>
Neale Ranns3ee44042016-10-03 13:05:48 +010022#include <vnet/fib/fib_urpf_list.h>
Neale Rannsef90ed02018-09-13 08:45:12 -070023#include <vnet/bier/bier_fwd.h>
Neale Ranns023d23a2019-06-26 02:16:50 -070024#include <vnet/fib/mpls_fib.h>
Neale Rannse4031132020-10-26 13:00:06 +000025#include <vnet/ip/ip4_inlines.h>
26#include <vnet/ip/ip6_inlines.h>
Neale Ranns0bfe5d82016-08-25 15:29:12 +010027
28/*
29 * distribution error tolerance for load-balancing
30 */
31const f64 multipath_next_hop_error_tolerance = 0.1;
32
Neale Rannsac64b712018-10-08 14:51:11 +000033static const char *load_balance_attr_names[] = LOAD_BALANCE_ATTR_NAMES;
Neale Ranns0bfe5d82016-08-25 15:29:12 +010034
Neale Ranns710071b2018-09-24 12:36:26 +000035/**
36 * the logger
37 */
38vlib_log_class_t load_balance_logger;
39
Neale Ranns0bfe5d82016-08-25 15:29:12 +010040#define LB_DBG(_lb, _fmt, _args...) \
41{ \
Neale Ranns710071b2018-09-24 12:36:26 +000042 vlib_log_debug(load_balance_logger, \
43 "lb:[%U]:" _fmt, \
44 format_load_balance, load_balance_get_index(_lb), \
45 LOAD_BALANCE_FORMAT_NONE, \
46 ##_args); \
Neale Ranns0bfe5d82016-08-25 15:29:12 +010047}
Neale Ranns0bfe5d82016-08-25 15:29:12 +010048
49/**
50 * Pool of all DPOs. It's not static so the DP can have fast access
51 */
52load_balance_t *load_balance_pool;
53
54/**
55 * The one instance of load-balance main
56 */
Neale Ranns008dbe12018-09-07 09:32:36 -070057load_balance_main_t load_balance_main = {
58 .lbm_to_counters = {
59 .name = "route-to",
60 .stat_segment_name = "/net/route/to",
61 },
62 .lbm_via_counters = {
63 .name = "route-via",
64 .stat_segment_name = "/net/route/via",
65 }
66};
Neale Ranns0bfe5d82016-08-25 15:29:12 +010067
68f64
69load_balance_get_multipath_tolerance (void)
70{
71 return (multipath_next_hop_error_tolerance);
72}
73
74static inline index_t
75load_balance_get_index (const load_balance_t *lb)
76{
77 return (lb - load_balance_pool);
78}
79
80static inline dpo_id_t*
81load_balance_get_buckets (load_balance_t *lb)
82{
83 if (LB_HAS_INLINE_BUCKETS(lb))
84 {
85 return (lb->lb_buckets_inline);
86 }
87 else
88 {
89 return (lb->lb_buckets);
90 }
91}
92
93static load_balance_t *
94load_balance_alloc_i (void)
95{
96 load_balance_t *lb;
Dave Barach8341f762020-06-03 08:05:15 -040097 u8 need_barrier_sync = 0;
98 vlib_main_t *vm = vlib_get_main();
99 ASSERT (vm->thread_index == 0);
100
101 pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync,
102 CLIB_CACHE_LINE_BYTES);
103 if (need_barrier_sync)
104 vlib_worker_thread_barrier_sync (vm);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100105
106 pool_get_aligned(load_balance_pool, lb, CLIB_CACHE_LINE_BYTES);
Dave Barachb7b92992018-10-17 10:38:51 -0400107 clib_memset(lb, 0, sizeof(*lb));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100108
109 lb->lb_map = INDEX_INVALID;
Neale Ranns3ee44042016-10-03 13:05:48 +0100110 lb->lb_urpf = INDEX_INVALID;
Dave Barach8341f762020-06-03 08:05:15 -0400111
112 if (need_barrier_sync == 0)
113 {
114 need_barrier_sync += vlib_validate_combined_counter_will_expand
115 (&(load_balance_main.lbm_to_counters),
116 load_balance_get_index(lb));
117 need_barrier_sync += vlib_validate_combined_counter_will_expand
118 (&(load_balance_main.lbm_via_counters),
119 load_balance_get_index(lb));
120 if (need_barrier_sync)
121 vlib_worker_thread_barrier_sync (vm);
122 }
123
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100124 vlib_validate_combined_counter(&(load_balance_main.lbm_to_counters),
125 load_balance_get_index(lb));
126 vlib_validate_combined_counter(&(load_balance_main.lbm_via_counters),
127 load_balance_get_index(lb));
128 vlib_zero_combined_counter(&(load_balance_main.lbm_to_counters),
129 load_balance_get_index(lb));
130 vlib_zero_combined_counter(&(load_balance_main.lbm_via_counters),
131 load_balance_get_index(lb));
132
Dave Barach8341f762020-06-03 08:05:15 -0400133 if (need_barrier_sync)
134 vlib_worker_thread_barrier_release (vm);
135
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100136 return (lb);
137}
138
139static u8*
140load_balance_format (index_t lbi,
141 load_balance_format_flags_t flags,
142 u32 indent,
143 u8 *s)
144{
145 vlib_counter_t to, via;
146 load_balance_t *lb;
147 dpo_id_t *buckets;
148 u32 i;
149
150 lb = load_balance_get(lbi);
151 vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
152 vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
153 buckets = load_balance_get_buckets(lb);
154
155 s = format(s, "%U: ", format_dpo_type, DPO_LOAD_BALANCE);
Neale Rannsf12a83f2017-04-18 09:09:40 -0700156 s = format(s, "[proto:%U ", format_dpo_proto, lb->lb_proto);
157 s = format(s, "index:%d buckets:%d ", lbi, lb->lb_n_buckets);
Neale Ranns3ee44042016-10-03 13:05:48 +0100158 s = format(s, "uRPF:%d ", lb->lb_urpf);
Neale Rannsac64b712018-10-08 14:51:11 +0000159 if (lb->lb_flags)
160 {
161 load_balance_attr_t attr;
162
163 s = format(s, "flags:[");
164
165 FOR_EACH_LOAD_BALANCE_ATTR(attr)
166 {
167 if (lb->lb_flags & (1 << attr))
168 {
169 s = format (s, "%s", load_balance_attr_names[attr]);
170 }
171 }
172 s = format(s, "] ");
173 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100174 s = format(s, "to:[%Ld:%Ld]", to.packets, to.bytes);
175 if (0 != via.packets)
176 {
177 s = format(s, " via:[%Ld:%Ld]",
178 via.packets, via.bytes);
179 }
180 s = format(s, "]");
181
182 if (INDEX_INVALID != lb->lb_map)
183 {
184 s = format(s, "\n%U%U",
185 format_white_space, indent+4,
186 format_load_balance_map, lb->lb_map, indent+4);
187 }
188 for (i = 0; i < lb->lb_n_buckets; i++)
189 {
190 s = format(s, "\n%U[%d] %U",
191 format_white_space, indent+2,
192 i,
193 format_dpo_id,
194 &buckets[i], indent+6);
195 }
196 return (s);
197}
198
199u8*
200format_load_balance (u8 * s, va_list * args)
201{
Billy McFallcfcf1e22016-10-14 09:51:49 -0400202 index_t lbi = va_arg(*args, index_t);
203 load_balance_format_flags_t flags = va_arg(*args, load_balance_format_flags_t);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100204
205 return (load_balance_format(lbi, flags, 0, s));
206}
Neale Ranns710071b2018-09-24 12:36:26 +0000207
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100208static u8*
209format_load_balance_dpo (u8 * s, va_list * args)
210{
Billy McFallcfcf1e22016-10-14 09:51:49 -0400211 index_t lbi = va_arg(*args, index_t);
212 u32 indent = va_arg(*args, u32);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100213
214 return (load_balance_format(lbi, LOAD_BALANCE_FORMAT_DETAIL, indent, s));
215}
216
Neale Ranns023d23a2019-06-26 02:16:50 -0700217flow_hash_config_t
218load_balance_get_default_flow_hash (dpo_proto_t lb_proto)
219{
220 switch (lb_proto)
221 {
222 case DPO_PROTO_IP4:
223 case DPO_PROTO_IP6:
224 return (IP_FLOW_HASH_DEFAULT);
225
226 case DPO_PROTO_MPLS:
227 return (MPLS_FLOW_HASH_DEFAULT);
228
229 case DPO_PROTO_ETHERNET:
230 case DPO_PROTO_BIER:
231 case DPO_PROTO_NSH:
232 break;
233 }
234
235 return (0);
236}
237
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100238static load_balance_t *
239load_balance_create_i (u32 num_buckets,
240 dpo_proto_t lb_proto,
241 flow_hash_config_t fhc)
242{
243 load_balance_t *lb;
244
245 lb = load_balance_alloc_i();
246 lb->lb_hash_config = fhc;
247 lb->lb_n_buckets = num_buckets;
248 lb->lb_n_buckets_minus_1 = num_buckets-1;
249 lb->lb_proto = lb_proto;
250
251 if (!LB_HAS_INLINE_BUCKETS(lb))
252 {
253 vec_validate_aligned(lb->lb_buckets,
254 lb->lb_n_buckets - 1,
255 CLIB_CACHE_LINE_BYTES);
256 }
257
258 LB_DBG(lb, "create");
259
260 return (lb);
261}
262
263index_t
264load_balance_create (u32 n_buckets,
265 dpo_proto_t lb_proto,
266 flow_hash_config_t fhc)
267{
268 return (load_balance_get_index(load_balance_create_i(n_buckets, lb_proto, fhc)));
269}
270
271static inline void
272load_balance_set_bucket_i (load_balance_t *lb,
273 u32 bucket,
274 dpo_id_t *buckets,
275 const dpo_id_t *next)
276{
277 dpo_stack(DPO_LOAD_BALANCE, lb->lb_proto, &buckets[bucket], next);
278}
279
280void
281load_balance_set_bucket (index_t lbi,
282 u32 bucket,
283 const dpo_id_t *next)
284{
285 load_balance_t *lb;
286 dpo_id_t *buckets;
287
288 lb = load_balance_get(lbi);
289 buckets = load_balance_get_buckets(lb);
290
291 ASSERT(bucket < lb->lb_n_buckets);
292
293 load_balance_set_bucket_i(lb, bucket, buckets, next);
294}
295
296int
297load_balance_is_drop (const dpo_id_t *dpo)
298{
299 load_balance_t *lb;
300
301 if (DPO_LOAD_BALANCE != dpo->dpoi_type)
302 return (0);
303
304 lb = load_balance_get(dpo->dpoi_index);
305
306 if (1 == lb->lb_n_buckets)
307 {
308 return (dpo_is_drop(load_balance_get_bucket_i(lb, 0)));
309 }
310 return (0);
311}
312
Neale Ranns2303cb12018-02-21 04:57:17 -0800313u16
314load_balance_n_buckets (index_t lbi)
315{
316 load_balance_t *lb;
317
318 lb = load_balance_get(lbi);
319
320 return (lb->lb_n_buckets);
321}
322
Neale Ranns3ee44042016-10-03 13:05:48 +0100323void
Neale Ranns32e1c012016-11-22 17:07:28 +0000324load_balance_set_fib_entry_flags (index_t lbi,
325 fib_entry_flag_t flags)
326{
327 load_balance_t *lb;
328
329 lb = load_balance_get(lbi);
330 lb->lb_fib_entry_flags = flags;
331}
332
333
334void
Neale Ranns3ee44042016-10-03 13:05:48 +0100335load_balance_set_urpf (index_t lbi,
336 index_t urpf)
337{
338 load_balance_t *lb;
339 index_t old;
340
341 lb = load_balance_get(lbi);
342
343 /*
344 * packets in flight we see this change. but it's atomic, so :P
345 */
346 old = lb->lb_urpf;
347 lb->lb_urpf = urpf;
348
349 fib_urpf_list_unlock(old);
350 fib_urpf_list_lock(urpf);
351}
352
353index_t
354load_balance_get_urpf (index_t lbi)
355{
356 load_balance_t *lb;
357
358 lb = load_balance_get(lbi);
359
360 return (lb->lb_urpf);
361}
362
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100363const dpo_id_t *
364load_balance_get_bucket (index_t lbi,
365 u32 bucket)
366{
367 load_balance_t *lb;
368
369 lb = load_balance_get(lbi);
370
371 return (load_balance_get_bucket_i(lb, bucket));
372}
373
374static int
Neale Rannsc0790cf2017-01-05 01:01:47 -0800375next_hop_sort_by_weight (const load_balance_path_t * n1,
376 const load_balance_path_t * n2)
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100377{
378 return ((int) n1->path_weight - (int) n2->path_weight);
379}
380
381/* Given next hop vector is over-written with normalized one with sorted weights and
382 with weights corresponding to the number of adjacencies for each next hop.
383 Returns number of adjacencies in block. */
384u32
Neale Rannsc0790cf2017-01-05 01:01:47 -0800385ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100386 load_balance_path_t ** normalized_next_hops,
387 u32 *sum_weight_in,
388 f64 multipath_next_hop_error_tolerance)
389{
390 load_balance_path_t * nhs;
391 uword n_nhs, n_adj, n_adj_left, i, sum_weight;
392 f64 norm, error;
393
394 n_nhs = vec_len (raw_next_hops);
395 ASSERT (n_nhs > 0);
396 if (n_nhs == 0)
397 return 0;
398
399 /* Allocate enough space for 2 copies; we'll use second copy to save original weights. */
400 nhs = *normalized_next_hops;
401 vec_validate (nhs, 2*n_nhs - 1);
402
403 /* Fast path: 1 next hop in block. */
404 n_adj = n_nhs;
405 if (n_nhs == 1)
406 {
407 nhs[0] = raw_next_hops[0];
408 nhs[0].path_weight = 1;
409 _vec_len (nhs) = 1;
410 sum_weight = 1;
411 goto done;
412 }
413
414 else if (n_nhs == 2)
415 {
416 int cmp = next_hop_sort_by_weight (&raw_next_hops[0], &raw_next_hops[1]) < 0;
417
418 /* Fast sort. */
419 nhs[0] = raw_next_hops[cmp];
420 nhs[1] = raw_next_hops[cmp ^ 1];
421
422 /* Fast path: equal cost multipath with 2 next hops. */
423 if (nhs[0].path_weight == nhs[1].path_weight)
424 {
425 nhs[0].path_weight = nhs[1].path_weight = 1;
426 _vec_len (nhs) = 2;
427 sum_weight = 2;
428 goto done;
429 }
430 }
431 else
432 {
Dave Barach178cf492018-11-13 16:34:13 -0500433 clib_memcpy_fast (nhs, raw_next_hops, n_nhs * sizeof (raw_next_hops[0]));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100434 qsort (nhs, n_nhs, sizeof (nhs[0]), (void *) next_hop_sort_by_weight);
435 }
436
437 /* Find total weight to normalize weights. */
438 sum_weight = 0;
439 for (i = 0; i < n_nhs; i++)
440 sum_weight += nhs[i].path_weight;
441
442 /* In the unlikely case that all weights are given as 0, set them all to 1. */
443 if (sum_weight == 0)
444 {
445 for (i = 0; i < n_nhs; i++)
446 nhs[i].path_weight = 1;
447 sum_weight = n_nhs;
448 }
449
450 /* Save copies of all next hop weights to avoid being overwritten in loop below. */
451 for (i = 0; i < n_nhs; i++)
452 nhs[n_nhs + i].path_weight = nhs[i].path_weight;
453
454 /* Try larger and larger power of 2 sized adjacency blocks until we
455 find one where traffic flows to within 1% of specified weights. */
456 for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
457 {
458 error = 0;
459
460 norm = n_adj / ((f64) sum_weight);
461 n_adj_left = n_adj;
462 for (i = 0; i < n_nhs; i++)
463 {
464 f64 nf = nhs[n_nhs + i].path_weight * norm; /* use saved weights */
465 word n = flt_round_nearest (nf);
466
467 n = n > n_adj_left ? n_adj_left : n;
468 n_adj_left -= n;
469 error += fabs (nf - n);
470 nhs[i].path_weight = n;
Neale Ranns0bd36ea2016-11-16 11:47:44 +0000471
472 if (0 == nhs[i].path_weight)
473 {
474 /*
475 * when the weight skew is high (norm is small) and n == nf.
476 * without this correction the path with a low weight would have
Lijian.Zhang33af8c12019-09-16 16:22:36 +0800477 * no representation in the load-balanace - don't want that.
Neale Ranns0bd36ea2016-11-16 11:47:44 +0000478 * If the weight skew is high so the load-balance has many buckets
479 * to allow it. pays ya money takes ya choice.
480 */
481 error = n_adj;
482 break;
483 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100484 }
485
486 nhs[0].path_weight += n_adj_left;
487
488 /* Less than 5% average error per adjacency with this size adjacency block? */
489 if (error <= multipath_next_hop_error_tolerance*n_adj)
490 {
491 /* Truncate any next hops with zero weight. */
492 _vec_len (nhs) = i;
493 break;
494 }
495 }
496
497done:
498 /* Save vector for next call. */
499 *normalized_next_hops = nhs;
500 *sum_weight_in = sum_weight;
501 return n_adj;
502}
503
504static load_balance_path_t *
Neale Rannsc0790cf2017-01-05 01:01:47 -0800505load_balance_multipath_next_hop_fixup (const load_balance_path_t *nhs,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100506 dpo_proto_t drop_proto)
507{
508 if (0 == vec_len(nhs))
509 {
Neale Rannsc0790cf2017-01-05 01:01:47 -0800510 load_balance_path_t *new_nhs = NULL, *nh;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100511
512 /*
513 * we need something for the load-balance. so use the drop
514 */
Neale Rannsc0790cf2017-01-05 01:01:47 -0800515 vec_add2(new_nhs, nh, 1);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100516
517 nh->path_weight = 1;
518 dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
Neale Rannsc0790cf2017-01-05 01:01:47 -0800519
520 return (new_nhs);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100521 }
522
Neale Rannsc0790cf2017-01-05 01:01:47 -0800523 return (NULL);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100524}
525
526/*
527 * Fill in adjacencies in block based on corresponding
528 * next hop adjacencies.
529 */
530static void
Neale Rannsac64b712018-10-08 14:51:11 +0000531load_balance_fill_buckets_norm (load_balance_t *lb,
532 load_balance_path_t *nhs,
533 dpo_id_t *buckets,
534 u32 n_buckets)
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100535{
Neale Rannsac64b712018-10-08 14:51:11 +0000536 load_balance_path_t *nh;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100537 u16 ii, bucket;
538
539 bucket = 0;
540
541 /*
542 * the next-hops have normalised weights. that means their sum is the number
543 * of buckets we need to fill.
544 */
545 vec_foreach (nh, nhs)
546 {
547 for (ii = 0; ii < nh->path_weight; ii++)
548 {
549 ASSERT(bucket < n_buckets);
550 load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
551 }
552 }
553}
Neale Rannsac64b712018-10-08 14:51:11 +0000554static void
555load_balance_fill_buckets_sticky (load_balance_t *lb,
556 load_balance_path_t *nhs,
557 dpo_id_t *buckets,
558 u32 n_buckets)
559{
560 load_balance_path_t *nh, *fwding_paths;
561 u16 ii, bucket, fpath;
562
563 fpath = bucket = 0;
564 fwding_paths = NULL;
565
566 vec_foreach (nh, nhs)
567 {
568 if (!dpo_is_drop(&nh->path_dpo))
569 {
570 vec_add1(fwding_paths, *nh);
571 }
572 }
573 if (vec_len(fwding_paths) == 0)
574 fwding_paths = vec_dup(nhs);
575
576 /*
577 * the next-hops have normalised weights. that means their sum is the number
578 * of buckets we need to fill.
579 */
580 vec_foreach (nh, nhs)
581 {
582 for (ii = 0; ii < nh->path_weight; ii++)
583 {
584 ASSERT(bucket < n_buckets);
585 if (!dpo_is_drop(&nh->path_dpo))
586 {
587 load_balance_set_bucket_i(lb, bucket++, buckets, &nh->path_dpo);
588 }
589 else
590 {
591 /* fill the bucks from the next up path */
592 load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
593 fpath = (fpath + 1) % vec_len(fwding_paths);
594 }
595 }
596 }
597
598 vec_free(fwding_paths);
599}
600
601static void
602load_balance_fill_buckets (load_balance_t *lb,
603 load_balance_path_t *nhs,
604 dpo_id_t *buckets,
605 u32 n_buckets,
606 load_balance_flags_t flags)
607{
608 if (flags & LOAD_BALANCE_FLAG_STICKY)
609 {
610 load_balance_fill_buckets_sticky(lb, nhs, buckets, n_buckets);
611 }
612 else
613 {
614 load_balance_fill_buckets_norm(lb, nhs, buckets, n_buckets);
615 }
616}
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100617
618static inline void
619load_balance_set_n_buckets (load_balance_t *lb,
620 u32 n_buckets)
621{
622 lb->lb_n_buckets = n_buckets;
623 lb->lb_n_buckets_minus_1 = n_buckets-1;
624}
625
626void
627load_balance_multipath_update (const dpo_id_t *dpo,
Neale Rannsc0790cf2017-01-05 01:01:47 -0800628 const load_balance_path_t * raw_nhs,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100629 load_balance_flags_t flags)
630{
Neale Rannsc0790cf2017-01-05 01:01:47 -0800631 load_balance_path_t *nh, *nhs, *fixed_nhs;
632 u32 sum_of_weights, n_buckets, ii;
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100633 index_t lbmi, old_lbmi;
634 load_balance_t *lb;
635 dpo_id_t *tmp_dpo;
636
637 nhs = NULL;
638
639 ASSERT(DPO_LOAD_BALANCE == dpo->dpoi_type);
640 lb = load_balance_get(dpo->dpoi_index);
Neale Rannsac64b712018-10-08 14:51:11 +0000641 lb->lb_flags = flags;
Neale Rannsc0790cf2017-01-05 01:01:47 -0800642 fixed_nhs = load_balance_multipath_next_hop_fixup(raw_nhs, lb->lb_proto);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100643 n_buckets =
Neale Rannsc0790cf2017-01-05 01:01:47 -0800644 ip_multipath_normalize_next_hops((NULL == fixed_nhs ?
645 raw_nhs :
646 fixed_nhs),
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100647 &nhs,
648 &sum_of_weights,
649 multipath_next_hop_error_tolerance);
650
Neale Rannsc0790cf2017-01-05 01:01:47 -0800651 ASSERT (n_buckets >= vec_len (raw_nhs));
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100652
653 /*
654 * Save the old load-balance map used, and get a new one if required.
655 */
656 old_lbmi = lb->lb_map;
657 if (flags & LOAD_BALANCE_FLAG_USES_MAP)
658 {
659 lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, nhs);
660 }
661 else
662 {
663 lbmi = INDEX_INVALID;
664 }
665
666 if (0 == lb->lb_n_buckets)
667 {
668 /*
669 * first time initialisation. no packets inflight, so we can write
670 * at leisure.
671 */
672 load_balance_set_n_buckets(lb, n_buckets);
673
674 if (!LB_HAS_INLINE_BUCKETS(lb))
675 vec_validate_aligned(lb->lb_buckets,
676 lb->lb_n_buckets - 1,
677 CLIB_CACHE_LINE_BYTES);
678
679 load_balance_fill_buckets(lb, nhs,
680 load_balance_get_buckets(lb),
Neale Rannsac64b712018-10-08 14:51:11 +0000681 n_buckets, flags);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100682 lb->lb_map = lbmi;
683 }
684 else
685 {
686 /*
687 * This is a modification of an existing load-balance.
688 * We need to ensure that packets inflight see a consistent state, that
689 * is the number of reported buckets the LB has (read from
690 * lb_n_buckets_minus_1) is not more than it actually has. So if the
691 * number of buckets is increasing, we must update the bucket array first,
692 * then the reported number. vice-versa if the number of buckets goes down.
693 */
694 if (n_buckets == lb->lb_n_buckets)
695 {
696 /*
697 * no change in the number of buckets. we can simply fill what
698 * is new over what is old.
699 */
700 load_balance_fill_buckets(lb, nhs,
701 load_balance_get_buckets(lb),
Neale Rannsac64b712018-10-08 14:51:11 +0000702 n_buckets, flags);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100703 lb->lb_map = lbmi;
704 }
705 else if (n_buckets > lb->lb_n_buckets)
706 {
707 /*
708 * we have more buckets. the old load-balance map (if there is one)
709 * will remain valid, i.e. mapping to indices within range, so we
710 * update it last.
711 */
712 if (n_buckets > LB_NUM_INLINE_BUCKETS &&
713 lb->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
714 {
715 /*
716 * the new increased number of buckets is crossing the threshold
717 * from the inline storage to out-line. Alloc the outline buckets
718 * first, then fixup the number. then reset the inlines.
719 */
720 ASSERT(NULL == lb->lb_buckets);
721 vec_validate_aligned(lb->lb_buckets,
722 n_buckets - 1,
723 CLIB_CACHE_LINE_BYTES);
724
725 load_balance_fill_buckets(lb, nhs,
726 lb->lb_buckets,
Neale Rannsac64b712018-10-08 14:51:11 +0000727 n_buckets, flags);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100728 CLIB_MEMORY_BARRIER();
729 load_balance_set_n_buckets(lb, n_buckets);
730
731 CLIB_MEMORY_BARRIER();
732
733 for (ii = 0; ii < LB_NUM_INLINE_BUCKETS; ii++)
734 {
735 dpo_reset(&lb->lb_buckets_inline[ii]);
736 }
737 }
738 else
739 {
Neale Ranns0bd36ea2016-11-16 11:47:44 +0000740 if (n_buckets <= LB_NUM_INLINE_BUCKETS)
741 {
742 /*
743 * we are not crossing the threshold and it's still inline buckets.
744 * we can write the new on the old..
745 */
746 load_balance_fill_buckets(lb, nhs,
747 load_balance_get_buckets(lb),
Neale Rannsac64b712018-10-08 14:51:11 +0000748 n_buckets, flags);
Neale Ranns0bd36ea2016-11-16 11:47:44 +0000749 CLIB_MEMORY_BARRIER();
750 load_balance_set_n_buckets(lb, n_buckets);
751 }
752 else
753 {
754 /*
755 * we are not crossing the threshold. We need a new bucket array to
756 * hold the increased number of choices.
757 */
758 dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
759
760 new_buckets = NULL;
761 old_buckets = load_balance_get_buckets(lb);
762
763 vec_validate_aligned(new_buckets,
764 n_buckets - 1,
765 CLIB_CACHE_LINE_BYTES);
766
Neale Rannsac64b712018-10-08 14:51:11 +0000767 load_balance_fill_buckets(lb, nhs, new_buckets,
768 n_buckets, flags);
Neale Ranns0bd36ea2016-11-16 11:47:44 +0000769 CLIB_MEMORY_BARRIER();
770 lb->lb_buckets = new_buckets;
771 CLIB_MEMORY_BARRIER();
772 load_balance_set_n_buckets(lb, n_buckets);
773
774 vec_foreach(tmp_dpo, old_buckets)
775 {
776 dpo_reset(tmp_dpo);
777 }
778 vec_free(old_buckets);
779 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100780 }
781
782 /*
783 * buckets fixed. ready for the MAP update.
784 */
785 lb->lb_map = lbmi;
786 }
787 else
788 {
789 /*
790 * bucket size shrinkage.
791 * Any map we have will be based on the old
792 * larger number of buckets, so will be translating to indices
793 * out of range. So the new MAP must be installed first.
794 */
795 lb->lb_map = lbmi;
796 CLIB_MEMORY_BARRIER();
797
798
799 if (n_buckets <= LB_NUM_INLINE_BUCKETS &&
800 lb->lb_n_buckets > LB_NUM_INLINE_BUCKETS)
801 {
802 /*
803 * the new decreased number of buckets is crossing the threshold
804 * from out-line storage to inline:
805 * 1 - Fill the inline buckets,
806 * 2 - fixup the number (and this point the inline buckets are
807 * used).
808 * 3 - free the outline buckets
809 */
810 load_balance_fill_buckets(lb, nhs,
811 lb->lb_buckets_inline,
Neale Rannsac64b712018-10-08 14:51:11 +0000812 n_buckets, flags);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100813 CLIB_MEMORY_BARRIER();
814 load_balance_set_n_buckets(lb, n_buckets);
815 CLIB_MEMORY_BARRIER();
816
817 vec_foreach(tmp_dpo, lb->lb_buckets)
818 {
819 dpo_reset(tmp_dpo);
820 }
821 vec_free(lb->lb_buckets);
822 }
823 else
824 {
825 /*
826 * not crossing the threshold.
827 * 1 - update the number to the smaller size
828 * 2 - write the new buckets
829 * 3 - reset those no longer used.
830 */
831 dpo_id_t *buckets;
832 u32 old_n_buckets;
833
834 old_n_buckets = lb->lb_n_buckets;
835 buckets = load_balance_get_buckets(lb);
836
837 load_balance_set_n_buckets(lb, n_buckets);
838 CLIB_MEMORY_BARRIER();
839
Neale Rannsac64b712018-10-08 14:51:11 +0000840 load_balance_fill_buckets(lb, nhs, buckets,
841 n_buckets, flags);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100842
Neale Ranns32e1c012016-11-22 17:07:28 +0000843 for (ii = n_buckets; ii < old_n_buckets; ii++)
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100844 {
845 dpo_reset(&buckets[ii]);
846 }
847 }
848 }
849 }
850
851 vec_foreach (nh, nhs)
852 {
853 dpo_reset(&nh->path_dpo);
854 }
Neale Ranns33a7dd52016-10-07 15:14:33 +0100855 vec_free(nhs);
Neale Rannsc0790cf2017-01-05 01:01:47 -0800856 vec_free(fixed_nhs);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100857
858 load_balance_map_unlock(old_lbmi);
859}
860
861static void
862load_balance_lock (dpo_id_t *dpo)
863{
864 load_balance_t *lb;
865
866 lb = load_balance_get(dpo->dpoi_index);
867
868 lb->lb_locks++;
869}
870
871static void
872load_balance_destroy (load_balance_t *lb)
873{
874 dpo_id_t *buckets;
875 int i;
876
877 buckets = load_balance_get_buckets(lb);
878
879 for (i = 0; i < lb->lb_n_buckets; i++)
880 {
881 dpo_reset(&buckets[i]);
882 }
883
884 LB_DBG(lb, "destroy");
885 if (!LB_HAS_INLINE_BUCKETS(lb))
886 {
887 vec_free(lb->lb_buckets);
888 }
889
Neale Ranns3ee44042016-10-03 13:05:48 +0100890 fib_urpf_list_unlock(lb->lb_urpf);
891 load_balance_map_unlock(lb->lb_map);
892
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100893 pool_put(load_balance_pool, lb);
894}
895
896static void
897load_balance_unlock (dpo_id_t *dpo)
898{
899 load_balance_t *lb;
900
901 lb = load_balance_get(dpo->dpoi_index);
902
903 lb->lb_locks--;
904
905 if (0 == lb->lb_locks)
906 {
907 load_balance_destroy(lb);
908 }
909}
910
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100911static void
912load_balance_mem_show (void)
913{
914 fib_show_memory_usage("load-balance",
915 pool_elts(load_balance_pool),
916 pool_len(load_balance_pool),
917 sizeof(load_balance_t));
Neale Ranns3ee44042016-10-03 13:05:48 +0100918 load_balance_map_show_mem();
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100919}
920
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100921const static dpo_vft_t lb_vft = {
922 .dv_lock = load_balance_lock,
923 .dv_unlock = load_balance_unlock,
924 .dv_format = format_load_balance_dpo,
Neale Ranns6c3ebcc2016-10-02 21:20:15 +0100925 .dv_mem_show = load_balance_mem_show,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100926};
927
928/**
929 * @brief The per-protocol VLIB graph nodes that are assigned to a load-balance
930 * object.
931 *
932 * this means that these graph nodes are ones from which a load-balance is the
933 * parent object in the DPO-graph.
934 *
935 * We do not list all the load-balance nodes, such as the *-lookup. instead
936 * we are relying on the correct use of the .sibling_of field when setting
937 * up these sibling nodes.
938 */
939const static char* const load_balance_ip4_nodes[] =
940{
941 "ip4-load-balance",
942 NULL,
943};
944const static char* const load_balance_ip6_nodes[] =
945{
946 "ip6-load-balance",
947 NULL,
948};
949const static char* const load_balance_mpls_nodes[] =
950{
951 "mpls-load-balance",
952 NULL,
953};
Neale Ranns5e575b12016-10-03 09:40:25 +0100954const static char* const load_balance_l2_nodes[] =
955{
956 "l2-load-balance",
957 NULL,
958};
Florin Corasb69111e2017-02-13 23:55:27 -0800959const static char* const load_balance_nsh_nodes[] =
960{
961 "nsh-load-balance",
Gabriel Ganne88fd5042017-11-14 16:07:34 +0100962 NULL
Neale Rannsd792d9c2017-10-21 10:53:20 -0700963};
964const static char* const load_balance_bier_nodes[] =
965{
966 "bier-load-balance",
Florin Corasb69111e2017-02-13 23:55:27 -0800967 NULL,
968};
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100969const static char* const * const load_balance_nodes[DPO_PROTO_NUM] =
970{
971 [DPO_PROTO_IP4] = load_balance_ip4_nodes,
972 [DPO_PROTO_IP6] = load_balance_ip6_nodes,
973 [DPO_PROTO_MPLS] = load_balance_mpls_nodes,
Neale Ranns5e575b12016-10-03 09:40:25 +0100974 [DPO_PROTO_ETHERNET] = load_balance_l2_nodes,
Florin Corasb69111e2017-02-13 23:55:27 -0800975 [DPO_PROTO_NSH] = load_balance_nsh_nodes,
Neale Rannsd792d9c2017-10-21 10:53:20 -0700976 [DPO_PROTO_BIER] = load_balance_bier_nodes,
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100977};
978
979void
980load_balance_module_init (void)
981{
Neale Rannsa3af3372017-03-28 03:49:52 -0700982 index_t lbi;
983
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100984 dpo_register(DPO_LOAD_BALANCE, &lb_vft, load_balance_nodes);
985
Neale Ranns04a75e32017-03-23 06:46:01 -0700986 /*
987 * Special LB with index zero. we need to define this since the v4 mtrie
988 * assumes an index of 0 implies the ply is empty. therefore all 'real'
989 * adjs need a non-zero index.
Neale Rannsa3af3372017-03-28 03:49:52 -0700990 * This should never be used, but just in case, stack it on a drop.
Neale Ranns04a75e32017-03-23 06:46:01 -0700991 */
Neale Rannsa3af3372017-03-28 03:49:52 -0700992 lbi = load_balance_create(1, DPO_PROTO_IP4, 0);
993 load_balance_set_bucket(lbi, 0, drop_dpo_get(DPO_PROTO_IP4));
Neale Ranns04a75e32017-03-23 06:46:01 -0700994
Neale Ranns710071b2018-09-24 12:36:26 +0000995 load_balance_logger =
996 vlib_log_register_class("dpo", "load-balance");
997
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100998 load_balance_map_module_init();
999}
1000
1001static clib_error_t *
1002load_balance_show (vlib_main_t * vm,
1003 unformat_input_t * input,
1004 vlib_cli_command_t * cmd)
1005{
1006 index_t lbi = INDEX_INVALID;
1007
1008 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1009 {
1010 if (unformat (input, "%d", &lbi))
1011 ;
1012 else
1013 break;
1014 }
1015
1016 if (INDEX_INVALID != lbi)
1017 {
Neale Rannsac64b712018-10-08 14:51:11 +00001018 if (pool_is_free_index(load_balance_pool, lbi))
1019 {
1020 vlib_cli_output (vm, "no such load-balance:%d", lbi);
1021 }
1022 else
1023 {
1024 vlib_cli_output (vm, "%U", format_load_balance, lbi,
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001025 LOAD_BALANCE_FORMAT_DETAIL);
Neale Rannsac64b712018-10-08 14:51:11 +00001026 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001027 }
1028 else
1029 {
1030 load_balance_t *lb;
1031
Damjan Marionb2c31b62020-12-13 21:47:40 +01001032 pool_foreach (lb, load_balance_pool)
1033 {
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001034 vlib_cli_output (vm, "%U", format_load_balance,
1035 load_balance_get_index(lb),
1036 LOAD_BALANCE_FORMAT_NONE);
Damjan Marionb2c31b62020-12-13 21:47:40 +01001037 }
Neale Ranns0bfe5d82016-08-25 15:29:12 +01001038 }
1039
1040 return 0;
1041}
1042
1043VLIB_CLI_COMMAND (load_balance_show_command, static) = {
1044 .path = "show load-balance",
1045 .short_help = "show load-balance [<index>]",
1046 .function = load_balance_show,
1047};
Neale Ranns5e575b12016-10-03 09:40:25 +01001048
1049
1050always_inline u32
1051ip_flow_hash (void *data)
1052{
1053 ip4_header_t *iph = (ip4_header_t *) data;
1054
1055 if ((iph->ip_version_and_header_length & 0xF0) == 0x40)
1056 return ip4_compute_flow_hash (iph, IP_FLOW_HASH_DEFAULT);
1057 else
1058 return ip6_compute_flow_hash ((ip6_header_t *) iph, IP_FLOW_HASH_DEFAULT);
1059}
1060
1061always_inline u64
1062mac_to_u64 (u8 * m)
1063{
1064 return (*((u64 *) m) & 0xffffffffffff);
1065}
1066
1067always_inline u32
1068l2_flow_hash (vlib_buffer_t * b0)
1069{
1070 ethernet_header_t *eh;
1071 u64 a, b, c;
1072 uword is_ip, eh_size;
1073 u16 eh_type;
1074
1075 eh = vlib_buffer_get_current (b0);
1076 eh_type = clib_net_to_host_u16 (eh->type);
1077 eh_size = ethernet_buffer_header_size (b0);
1078
1079 is_ip = (eh_type == ETHERNET_TYPE_IP4 || eh_type == ETHERNET_TYPE_IP6);
1080
1081 /* since we have 2 cache lines, use them */
1082 if (is_ip)
1083 a = ip_flow_hash ((u8 *) vlib_buffer_get_current (b0) + eh_size);
1084 else
1085 a = eh->type;
1086
1087 b = mac_to_u64 ((u8 *) eh->dst_address);
1088 c = mac_to_u64 ((u8 *) eh->src_address);
1089 hash_mix64 (a, b, c);
1090
1091 return (u32) c;
1092}
1093
1094typedef struct load_balance_trace_t_
1095{
1096 index_t lb_index;
1097} load_balance_trace_t;
1098
Neale Rannsd792d9c2017-10-21 10:53:20 -07001099always_inline uword
1100load_balance_inline (vlib_main_t * vm,
1101 vlib_node_runtime_t * node,
1102 vlib_frame_t * frame,
1103 int is_l2)
Neale Ranns5e575b12016-10-03 09:40:25 +01001104{
1105 u32 n_left_from, next_index, *from, *to_next;
1106
1107 from = vlib_frame_vector_args (frame);
1108 n_left_from = frame->n_vectors;
1109
1110 next_index = node->cached_next_index;
1111
1112 while (n_left_from > 0)
1113 {
1114 u32 n_left_to_next;
1115
1116 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1117
1118 while (n_left_from > 0 && n_left_to_next > 0)
1119 {
1120 vlib_buffer_t *b0;
1121 u32 bi0, lbi0, next0;
1122 const dpo_id_t *dpo0;
1123 const load_balance_t *lb0;
1124
1125 bi0 = from[0];
1126 to_next[0] = bi0;
1127 from += 1;
1128 to_next += 1;
1129 n_left_from -= 1;
1130 n_left_to_next -= 1;
1131
1132 b0 = vlib_get_buffer (vm, bi0);
1133
1134 /* lookup dst + src mac */
1135 lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1136 lb0 = load_balance_get(lbi0);
1137
Neale Rannsd792d9c2017-10-21 10:53:20 -07001138 if (is_l2)
1139 {
1140 vnet_buffer(b0)->ip.flow_hash = l2_flow_hash(b0);
1141 }
1142 else
1143 {
1144 /* it's BIER */
1145 const bier_hdr_t *bh0 = vlib_buffer_get_current(b0);
Neale Rannsef90ed02018-09-13 08:45:12 -07001146 vnet_buffer(b0)->ip.flow_hash = bier_compute_flow_hash(bh0);
Neale Rannsd792d9c2017-10-21 10:53:20 -07001147 }
Neale Ranns5e575b12016-10-03 09:40:25 +01001148
Dave Barach8341f762020-06-03 08:05:15 -04001149 dpo0 = load_balance_get_bucket_i(lb0,
Neale Ranns5e575b12016-10-03 09:40:25 +01001150 vnet_buffer(b0)->ip.flow_hash &
1151 (lb0->lb_n_buckets_minus_1));
1152
1153 next0 = dpo0->dpoi_next_node;
1154 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1155
1156 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1157 {
1158 load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1159 sizeof (*tr));
1160 tr->lb_index = lbi0;
1161 }
1162 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1163 n_left_to_next, bi0, next0);
1164 }
1165
1166 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1167 }
1168
1169 return frame->n_vectors;
1170}
1171
Neale Rannsd792d9c2017-10-21 10:53:20 -07001172static uword
1173l2_load_balance (vlib_main_t * vm,
1174 vlib_node_runtime_t * node,
1175 vlib_frame_t * frame)
1176{
1177 return (load_balance_inline(vm, node, frame, 1));
1178}
1179
Neale Ranns5e575b12016-10-03 09:40:25 +01001180static u8 *
Florin Corasb69111e2017-02-13 23:55:27 -08001181format_l2_load_balance_trace (u8 * s, va_list * args)
Neale Ranns5e575b12016-10-03 09:40:25 +01001182{
1183 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1184 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1185 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1186
1187 s = format (s, "L2-load-balance: index %d", t->lb_index);
1188 return s;
1189}
1190
1191/**
1192 * @brief
1193 */
1194VLIB_REGISTER_NODE (l2_load_balance_node) = {
1195 .function = l2_load_balance,
1196 .name = "l2-load-balance",
1197 .vector_size = sizeof (u32),
1198
Florin Corasb69111e2017-02-13 23:55:27 -08001199 .format_trace = format_l2_load_balance_trace,
1200 .n_next_nodes = 1,
1201 .next_nodes = {
1202 [0] = "error-drop",
1203 },
1204};
1205
1206static uword
1207nsh_load_balance (vlib_main_t * vm,
1208 vlib_node_runtime_t * node,
1209 vlib_frame_t * frame)
1210{
1211 u32 n_left_from, next_index, *from, *to_next;
1212
1213 from = vlib_frame_vector_args (frame);
1214 n_left_from = frame->n_vectors;
1215
1216 next_index = node->cached_next_index;
1217
1218 while (n_left_from > 0)
1219 {
1220 u32 n_left_to_next;
1221
1222 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1223
1224 while (n_left_from > 0 && n_left_to_next > 0)
1225 {
1226 vlib_buffer_t *b0;
1227 u32 bi0, lbi0, next0, *nsh0;
1228 const dpo_id_t *dpo0;
1229 const load_balance_t *lb0;
1230
1231 bi0 = from[0];
1232 to_next[0] = bi0;
1233 from += 1;
1234 to_next += 1;
1235 n_left_from -= 1;
1236 n_left_to_next -= 1;
1237
1238 b0 = vlib_get_buffer (vm, bi0);
1239
1240 lbi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
1241 lb0 = load_balance_get(lbi0);
1242
1243 /* SPI + SI are the second word of the NSH header */
1244 nsh0 = vlib_buffer_get_current (b0);
1245 vnet_buffer(b0)->ip.flow_hash = nsh0[1] % lb0->lb_n_buckets;
1246
1247 dpo0 = load_balance_get_bucket_i(lb0,
1248 vnet_buffer(b0)->ip.flow_hash &
1249 (lb0->lb_n_buckets_minus_1));
1250
1251 next0 = dpo0->dpoi_next_node;
1252 vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
1253
1254 if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
1255 {
1256 load_balance_trace_t *tr = vlib_add_trace (vm, node, b0,
1257 sizeof (*tr));
1258 tr->lb_index = lbi0;
1259 }
1260 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1261 n_left_to_next, bi0, next0);
1262 }
1263
1264 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1265 }
1266
1267 return frame->n_vectors;
1268}
1269
1270static u8 *
1271format_nsh_load_balance_trace (u8 * s, va_list * args)
1272{
1273 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1274 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1275 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1276
1277 s = format (s, "NSH-load-balance: index %d", t->lb_index);
1278 return s;
1279}
1280
1281/**
1282 * @brief
1283 */
1284VLIB_REGISTER_NODE (nsh_load_balance_node) = {
1285 .function = nsh_load_balance,
1286 .name = "nsh-load-balance",
1287 .vector_size = sizeof (u32),
1288
1289 .format_trace = format_nsh_load_balance_trace,
Neale Ranns5e575b12016-10-03 09:40:25 +01001290 .n_next_nodes = 1,
1291 .next_nodes = {
1292 [0] = "error-drop",
1293 },
1294};
Neale Rannsd792d9c2017-10-21 10:53:20 -07001295
1296static u8 *
1297format_bier_load_balance_trace (u8 * s, va_list * args)
1298{
1299 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1300 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1301 load_balance_trace_t *t = va_arg (*args, load_balance_trace_t *);
1302
1303 s = format (s, "BIER-load-balance: index %d", t->lb_index);
1304 return s;
1305}
1306
1307static uword
1308bier_load_balance (vlib_main_t * vm,
1309 vlib_node_runtime_t * node,
1310 vlib_frame_t * frame)
1311{
1312 return (load_balance_inline(vm, node, frame, 0));
1313}
1314
1315/**
1316 * @brief
1317 */
1318VLIB_REGISTER_NODE (bier_load_balance_node) = {
1319 .function = bier_load_balance,
1320 .name = "bier-load-balance",
1321 .vector_size = sizeof (u32),
1322
1323 .format_trace = format_bier_load_balance_trace,
1324 .sibling_of = "mpls-load-balance",
1325};