/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @brief Load-balance maps: re-stripe the bucket-to-path mapping of a
 * load-balance DPO when paths fail, so traffic is spread, in the
 * intended ratio, over the paths that remain usable (PIC edge).
 */
#include <vnet/fib/fib_path.h>
#include <vnet/fib/fib_node_list.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/load_balance.h>

/**
 * A hash-table of load-balance maps keyed by path index.
 * This provides the fast lookup of the affected LB maps when a path
 * goes down.
 */
static uword *lb_maps_by_path_index;
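
/*
 * A sketch of the intended lookup pattern (it mirrors
 * load_balance_map_path_state_change() below; walk_fn and ctx are
 * placeholder names):
 *
 *   uword *p = hash_get (lb_maps_by_path_index, path_index);
 *   if (NULL != p)
 *       fib_node_list_walk (p[0], walk_fn, ctx);
 */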

/**
 * A hash-table of load-balance maps keyed by the set of paths.
 * This provides the LB map sharing.
 * An LB map does not necessarily use all the paths in the list, since
 * the entry requesting the map may not have an out-going label for
 * each of the paths.
 */
static uword *load_balance_map_db;

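/*
 * Sharing in action (a sketch; n_buckets, sum and paths stand for the
 * same normalised inputs in both calls): two entries requesting a map
 * over the same path set receive the same map index and each hold a
 * lock on it.
 *
 *   index_t m1 = load_balance_map_add_or_lock (n_buckets, sum, paths);
 *   index_t m2 = load_balance_map_add_or_lock (n_buckets, sum, paths);
 *   ASSERT (m1 == m2);
 */
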
typedef enum load_balance_map_path_flags_t_
{
    LOAD_BALANCE_MAP_PATH_UP = (1 << 0),
    LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
} __attribute__ ((packed)) load_balance_map_path_flags_t;

typedef struct load_balance_map_path_t_ {
    /**
     * Index of the path
     */
    fib_node_index_t lbmp_index;

    /**
     * Sibling index in the list of all maps with this path index
     */
    fib_node_index_t lbmp_sibling;

    /**
     * The normalised weight of the path
     */
    u32 lbmp_weight;

    /**
     * The state of the path
     */
    load_balance_map_path_flags_t lbmp_flags;
} load_balance_map_path_t;

/**
 * The global pool of LB maps
 */
load_balance_map_t *load_balance_map_pool;

/**
 * The logger
 */
vlib_log_class_t load_balance_map_logger;

/*
 * Debug macro
 */
#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)       \
{                                                       \
    vlib_log_debug(load_balance_map_logger,             \
                   "lbm:" _fmt,                         \
                   ##_args);                            \
}

static index_t
load_balance_map_get_index (load_balance_map_t *lbm)
{
    return (lbm - load_balance_map_pool);
}

u8*
format_load_balance_map (u8 *s, va_list * ap)
{
    index_t lbmi = va_arg(*ap, index_t);
    u32 indent = va_arg(*ap, u32);
    load_balance_map_t *lbm;
    u32 n_buckets, ii;

    lbm = load_balance_map_get(lbmi);
    n_buckets = vec_len(lbm->lbm_buckets);

    s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
    s = format(s, "\n%U index:", format_white_space, indent+2);
    for (ii = 0; ii < n_buckets; ii++)
    {
        s = format(s, "%5d", ii);
    }
    s = format(s, "\n%U map:", format_white_space, indent+2);
    for (ii = 0; ii < n_buckets; ii++)
    {
        s = format(s, "%5d", lbm->lbm_buckets[ii]);
    }

    return (s);
}
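
/*
 * For illustration, the output for a 4-bucket map over two paths
 * (normalised weights 3 and 1, all paths up) resembles:
 *
 *   load-balance-map: index:0 buckets:4
 *     index:    0    1    2    3
 *     map:      0    1    2    3
 */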

static uword
load_balance_map_hash (load_balance_map_t *lbm)
{
    u32 old_lbm_hash, new_lbm_hash, hash;
    load_balance_map_path_t *lb_path;

    new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths);

    vec_foreach (lb_path, lbm->lbm_paths)
    {
        hash = lb_path->lbmp_index;
        hash_mix32(hash, old_lbm_hash, new_lbm_hash);
    }

    return (new_lbm_hash);
}

always_inline uword
load_balance_map_db_hash_key_from_index (uword index)
{
    return 1 + 2*index;
}

always_inline uword
load_balance_map_db_hash_key_is_index (uword key)
{
    return key & 1;
}

always_inline uword
load_balance_map_db_hash_key_2_index (uword key)
{
    ASSERT (load_balance_map_db_hash_key_is_index (key));
    return key / 2;
}

static load_balance_map_t*
load_balance_map_db_get_from_hash_key (uword key)
{
    load_balance_map_t *lbm;

    if (load_balance_map_db_hash_key_is_index (key))
    {
        index_t lbm_index;

        lbm_index = load_balance_map_db_hash_key_2_index(key);
        lbm = load_balance_map_get(lbm_index);
    }
    else
    {
        lbm = uword_to_pointer (key, load_balance_map_t *);
    }

    return (lbm);
}

static uword
load_balance_map_db_hash_key_sum (hash_t * h,
                                  uword key)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_db_get_from_hash_key(key);

    return (load_balance_map_hash(lbm));
}

static uword
load_balance_map_db_hash_key_equal (hash_t * h,
                                    uword key1,
                                    uword key2)
{
    load_balance_map_t *lbm1, *lbm2;

    lbm1 = load_balance_map_db_get_from_hash_key(key1);
    lbm2 = load_balance_map_db_get_from_hash_key(key2);

    return (load_balance_map_hash(lbm1) ==
            load_balance_map_hash(lbm2));
}

static index_t
load_balance_map_db_find (load_balance_map_t *lbm)
{
    uword *p;

    p = hash_get(load_balance_map_db, lbm);

    if (NULL != p)
    {
        return p[0];
    }

    return (FIB_NODE_INDEX_INVALID);
}

static void
load_balance_map_db_insert (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    fib_node_list_t list;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));

    /*
     * insert into the DB based on the set of paths.
     */
    hash_set (load_balance_map_db,
              load_balance_map_db_hash_key_from_index(
                  load_balance_map_get_index(lbm)),
              load_balance_map_get_index(lbm));

    /*
     * insert into each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        if (NULL == p)
        {
            list = fib_node_list_create();
            hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
        }
        else
        {
            list = p[0];
        }

        lbmp->lbmp_sibling =
            fib_node_list_push_front(list,
                                     0, FIB_NODE_TYPE_FIRST,
                                     load_balance_map_get_index(lbm));
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
}

static void
load_balance_map_db_remove (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));

    hash_unset(load_balance_map_db,
               load_balance_map_db_hash_key_from_index(
                   load_balance_map_get_index(lbm)));

    /*
     * remove from each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        ALWAYS_ASSERT(NULL != p);

        fib_node_list_remove(p[0], lbmp->lbmp_sibling);
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
}

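/*
 * Worked example for the fill algorithm below (illustrative, not part
 * of the original file): two paths with normalised weights 3 and 1
 * give a 4-bucket map. With both paths up the map is the identity:
 *
 *   buckets: [0, 1, 2, 3]
 *
 * If the weight-1 path (which owns bucket 3) becomes unresolved, the
 * scratch vector of usable bucket indices is [0, 1, 2], and its
 * entries are dealt round-robin to the dead path's buckets:
 *
 *   buckets: [0, 1, 2, 0]
 *
 * so flows that hashed to bucket 3 are redirected to the surviving
 * paths, in the ratio of their weights.
 */
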
/**
 * @brief From the paths that are usable, fill the map; buckets owned
 * by unusable paths are re-assigned, in ratio, to the usable ones.
 */
static void
load_balance_map_fill (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    u32 n_buckets, bucket, ii, jj;
    u16 *tmp_buckets;

    tmp_buckets = NULL;
    n_buckets = vec_len(lbm->lbm_buckets);

    /*
     * run through the set of paths once, and build a vector of the
     * indices that are usable. we do this in a scratch space, since we
     * need to refer to it multiple times as we build the real buckets.
     */
    vec_validate(tmp_buckets, n_buckets-1);

    bucket = jj = 0;
    vec_foreach (lbmp, lbm->lbm_paths)
    {
        if (fib_path_is_resolved(lbmp->lbmp_index))
        {
            for (ii = 0; ii < lbmp->lbmp_weight; ii++)
            {
                tmp_buckets[jj++] = bucket++;
            }
        }
        else
        {
            bucket += lbmp->lbmp_weight;
        }
    }
    _vec_len(tmp_buckets) = jj;

    /*
     * If the number of temporaries written is as many as we need, implying
     * all paths were up, then we can simply copy the scratch area over the
     * actual buckets' memory
     */
    if (jj == n_buckets)
    {
        memcpy(lbm->lbm_buckets,
               tmp_buckets,
               sizeof(lbm->lbm_buckets[0]) * n_buckets);
    }
    else
    {
        /*
         * one or more paths are down.
         */
        if (0 == vec_len(tmp_buckets))
        {
            /*
             * if the scratch area is empty, then no paths are usable.
             * they will all drop. so use them all, lest we account drops
             * against only one.
             */
            for (bucket = 0; bucket < n_buckets; bucket++)
            {
                lbm->lbm_buckets[bucket] = bucket;
            }
        }
        else
        {
            bucket = jj = 0;
            vec_foreach (lbmp, lbm->lbm_paths)
            {
                if (fib_path_is_resolved(lbmp->lbmp_index))
                {
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = bucket;
                        bucket++;
                    }
                }
                else
                {
                    /*
                     * path is unusable.
                     * cycle through the scratch space selecting an index.
                     * this means we load-balance, in the intended ratio,
                     * over the paths that are still usable.
                     */
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = tmp_buckets[jj];
                        jj = (jj + 1) % vec_len(tmp_buckets);
                        bucket++;
                    }
                }
            }
        }
    }

    vec_free(tmp_buckets);
}

static load_balance_map_t*
load_balance_map_alloc (const load_balance_path_t *paths)
{
    load_balance_map_t *lbm;
    u32 ii;
    vlib_main_t *vm;
    u8 did_barrier_sync;

    dpo_pool_barrier_sync (vm, load_balance_map_pool, did_barrier_sync);
    pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
    dpo_pool_barrier_release (vm, did_barrier_sync);

    clib_memset(lbm, 0, sizeof(*lbm));

    vec_validate(lbm->lbm_paths, vec_len(paths)-1);

    vec_foreach_index(ii, paths)
    {
        lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index;
        lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
    }

    return (lbm);
}

static load_balance_map_t *
load_balance_map_init (load_balance_map_t *lbm,
                       u32 n_buckets,
                       u32 sum_of_weights)
{
    lbm->lbm_sum_of_norm_weights = sum_of_weights;
    vec_validate(lbm->lbm_buckets, n_buckets-1);

    load_balance_map_db_insert(lbm);

    load_balance_map_fill(lbm);

    load_balance_map_logger =
        vlib_log_register_class ("dpo", "load-balance-map");

    return (lbm);
}

static void
load_balance_map_destroy (load_balance_map_t *lbm)
{
    vec_free(lbm->lbm_paths);
    vec_free(lbm->lbm_buckets);
    pool_put(load_balance_map_pool, lbm);
}

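/**
 * @brief Make/add a new, or lock an existing, load-balance map for the
 * given path set. A temporary map is allocated to act as the DB search
 * key and is destroyed again if a sharable map already exists.
 */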
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100436index_t
437load_balance_map_add_or_lock (u32 n_buckets,
438 u32 sum_of_weights,
439 const load_balance_path_t *paths)
440{
441 load_balance_map_t *tmp, *lbm;
442 index_t lbmi;
443
444 tmp = load_balance_map_alloc(paths);
445
446 lbmi = load_balance_map_db_find(tmp);
447
448 if (INDEX_INVALID == lbmi)
449 {
450 lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
451 }
452 else
453 {
454 lbm = load_balance_map_get(lbmi);
Neale Ranns994dab42017-04-18 12:56:45 -0700455 load_balance_map_destroy(tmp);
Neale Ranns0bfe5d82016-08-25 15:29:12 +0100456 }
457
458 lbm->lbm_locks++;
459
460 return (load_balance_map_get_index(lbm));
461}
462
void
load_balance_map_lock (index_t lbmi)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_get(lbmi);

    lbm->lbm_locks++;
}

void
load_balance_map_unlock (index_t lbmi)
{
    load_balance_map_t *lbm;

    if (INDEX_INVALID == lbmi)
    {
        return;
    }

    lbm = load_balance_map_get(lbmi);

    lbm->lbm_locks--;

    if (0 == lbm->lbm_locks)
    {
        load_balance_map_db_remove(lbm);
        load_balance_map_destroy(lbm);
    }
}

static walk_rc_t
load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
                                         void *ctx)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_get(fptr->fnp_index);

    load_balance_map_fill(lbm);

    return (WALK_CONTINUE);
}

/**
 * @brief The state of a path has changed (it has no doubt gone down).
 * This is the trigger to perform a PIC edge cutover and update the maps
 * to exclude this path.
 */
void
load_balance_map_path_state_change (fib_node_index_t path_index)
{
    uword *p;

    /*
     * re-stripe the buckets for each affected map
     */
    p = hash_get(lb_maps_by_path_index, path_index);

    if (NULL == p)
        return;

    fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL);
}

/**
 * @brief Module initialisation; create the map DB and the per-path DB.
 */
void
load_balance_map_module_init (void)
{
    load_balance_map_db =
        hash_create2 (/* elts */ 0,
                      /* user */ 0,
                      /* value_bytes */ sizeof (index_t),
                      load_balance_map_db_hash_key_sum,
                      load_balance_map_db_hash_key_equal,
                      /* format pair/arg */
                      0, 0);

    lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
}

void
load_balance_map_show_mem (void)
{
    fib_show_memory_usage("Load-Balance Map",
                          pool_elts(load_balance_map_pool),
                          pool_len(load_balance_map_pool),
                          sizeof(load_balance_map_t));
}

static clib_error_t *
load_balance_map_show (vlib_main_t * vm,
                       unformat_input_t * input,
                       vlib_cli_command_t * cmd)
{
    index_t lbmi = INDEX_INVALID;

    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
        if (unformat (input, "%d", &lbmi))
            ;
        else
            break;
    }

    if (INDEX_INVALID != lbmi)
    {
        vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
    }
    else
    {
        load_balance_map_t *lbm;

        pool_foreach (lbm, load_balance_map_pool)
        {
            vlib_cli_output (vm, "%U", format_load_balance_map,
                             load_balance_map_get_index(lbm), 0);
        }
    }

    return 0;
}

VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
    .path = "show load-balance-map",
    .short_help = "show load-balance-map [<index>]",
    .function = load_balance_map_show,
};