/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @brief Load-balance maps. A load-balance map translates the bucket
 * indices of a load-balance object so that buckets belonging to unusable
 * paths are redirected to buckets of paths that are still usable, in the
 * intended weight ratio. This is the basis of the PIC edge cutover.
 */
#include <vnet/fib/fib_path.h>
#include <vnet/fib/fib_node_list.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/load_balance.h>

/**
 * A hash-table of load-balance maps keyed by path index.
 * This provides the fast lookup of the LB maps that use a path,
 * needed when that path goes down.
 */
static uword *lb_maps_by_path_index;

/**
 * A hash-table of load-balance maps keyed by the set of paths.
 * This provides the LB map sharing.
 * LB maps do not necessarily use all the paths in the list, since
 * the entry that is requesting the map may not have an out-going
 * label for each of the paths.
 */
static uword *load_balance_map_db;

typedef enum load_balance_map_path_flags_t_
{
    LOAD_BALANCE_MAP_PATH_UP     = (1 << 0),
    LOAD_BALANCE_MAP_PATH_USABLE = (1 << 1),
} __attribute__ ((packed)) load_balance_map_path_flags_t;

typedef struct load_balance_map_path_t_ {
    /**
     * Index of the path
     */
    fib_node_index_t lbmp_index;

    /**
     * Sibling index in the list of all maps with this path index
     */
    fib_node_index_t lbmp_sibling;

    /**
     * The normalised weight of the path
     */
    u32 lbmp_weight;

    /**
     * The state of the path
     */
    load_balance_map_path_flags_t lbmp_flags;
} load_balance_map_path_t;

/**
 * The global pool of LB maps
 */
load_balance_map_t *load_balance_map_pool;

/*
 * Debug macro
 */
#ifdef FIB_DEBUG
#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)       \
{                                                       \
    clib_warning("lbm: FIXME" _fmt,                     \
                 ##_args);                              \
}
#else
#define LOAD_BALANCE_MAP_DBG(_pl, _fmt, _args...)
#endif

static index_t
load_balance_map_get_index (load_balance_map_t *lbm)
{
    return (lbm - load_balance_map_pool);
}

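/**
 * Format a load-balance map: print each bucket index and the bucket it is
 * currently mapped to.
 */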
u8*
format_load_balance_map (u8 *s, va_list * ap)
{
    index_t lbmi = va_arg(*ap, index_t);
    u32 indent = va_arg(*ap, u32);
    load_balance_map_t *lbm;
    u32 n_buckets, ii;

    lbm = load_balance_map_get(lbmi);
    n_buckets = vec_len(lbm->lbm_buckets);

    s = format(s, "load-balance-map: index:%d buckets:%d", lbmi, n_buckets);
    s = format(s, "\n%U index:", format_white_space, indent+2);
    for (ii = 0; ii < n_buckets; ii++)
    {
        s = format(s, "%5d", ii);
    }
    s = format(s, "\n%U map:", format_white_space, indent+2);
    for (ii = 0; ii < n_buckets; ii++)
    {
        s = format(s, "%5d", lbm->lbm_buckets[ii]);
    }

    return (s);
}


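/**
 * Hash a map on the set of paths it was built from: the number of paths
 * and each path's index are mixed into the hash. Maps built from the same
 * paths therefore hash (and, given the DB's equality function below,
 * compare) equal, which is what allows them to be shared.
 */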
static uword
load_balance_map_hash (load_balance_map_t *lbm)
{
    u32 old_lbm_hash, new_lbm_hash, hash;
    load_balance_map_path_t *lb_path;

    new_lbm_hash = old_lbm_hash = vec_len(lbm->lbm_paths);

    vec_foreach (lb_path, lbm->lbm_paths)
    {
        hash = lb_path->lbmp_index;
        hash_mix32(hash, old_lbm_hash, new_lbm_hash);
    }

    return (new_lbm_hash);
}

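/*
 * A note on the DB key scheme (added commentary, inferred from the
 * functions below):
 *  - maps already in the DB are stored under the key (2 * index + 1),
 *    so such keys always have their least-significant bit set;
 *  - lookups are performed with a raw pointer to a not-yet-inserted map,
 *    which is at least word aligned and so has its LSB clear.
 * The LSB therefore distinguishes index keys from pointer keys.
 */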
always_inline uword
load_balance_map_db_hash_key_from_index (uword index)
{
    return 1 + 2*index;
}

always_inline uword
load_balance_map_db_hash_key_is_index (uword key)
{
    return key & 1;
}

always_inline uword
load_balance_map_db_hash_key_2_index (uword key)
{
    ASSERT (load_balance_map_db_hash_key_is_index (key));
    return key / 2;
}

static load_balance_map_t*
load_balance_map_db_get_from_hash_key (uword key)
{
    load_balance_map_t *lbm;

    if (load_balance_map_db_hash_key_is_index (key))
    {
        index_t lbm_index;

        lbm_index = load_balance_map_db_hash_key_2_index(key);
        lbm = load_balance_map_get(lbm_index);
    }
    else
    {
        lbm = uword_to_pointer (key, load_balance_map_t *);
    }

    return (lbm);
}

static uword
load_balance_map_db_hash_key_sum (hash_t * h,
                                  uword key)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_db_get_from_hash_key(key);

    return (load_balance_map_hash(lbm));
}

static uword
load_balance_map_db_hash_key_equal (hash_t * h,
                                    uword key1,
                                    uword key2)
{
    load_balance_map_t *lbm1, *lbm2;

    lbm1 = load_balance_map_db_get_from_hash_key(key1);
    lbm2 = load_balance_map_db_get_from_hash_key(key2);

    return (load_balance_map_hash(lbm1) ==
            load_balance_map_hash(lbm2));
}

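/**
 * Find a map in the DB that was built from the same set of paths.
 * Returns FIB_NODE_INDEX_INVALID if there is none.
 */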
static index_t
load_balance_map_db_find (load_balance_map_t *lbm)
{
    uword *p;

    p = hash_get(load_balance_map_db, lbm);

    if (NULL != p)
    {
        return p[0];
    }

    return (FIB_NODE_INDEX_INVALID);
}

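/**
 * Insert a map into the shared DB and onto the per-path list of each path
 * it uses, so it can be found again by set-of-paths and walked when one of
 * its paths changes state.
 */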
static void
load_balance_map_db_insert (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    fib_node_list_t list;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID == load_balance_map_db_find(lbm));

    /*
     * insert into the DB based on the set of paths.
     */
    hash_set (load_balance_map_db,
              load_balance_map_db_hash_key_from_index(
                  load_balance_map_get_index(lbm)),
              load_balance_map_get_index(lbm));

    /*
     * insert into each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        if (NULL == p)
        {
            list = fib_node_list_create();
            hash_set(lb_maps_by_path_index, lbmp->lbmp_index, list);
        }
        else
        {
            list = p[0];
        }

        lbmp->lbmp_sibling =
            fib_node_list_push_front(list,
                                     0, FIB_NODE_TYPE_FIRST,
                                     load_balance_map_get_index(lbm));
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-inserted");
}

static void
load_balance_map_db_remove (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    uword *p;

    ASSERT(FIB_NODE_INDEX_INVALID != load_balance_map_db_find(lbm));

    hash_unset(load_balance_map_db,
               load_balance_map_db_hash_key_from_index(
                   load_balance_map_get_index(lbm)));

    /*
     * remove from each per-path list.
     */
    vec_foreach(lbmp, lbm->lbm_paths)
    {
        p = hash_get(lb_maps_by_path_index, lbmp->lbmp_index);

        ASSERT(NULL != p);

        fib_node_list_remove(p[0], lbmp->lbmp_sibling);
    }

    LOAD_BALANCE_MAP_DBG(lbm, "DB-removed");
}

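/*
 * Worked example (illustrative commentary, not from the original source):
 * consider three paths A, B and C with normalised weights 2, 1 and 1,
 * i.e. four buckets. With all paths up the map is the identity:
 *
 *     [0, 1, 2, 3]
 *
 * If B (which owns bucket 2) becomes unresolved, the first pass below
 * collects the buckets of the usable paths into the scratch vector,
 * tmp = [0, 1, 3], and the second pass replaces B's bucket by cycling
 * through that vector:
 *
 *     [0, 1, 0, 3]
 *
 * With more than one redirected bucket the cycling spreads the traffic
 * over the remaining usable paths in their weight ratio.
 */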
/**
 * @brief From the paths that are usable, fill the map.
 */
static void
load_balance_map_fill (load_balance_map_t *lbm)
{
    load_balance_map_path_t *lbmp;
    u32 n_buckets, bucket, ii, jj;
    u16 *tmp_buckets;

    tmp_buckets = NULL;
    n_buckets = vec_len(lbm->lbm_buckets);

    /*
     * Run through the set of paths once and build a vector of the
     * bucket indices that are usable. We do this in a scratch space,
     * since we need to refer to it multiple times as we build the
     * real buckets.
     */
    vec_validate(tmp_buckets, n_buckets-1);

    bucket = jj = 0;
    vec_foreach (lbmp, lbm->lbm_paths)
    {
        if (fib_path_is_resolved(lbmp->lbmp_index))
        {
            for (ii = 0; ii < lbmp->lbmp_weight; ii++)
            {
                tmp_buckets[jj++] = bucket++;
            }
        }
        else
        {
            bucket += lbmp->lbmp_weight;
        }
    }
    _vec_len(tmp_buckets) = jj;

    /*
     * If the number of temporaries written is as many as we need, implying
     * all paths were up, then we can simply copy the scratch area over the
     * actual buckets' memory
     */
    if (jj == n_buckets)
    {
        memcpy(lbm->lbm_buckets,
               tmp_buckets,
               sizeof(lbm->lbm_buckets[0]) * n_buckets);
    }
    else
    {
        /*
         * one or more paths are down.
         */
        if (0 == vec_len(tmp_buckets))
        {
            /*
             * If the scratch area is empty, then no paths are usable;
             * they will all drop. So use them all, lest we account drops
             * against only one.
             */
            for (bucket = 0; bucket < n_buckets; bucket++)
            {
                lbm->lbm_buckets[bucket] = bucket;
            }
        }
        else
        {
            bucket = jj = 0;
            vec_foreach (lbmp, lbm->lbm_paths)
            {
                if (fib_path_is_resolved(lbmp->lbmp_index))
                {
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = bucket;
                        bucket++;
                    }
                }
                else
                {
                    /*
                     * The path is unusable. Cycle through the scratch
                     * space selecting an index; this means we load-balance,
                     * in the intended ratio, over the paths that are still
                     * usable.
                     */
                    for (ii = 0; ii < lbmp->lbmp_weight; ii++)
                    {
                        lbm->lbm_buckets[bucket] = tmp_buckets[jj];
                        jj = (jj + 1) % vec_len(tmp_buckets);
                        bucket++;
                    }
                }
            }
        }
    }

    vec_free(tmp_buckets);
}

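/**
 * Allocate a map from the pool and copy in the paths' indices and weights.
 * The map is not yet sized, filled, nor inserted into the DB.
 */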
static load_balance_map_t*
load_balance_map_alloc (const load_balance_path_t *paths)
{
    load_balance_map_t *lbm;
    u32 ii;

    pool_get_aligned(load_balance_map_pool, lbm, CLIB_CACHE_LINE_BYTES);
    memset(lbm, 0, sizeof(*lbm));

    vec_validate(lbm->lbm_paths, vec_len(paths)-1);

    vec_foreach_index(ii, paths)
    {
        lbm->lbm_paths[ii].lbmp_index = paths[ii].path_index;
        lbm->lbm_paths[ii].lbmp_weight = paths[ii].path_weight;
    }

    return (lbm);
}

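/**
 * Complete the initialisation of a new map: record the normalised weight
 * sum, size the bucket vector, insert it into the DB and fill the buckets.
 */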
static load_balance_map_t *
load_balance_map_init (load_balance_map_t *lbm,
                       u32 n_buckets,
                       u32 sum_of_weights)
{
    lbm->lbm_sum_of_norm_weights = sum_of_weights;
    vec_validate(lbm->lbm_buckets, n_buckets-1);

    load_balance_map_db_insert(lbm);

    load_balance_map_fill(lbm);

    return (lbm);
}

static void
load_balance_map_destroy (load_balance_map_t *lbm)
{
    vec_free(lbm->lbm_paths);
    vec_free(lbm->lbm_buckets);
    pool_put(load_balance_map_pool, lbm);
}

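/*
 * Typical usage by a consumer (an illustrative sketch, not lifted from a
 * particular caller):
 *
 *     index_t lbmi;
 *
 *     lbmi = load_balance_map_add_or_lock(n_buckets, sum_of_weights, paths);
 *     ...
 *     load_balance_map_unlock(lbmi);
 *
 * If a map built from the same set of paths already exists it is locked
 * and reused; otherwise a new one is created, filled and added to the DB.
 */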
index_t
load_balance_map_add_or_lock (u32 n_buckets,
                              u32 sum_of_weights,
                              const load_balance_path_t *paths)
{
    load_balance_map_t *tmp, *lbm;
    index_t lbmi;

    tmp = load_balance_map_alloc(paths);

    lbmi = load_balance_map_db_find(tmp);

    if (INDEX_INVALID == lbmi)
    {
        lbm = load_balance_map_init(tmp, n_buckets, sum_of_weights);
    }
    else
    {
        lbm = load_balance_map_get(lbmi);
        load_balance_map_destroy(tmp);
    }

    lbm->lbm_locks++;

    return (load_balance_map_get_index(lbm));
}

void
load_balance_map_lock (index_t lbmi)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_get(lbmi);

    lbm->lbm_locks++;
}

void
load_balance_map_unlock (index_t lbmi)
{
    load_balance_map_t *lbm;

    if (INDEX_INVALID == lbmi)
    {
        return;
    }

    lbm = load_balance_map_get(lbmi);

    lbm->lbm_locks--;

    if (0 == lbm->lbm_locks)
    {
        load_balance_map_db_remove(lbm);
        load_balance_map_destroy(lbm);
    }
}

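/**
 * Callback invoked for each map that uses a path whose state has changed;
 * re-fill the map's buckets from the paths that are still usable.
 */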
static int
load_balance_map_path_state_change_walk (fib_node_ptr_t *fptr,
                                         void *ctx)
{
    load_balance_map_t *lbm;

    lbm = load_balance_map_get(fptr->fnp_index);

    load_balance_map_fill(lbm);

    return (!0);
}

/**
 * @brief The state of a path has changed (it has, no doubt, gone down).
 * This is the trigger to perform a PIC edge cutover and update the maps
 * to exclude this path.
 */
void
load_balance_map_path_state_change (fib_node_index_t path_index)
{
    uword *p;

    /*
     * re-stripe the buckets for each affected map
     */
    p = hash_get(lb_maps_by_path_index, path_index);

    if (NULL == p)
        return;

    fib_node_list_walk(p[0], load_balance_map_path_state_change_walk, NULL);
}

/**
 * @brief Module initialisation: create the shared load-balance map DB and
 * the per-path-index hash table.
 */
void
load_balance_map_module_init (void)
{
    load_balance_map_db =
        hash_create2 (/* elts */ 0,
                      /* user */ 0,
                      /* value_bytes */ sizeof (index_t),
                      load_balance_map_db_hash_key_sum,
                      load_balance_map_db_hash_key_equal,
                      /* format pair/arg */
                      0, 0);

    lb_maps_by_path_index = hash_create(0, sizeof(fib_node_list_t));
}

void
load_balance_map_show_mem (void)
{
    fib_show_memory_usage("Load-Balance Map",
                          pool_elts(load_balance_map_pool),
                          pool_len(load_balance_map_pool),
                          sizeof(load_balance_map_t));
}

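/*
 * CLI handler for "show load-balance-map [<index>]"; with no index it
 * dumps every map in the pool. For example (illustrative):
 *
 *     vpp# show load-balance-map 0
 */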
static clib_error_t *
load_balance_map_show (vlib_main_t * vm,
                       unformat_input_t * input,
                       vlib_cli_command_t * cmd)
{
    index_t lbmi = INDEX_INVALID;

    while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
        if (unformat (input, "%d", &lbmi))
            ;
        else
            break;
    }

    if (INDEX_INVALID != lbmi)
    {
        vlib_cli_output (vm, "%U", format_load_balance_map, lbmi, 0);
    }
    else
    {
        load_balance_map_t *lbm;

        pool_foreach(lbm, load_balance_map_pool,
        ({
            vlib_cli_output (vm, "%U", format_load_balance_map,
                             load_balance_map_get_index(lbm), 0);
        }));
    }

    return 0;
}

VLIB_CLI_COMMAND (load_balance_map_show_command, static) = {
    .path = "show load-balance-map",
    .short_help = "show load-balance-map [<index>]",
    .function = load_balance_map_show,
};