fib: fix load-balance and replicate dpos buckets overflow
load-balance and replicate dpos both store their number of buckets as
u16, which can overflow if too many paths are configured. For
load-balance it can happens quite quickly because of weights
normalization.
Type: fix
Change-Id: I0c78c39fc3d40626dfc58b49e7d99d71f9852b50
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c
index d417d5f..fbac809 100644
--- a/src/plugins/unittest/fib_test.c
+++ b/src/plugins/unittest/fib_test.c
@@ -781,6 +781,69 @@
}
static int
+fib_test_multipath_v4 (const test_main_t *tm, const u32 fib_index,
+ const fib_prefix_t *pfx, const int n_paths,
+ const int expected_n_buckets)
+{
+ const int path_list_pool_size = fib_path_list_pool_size();
+ const int path_list_db_size = fib_path_list_db_size();
+ const int entry_pool_size = fib_entry_pool_size();
+ fib_route_path_t *r_paths = NULL;
+ const load_balance_t *lb;
+ const dpo_id_t *dpo;
+ u32 fei;
+ int res = 0;
+ int i;
+
+ for (i = 0; i < n_paths; i++)
+ {
+ fib_route_path_t r_path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + i),
+ },
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ .frp_flags = FIB_ROUTE_PATH_ATTACHED,
+ };
+ vec_add1(r_paths, r_path);
+ }
+
+ fib_table_entry_update(fib_index,
+ pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
+
+ fei = fib_table_lookup_exact_match(fib_index, pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "prefix present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == expected_n_buckets),
+ "prefix lb over %d paths", lb->lb_n_buckets);
+
+ fib_table_entry_delete(fib_index,
+ pfx,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, pfx), "prefix removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((path_list_db_size == fib_path_list_db_size()),
+ "path list DB population:%d", fib_path_list_db_size());
+ FIB_TEST((path_list_pool_size == fib_path_list_pool_size()),
+ "path list pool size is %d", fib_path_list_pool_size());
+ FIB_TEST((entry_pool_size == fib_entry_pool_size()),
+ "entry pool size is %d", fib_entry_pool_size());
+ return res;
+}
+
+static int
fib_test_v4 (void)
{
/*
@@ -3614,52 +3677,26 @@
/*
* A route with multiple paths at once
*/
- fib_route_path_t *r_paths = NULL;
-
- for (ii = 0; ii < 4; ii++)
- {
- fib_route_path_t r_path = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = {
- .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
- },
- .frp_sw_if_index = tm->hw[0]->sw_if_index,
- .frp_weight = 1,
- .frp_fib_index = ~0,
- };
- vec_add1(r_paths, r_path);
- }
-
- fib_table_entry_update(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API,
- FIB_ENTRY_FLAG_NONE,
- r_paths);
-
- fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
- FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
- dpo = fib_entry_contribute_ip_forwarding(fei);
-
- lb = load_balance_get(dpo->dpoi_index);
- FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
-
- fib_table_entry_delete(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API);
- FIB_TEST(FIB_NODE_INDEX_INVALID ==
- fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
- "4.4.4.4/32 removed");
- vec_free(r_paths);
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, 4, 4),
+ "multipath with 4 nexthops");
/*
- * add-remove test. no change.
+ * A route with lots of multiple paths that will overflow max supported
+ * lb buckets because of normalization
*/
- FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
- fib_path_list_db_size());
- FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
- fib_path_list_pool_size());
- FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
- fib_entry_pool_size());
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS / 2 + 23, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
+
+ /*
+ * A route with more paths than max supported lb buckets
+ */
+ FIB_TEST(0 ==
+ fib_test_multipath_v4 (tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS + 13, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
/*
* A route deag route
@@ -3698,7 +3735,6 @@
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* A route deag route in a source lookup table
@@ -3737,7 +3773,6 @@
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* add-remove test. no change.
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index ff46d56..fae7c1d 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -244,6 +244,8 @@
{
load_balance_t *lb;
+ ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
lb = load_balance_alloc_i();
lb->lb_hash_config = fhc;
lb->lb_n_buckets = num_buckets;
@@ -455,8 +457,9 @@
/* Try larger and larger power of 2 sized adjacency blocks until we
find one where traffic flows to within 1% of specified weights. */
- for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
{
+ ASSERT (n_adj <= LB_MAX_BUCKETS);
error = 0;
norm = n_adj / ((f64) sum_weight);
@@ -487,12 +490,22 @@
nhs[0].path_weight += n_adj_left;
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= multipath_next_hop_error_tolerance*n_adj)
+ /* Less than 1% average error per adjacency with this size adjacency block,
+ * or did we reached the maximum number of buckets we support? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj ||
+ n_adj >= LB_MAX_BUCKETS)
{
- /* Truncate any next hops with zero weight. */
- vec_set_len (nhs, i);
- break;
+ if (i < n_nhs)
+ {
+ /* Truncate any next hops in excess */
+ vlib_log_err(load_balance_logger,
+ "Too many paths for load-balance, truncating %d -> %d",
+ n_nhs, i);
+ for (int j = i; j < n_nhs; j++)
+ dpo_reset (&vec_elt(nhs, j).path_dpo);
+ }
+ vec_set_len (nhs, i);
+ break;
}
}
@@ -622,6 +635,7 @@
load_balance_set_n_buckets (load_balance_t *lb,
u32 n_buckets)
{
+ ASSERT (n_buckets <= LB_MAX_BUCKETS);
lb->lb_n_buckets = n_buckets;
lb->lb_n_buckets_minus_1 = n_buckets-1;
}
@@ -651,8 +665,6 @@
&sum_of_weights,
multipath_next_hop_error_tolerance);
- ASSERT (n_buckets >= vec_len (raw_nhs));
-
/*
* Save the old load-balance map used, and get a new one if required.
*/
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 5428e20..3605d82 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -50,6 +50,12 @@
extern load_balance_main_t load_balance_main;
/**
+ * The maximum number of buckets that a load-balance object can have
+ * This must not overflow the lb_n_buckets field
+ */
+#define LB_MAX_BUCKETS 8192
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -176,6 +182,10 @@
STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
"A load_balance object size exceeds one cacheline");
+STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for load_balance object");
+STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)),
+ "LB_MAX_BUCKETS must be a power of 2");
/**
* Flags controlling load-balance formatting/display
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 5f88f12..0474fd8 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -172,6 +172,8 @@
{
replicate_t *rep;
+ ASSERT (num_buckets <= REP_MAX_BUCKETS);
+
rep = replicate_alloc_i();
rep->rep_n_buckets = num_buckets;
rep->rep_proto = rep_proto;
@@ -311,7 +313,8 @@
replicate_set_n_buckets (replicate_t *rep,
u32 n_buckets)
{
- rep->rep_n_buckets = n_buckets;
+ ASSERT (n_buckets <= REP_MAX_BUCKETS);
+ rep->rep_n_buckets = n_buckets;
}
void
@@ -331,6 +334,17 @@
rep->rep_proto);
n_buckets = vec_len(nhs);
+ if (n_buckets > REP_MAX_BUCKETS)
+ {
+ vlib_log_err (replicate_logger,
+ "Too many paths for replicate, truncating %d -> %d",
+ n_buckets, REP_MAX_BUCKETS);
+ for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
+ dpo_reset (&vec_elt (nhs, i).path_dpo);
+ vec_set_len (nhs, REP_MAX_BUCKETS);
+ n_buckets = REP_MAX_BUCKETS;
+ }
+
if (0 == rep->rep_n_buckets)
{
/*
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 908c20c..d21f52a 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -41,6 +41,12 @@
extern replicate_main_t replicate_main;
/**
+ * The number of buckets that a replicate object can have
+ * This must not overflow the rep_n_buckets field
+ */
+#define REP_MAX_BUCKETS 1024
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -108,6 +114,8 @@
STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
"A replicate object size exceeds one cacheline");
+STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for replicate object");
/**
* Flags controlling load-balance formatting/display