fib: fix load-balance and replicate dpos buckets overflow

load-balance and replicate dpos both store their number of buckets as
u16, which can overflow if too many paths are configured. For
load-balance it can happens quite quickly because of weights
normalization.

Type: fix

Change-Id: I0c78c39fc3d40626dfc58b49e7d99d71f9852b50
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c
index d417d5f..fbac809 100644
--- a/src/plugins/unittest/fib_test.c
+++ b/src/plugins/unittest/fib_test.c
@@ -781,6 +781,69 @@
 }
 
 static int
+fib_test_multipath_v4 (const test_main_t *tm, const u32 fib_index,
+                       const fib_prefix_t *pfx, const int n_paths,
+                       const int expected_n_buckets)
+{
+    const int path_list_pool_size = fib_path_list_pool_size();
+    const int path_list_db_size = fib_path_list_db_size();
+    const int entry_pool_size = fib_entry_pool_size();
+    fib_route_path_t *r_paths = NULL;
+    const load_balance_t *lb;
+    const dpo_id_t *dpo;
+    u32 fei;
+    int res = 0;
+    int i;
+
+    for (i = 0; i < n_paths; i++)
+    {
+        fib_route_path_t r_path = {
+            .frp_proto = DPO_PROTO_IP4,
+            .frp_addr = {
+                .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + i),
+            },
+            .frp_sw_if_index = tm->hw[0]->sw_if_index,
+            .frp_weight = 1,
+            .frp_fib_index = ~0,
+	    .frp_flags = FIB_ROUTE_PATH_ATTACHED,
+        };
+        vec_add1(r_paths, r_path);
+    }
+
+    fib_table_entry_update(fib_index,
+                           pfx,
+                           FIB_SOURCE_API,
+                           FIB_ENTRY_FLAG_NONE,
+                           r_paths);
+
+    fei = fib_table_lookup_exact_match(fib_index, pfx);
+    FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "prefix present");
+    dpo = fib_entry_contribute_ip_forwarding(fei);
+
+    lb = load_balance_get(dpo->dpoi_index);
+    FIB_TEST((lb->lb_n_buckets == expected_n_buckets),
+             "prefix lb over %d paths", lb->lb_n_buckets);
+
+    fib_table_entry_delete(fib_index,
+                           pfx,
+                           FIB_SOURCE_API);
+    FIB_TEST(FIB_NODE_INDEX_INVALID ==
+             fib_table_lookup_exact_match(fib_index, pfx), "prefix removed");
+    vec_free(r_paths);
+
+    /*
+     * add-remove test. no change.
+     */
+    FIB_TEST((path_list_db_size == fib_path_list_db_size()),
+             "path list DB population:%d", fib_path_list_db_size());
+    FIB_TEST((path_list_pool_size == fib_path_list_pool_size()),
+             "path list pool size is %d", fib_path_list_pool_size());
+    FIB_TEST((entry_pool_size == fib_entry_pool_size()),
+             "entry pool size is %d", fib_entry_pool_size());
+    return res;
+}
+
+static int
 fib_test_v4 (void)
 {
     /*
@@ -3614,52 +3677,26 @@
     /*
      * A route with multiple paths at once
      */
-    fib_route_path_t *r_paths = NULL;
-
-    for (ii = 0; ii < 4; ii++)
-    {
-        fib_route_path_t r_path = {
-            .frp_proto = DPO_PROTO_IP4,
-            .frp_addr = {
-                .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
-            },
-            .frp_sw_if_index = tm->hw[0]->sw_if_index,
-            .frp_weight = 1,
-            .frp_fib_index = ~0,
-        };
-        vec_add1(r_paths, r_path);
-    }
-
-    fib_table_entry_update(fib_index,
-                           &pfx_4_4_4_4_s_32,
-                           FIB_SOURCE_API,
-                           FIB_ENTRY_FLAG_NONE,
-                           r_paths);
-
-    fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
-    FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
-    dpo = fib_entry_contribute_ip_forwarding(fei);
-
-    lb = load_balance_get(dpo->dpoi_index);
-    FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
-
-    fib_table_entry_delete(fib_index,
-                           &pfx_4_4_4_4_s_32,
-                           FIB_SOURCE_API);
-    FIB_TEST(FIB_NODE_INDEX_INVALID ==
-             fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
-             "4.4.4.4/32 removed");
-    vec_free(r_paths);
+    FIB_TEST(0 ==
+             fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, 4, 4),
+             "multipath with 4 nexthops");
 
     /*
-     * add-remove test. no change.
+     * A route with lots of multiple paths that will overflow max supported
+     * lb buckets because of normalization
      */
-    FIB_TEST((1  == fib_path_list_db_size()),   "path list DB population:%d",
-             fib_path_list_db_size());
-    FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
-             fib_path_list_pool_size());
-    FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
-             fib_entry_pool_size());
+    FIB_TEST(0 ==
+             fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32,
+				   LB_MAX_BUCKETS / 2 + 23, LB_MAX_BUCKETS),
+             "multipath with too many nexthops");
+
+    /*
+     * A route with more paths than max supported lb buckets
+     */
+    FIB_TEST(0 ==
+             fib_test_multipath_v4 (tm, fib_index, &pfx_4_4_4_4_s_32,
+                                    LB_MAX_BUCKETS + 13, LB_MAX_BUCKETS),
+             "multipath with too many nexthops");
 
     /*
      * A route deag route
@@ -3698,7 +3735,6 @@
     FIB_TEST(FIB_NODE_INDEX_INVALID ==
              fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
              "4.4.4.4/32 removed");
-    vec_free(r_paths);
 
     /*
      * A route deag route in a source lookup table
@@ -3737,7 +3773,6 @@
     FIB_TEST(FIB_NODE_INDEX_INVALID ==
              fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
              "4.4.4.4/32 removed");
-    vec_free(r_paths);
 
     /*
      * add-remove test. no change.
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index ff46d56..fae7c1d 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -244,6 +244,8 @@
 {
     load_balance_t *lb;
 
+    ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
     lb = load_balance_alloc_i();
     lb->lb_hash_config = fhc;
     lb->lb_n_buckets = num_buckets;
@@ -455,8 +457,9 @@
 
     /* Try larger and larger power of 2 sized adjacency blocks until we
        find one where traffic flows to within 1% of specified weights. */
-    for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+    for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
     {
+        ASSERT (n_adj <= LB_MAX_BUCKETS);
         error = 0;
 
         norm = n_adj / ((f64) sum_weight);
@@ -487,12 +490,22 @@
 
         nhs[0].path_weight += n_adj_left;
 
-        /* Less than 5% average error per adjacency with this size adjacency block? */
-        if (error <= multipath_next_hop_error_tolerance*n_adj)
+        /* Less than 1% average error per adjacency with this size adjacency block,
+         * or did we reached the maximum number of buckets we support? */
+        if (error <= multipath_next_hop_error_tolerance*n_adj ||
+            n_adj >= LB_MAX_BUCKETS)
         {
-            /* Truncate any next hops with zero weight. */
-            vec_set_len (nhs, i);
-            break;
+          if (i < n_nhs)
+          {
+            /* Truncate any next hops in excess */
+            vlib_log_err(load_balance_logger,
+                         "Too many paths for load-balance, truncating %d -> %d",
+                         n_nhs, i);
+            for (int j = i; j < n_nhs; j++)
+              dpo_reset (&vec_elt(nhs, j).path_dpo);
+          }
+          vec_set_len (nhs, i);
+          break;
         }
     }
 
@@ -622,6 +635,7 @@
 load_balance_set_n_buckets (load_balance_t *lb,
                             u32 n_buckets)
 {
+    ASSERT (n_buckets <= LB_MAX_BUCKETS);
     lb->lb_n_buckets = n_buckets;
     lb->lb_n_buckets_minus_1 = n_buckets-1;
 }
@@ -651,8 +665,6 @@
                                          &sum_of_weights,
                                          multipath_next_hop_error_tolerance);
 
-    ASSERT (n_buckets >= vec_len (raw_nhs));
-
     /*
      * Save the old load-balance map used, and get a new one if required.
      */
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 5428e20..3605d82 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -50,6 +50,12 @@
 extern load_balance_main_t load_balance_main;
 
 /**
+ * The maximum number of buckets that a load-balance object can have
+ * This must not overflow the lb_n_buckets field
+ */
+#define LB_MAX_BUCKETS 8192
+
+/**
  * The number of buckets that a load-balance object can have and still
  * fit in one cache-line
  */
@@ -176,6 +182,10 @@
 
 STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
 	      "A load_balance object size exceeds one cacheline");
+STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX,
+	       "Too many buckets for load_balance object");
+STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)),
+	       "LB_MAX_BUCKETS must be a power of 2");
 
 /**
  * Flags controlling load-balance formatting/display
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 5f88f12..0474fd8 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -172,6 +172,8 @@
 {
     replicate_t *rep;
 
+    ASSERT (num_buckets <= REP_MAX_BUCKETS);
+
     rep = replicate_alloc_i();
     rep->rep_n_buckets = num_buckets;
     rep->rep_proto = rep_proto;
@@ -311,7 +313,8 @@
 replicate_set_n_buckets (replicate_t *rep,
                          u32 n_buckets)
 {
-    rep->rep_n_buckets = n_buckets;
+  ASSERT (n_buckets <= REP_MAX_BUCKETS);
+  rep->rep_n_buckets = n_buckets;
 }
 
 void
@@ -331,6 +334,17 @@
                                              rep->rep_proto);
     n_buckets = vec_len(nhs);
 
+    if (n_buckets > REP_MAX_BUCKETS)
+      {
+	vlib_log_err (replicate_logger,
+		      "Too many paths for replicate, truncating %d -> %d",
+		      n_buckets, REP_MAX_BUCKETS);
+	for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
+	  dpo_reset (&vec_elt (nhs, i).path_dpo);
+	vec_set_len (nhs, REP_MAX_BUCKETS);
+	n_buckets = REP_MAX_BUCKETS;
+      }
+
     if (0 == rep->rep_n_buckets)
     {
         /*
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 908c20c..d21f52a 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -41,6 +41,12 @@
 extern replicate_main_t replicate_main;
 
 /**
+ * The number of buckets that a replicate object can have
+ * This must not overflow the rep_n_buckets field
+ */
+#define REP_MAX_BUCKETS 1024
+
+/**
  * The number of buckets that a load-balance object can have and still
  * fit in one cache-line
  */
@@ -108,6 +114,8 @@
 
 STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
 	      "A replicate object size exceeds one cacheline");
+STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX,
+	       "Too many buckets for replicate object");
 
 /**
  * Flags controlling load-balance formatting/display