FIB path preference

Paths are given a preference; the lowest value is 'best'. Only paths that are up contribute to forwarding - that's unchanged. What's new is that, of the paths that are up, only those that have the best preference contribute. A poor man's primary and backup. It's not a true primary/backup function because the FIB must converge before the lower preference paths are used.

Change-Id: Ie4453c4a7b1094c6c2b51fe1594b8302103bb68e
Signed-off-by: Neale Ranns <nranns@cisco.com>
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index 10d0cb5..73d76a4 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -47,7 +47,8 @@
 			 const ip46_address_t * next_hop,
 			 u32 next_hop_sw_if_index,
 			 u8 next_hop_fib_index,
-			 u32 next_hop_weight,
+			 u16 next_hop_weight,
+			 u16 next_hop_preference,
 			 mpls_label_t next_hop_via_label,
 			 mpls_label_t * next_hop_out_label_stack);
 
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 1143f05..d7ff1c8 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -1373,7 +1373,7 @@
     if (NULL == fed)
     {
         /*
-         * no BFD tracking - resolved
+         * no BFD tracking - consider it resolved.
          */
         return (!0);
     }
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index 8c50828..ff73cbf 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -193,6 +193,7 @@
     const fib_entry_src_t *esrc;
     fib_forward_chain_type_t fct;
     int n_recursive_constrained;
+    u16 preference;
 } fib_entry_src_collect_forwarding_ctx_t;
 
 /**
@@ -351,6 +352,22 @@
     {
         ctx->n_recursive_constrained += 1;
     }
+    if (0xffff == ctx->preference)
+    {
+        /*
+         * not set a preference yet, so the first path we encounter
+         * sets the preference we are collecting.
+         */
+        ctx->preference = fib_path_get_preference(path_index);
+    }
+    else if (ctx->preference != fib_path_get_preference(path_index))
+    {
+        /*
+         * this path does not belong to the same preference as the
+         * previous paths encountered. we are done now.
+         */
+        return (FIB_PATH_LIST_WALK_STOP);
+    }
 
     /*
      * get the matching path-extension for the path being visited.
@@ -422,6 +439,7 @@
         .next_hops = NULL,
         .n_recursive_constrained = 0,
         .fct = fct,
+        .preference = 0xffff,
     };
 
     /*
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 274b0ef..a32baf2 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -196,9 +196,15 @@
     fib_protocol_t fp_nh_proto;
 
     /**
-     * UCMP [unnormalised] weigt
+     * UCMP [unnormalised] weight
      */
-    u32 fp_weight;
+    u16 fp_weight;
+    /**
+     * A path preference. 0 is the best.
+     * Only paths of the best preference, that are 'up', are considered
+     * for forwarding.
+     */
+    u16 fp_preference;
 
     /**
      * per-type union of the data required to resolve the path
@@ -376,6 +382,7 @@
     s = format (s, "pl-index:%d ", path->fp_pl_index);
     s = format (s, "%U ", format_fib_protocol, path->fp_nh_proto);
     s = format (s, "weight=%d ", path->fp_weight);
+    s = format (s, "pref=%d ", path->fp_preference);
     s = format (s, "%s: ", fib_path_type_names[path->fp_type]);
     if (FIB_PATH_OPER_FLAG_NONE != path->fp_oper_flags) {
 	s = format(s, " oper-flags:");
@@ -700,6 +707,14 @@
     }
 
     /*
+     * If this path is contributing a drop, then it's not resolved
+     */
+    if (dpo_is_drop(&via_dpo) || load_balance_is_drop(&via_dpo))
+    {
+        path->fp_oper_flags &= ~FIB_PATH_OPER_FLAG_RESOLVED;
+    }
+
+    /*
      * update the path's contributed DPO
      */
     dpo_copy(dpo, &via_dpo);
@@ -1071,6 +1086,7 @@
          */
         path->fp_weight = 1;
     }
+    path->fp_preference = rpath->frp_preference;
     path->fp_cfg_flags = fib_path_route_flags_to_cfg_flags(rpath);
 
     /*
@@ -1165,6 +1181,7 @@
 
     path->fp_pl_index = pl_index;
     path->fp_weight = 1;
+    path->fp_preference = 0;
     path->fp_nh_proto = nh_proto;
     path->fp_via_fib = FIB_NODE_INDEX_INVALID;
     path->fp_cfg_flags = flags;
@@ -1275,7 +1292,7 @@
 
     /*
      * paths of different types and protocol are not equal.
-     * different weights only are the same path.
+     * paths that differ only in weight and/or preference are the same path.
      */
     if (path1->fp_type != path2->fp_type)
     {
@@ -1350,6 +1367,15 @@
     path1 = fib_path_get(*pi1);
     path2 = fib_path_get(*pi2);
 
+    /*
+     * when sorting paths we want the best (lowest value) preference
+     * paths first, so that the choice set built is in preference order
+     */
+    if (path1->fp_preference != path2->fp_preference)
+    {
+	return (path1->fp_preference - path2->fp_preference);
+    }
+
     return (fib_path_cmp_i(path1, path2));
 }
 
@@ -1750,7 +1776,7 @@
     return (ADJ_INDEX_INVALID);
 }
 
-int
+u16
 fib_path_get_weight (fib_node_index_t path_index)
 {
     fib_path_t *path;
@@ -1762,6 +1788,18 @@
     return (path->fp_weight);
 }
 
+u16
+fib_path_get_preference (fib_node_index_t path_index)
+{
+    fib_path_t *path;
+
+    path = fib_path_get(path_index);
+
+    ASSERT(path);
+
+    return (path->fp_preference);
+}
+
 /**
  * @brief Contribute the path's adjacency to the list passed.
  * By calling this function over all paths, recursively, a child
@@ -2116,6 +2154,7 @@
       return (FIB_PATH_LIST_WALK_CONTINUE);
     vec_add2(*api_rpaths, api_rpath, 1);
     api_rpath->rpath.frp_weight = path->fp_weight;
+    api_rpath->rpath.frp_preference = path->fp_preference;
     api_rpath->rpath.frp_proto = path->fp_nh_proto;
     api_rpath->rpath.frp_sw_if_index = ~0;
     api_rpath->dpo = path->exclusive.fp_ex_dpo;
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index 0b5e607..a34cb43 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -167,7 +167,8 @@
 extern int fib_path_recursive_loop_detect(fib_node_index_t path_index,
 					  fib_node_index_t **entry_indicies);
 extern u32 fib_path_get_resolving_interface(fib_node_index_t fib_entry_index);
-extern int fib_path_get_weight(fib_node_index_t path_index);
+extern u16 fib_path_get_weight(fib_node_index_t path_index);
+extern u16 fib_path_get_preference(fib_node_index_t path_index);
 
 extern void fib_path_module_init(void);
 extern fib_path_list_walk_rc_t fib_path_encode(fib_node_index_t path_list_index,
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index 3e4c333..7a9c328 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -680,6 +680,16 @@
                      fib_path_create(path_list_index,
                                      &rpaths[i]));
         }
+        /*
+         * we sort the paths since the key for the path-list is
+         * the description of the paths it contains. The paths need to
+         * be sorted else this description will differ.
+         */
+        if (vec_len(path_list->fpl_paths) > 1)
+        {
+            vec_sort_with_function(path_list->fpl_paths,
+                                   fib_path_cmp_for_sort);
+        }
     }
 
     /*
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 06aeda6..720f54e 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -604,9 +604,10 @@
                        bucket,
                        format_dpo_type, dpo->dpoi_type);
 	    FIB_TEST_LB((exp->lb.lb == dpo->dpoi_index),
-			"bucket %d stacks on lb %d",
+			"bucket %d stacks on lb %d not %d",
 			bucket,
-			exp->lb.lb);
+			dpo->dpoi_index,
+                        exp->lb.lb);
 	    break;
 	case FT_LB_SPECIAL:
 	    FIB_TEST_I((DPO_DROP == dpo->dpoi_type),
@@ -1482,19 +1483,20 @@
 	},
     };
 
-    fib_table_entry_path_add(fib_index,
-			     &bgp_200_pfx,
-			     FIB_SOURCE_API,
-			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
-			     &pfx_1_1_1_2_s_32.fp_addr,
-			     ~0, // no index provided.
-			     fib_index, // nexthop in same fib as route
-			     1,
-			     NULL,
-			     FIB_ROUTE_PATH_FLAG_NONE);
+    fei = fib_table_entry_path_add(fib_index,
+                                   &bgp_200_pfx,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &pfx_1_1_1_2_s_32.fp_addr,
+                                   ~0, // no index provided.
+                                   fib_index, // nexthop in same fib as route
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
 
-    FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+    FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+             "Recursive via unresolved is drop");
 
     /*
      * the adj should be recursive via drop, since the route resolves via
@@ -2101,19 +2103,20 @@
 	},
     };
 
-    fib_table_entry_path_add(fib_index,
-			     &bgp_201_pfx,
-			     FIB_SOURCE_API,
-			     FIB_ENTRY_FLAG_NONE,
-			     FIB_PROTOCOL_IP4,
-			     &pfx_1_1_1_200_s_32.fp_addr,
-			     ~0, // no index provided.
-			     fib_index, // nexthop in same fib as route
-			     1,
-			     NULL,
-			     FIB_ROUTE_PATH_FLAG_NONE);
+    fei = fib_table_entry_path_add(fib_index,
+                                   &bgp_201_pfx,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &pfx_1_1_1_200_s_32.fp_addr,
+                                   ~0, // no index provided.
+                                   fib_index, // nexthop in same fib as route
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
 
-    FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
+    FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+             "Recursive via unresolved is drop");
 
     fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32);
     FIB_TEST((FIB_ENTRY_FLAG_NONE == fib_entry_get_flags(fei)),
@@ -2285,12 +2288,16 @@
     fei = fib_table_lookup(fib_index, &pfx_1_1_1_2_s_32);
     FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
 	     "1.1.1.2/32 route is DROP");
-    fei = fib_table_lookup(fib_index, &pfx_1_1_1_200_s_32);
+    fei = fib_table_lookup_exact_match(fib_index, &pfx_1_1_1_200_s_32);
     FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
 	     "1.1.1.200/32 route is DROP");
 
-    FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
-    FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
+    fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+    FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+             "201 is drop");
+    fei = fib_table_lookup_exact_match(fib_index, &bgp_200_pfx);
+    FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+             "200 is drop");
 
     /*
      * -1 entry
@@ -2320,7 +2327,9 @@
     ai = fib_entry_get_adj(fei);
     FIB_TEST((ai = ai_01), "1.1.1.2/32 resolves via 10.10.10.1");
 
-    FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_200_s_32, 0);
+    fei = fib_table_lookup_exact_match(fib_index, &bgp_201_pfx);
+    FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
+             "201 is drop");
     FIB_TEST_REC_FORW(&bgp_200_pfx, &pfx_1_1_1_2_s_32, 0);
 
     /*
@@ -2334,6 +2343,33 @@
 	     fib_entry_pool_size());
 
     /*
+     * give 201 a resolved path.
+     *  it now has the unresolved 1.1.1.200 and the resolved 1.1.1.2,
+     *  only the latter contributes forwarding.
+     */
+    fei = fib_table_entry_path_add(fib_index,
+                                   &bgp_201_pfx,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &pfx_1_1_1_2_s_32.fp_addr,
+                                   ~0,
+                                   fib_index,
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+    FIB_TEST_REC_FORW(&bgp_201_pfx, &pfx_1_1_1_2_s_32, 0);
+    fib_table_entry_path_remove(fib_index,
+                                &bgp_201_pfx,
+                                FIB_SOURCE_API,
+                                FIB_PROTOCOL_IP4,
+                                &pfx_1_1_1_2_s_32.fp_addr,
+                                ~0,
+                                fib_index,
+                                1,
+                                FIB_ROUTE_PATH_FLAG_NONE);
+
+    /*
      * remove 200.200.200.201/32 which does not have a valid via FIB
      */
     fib_table_entry_path_remove(fib_index,
@@ -3609,6 +3645,17 @@
 	},
     };
     fei = fib_table_entry_path_add(fib_index,
+                                   &pfx_34_34_1_1_s_32,
+                                   FIB_SOURCE_API,
+                                   FIB_ENTRY_FLAG_NONE,
+                                   FIB_PROTOCOL_IP4,
+                                   &nh_10_10_10_1,
+                                   tm->hw[0]->sw_if_index,
+                                   0,
+                                   1,
+                                   NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+    fei = fib_table_entry_path_add(fib_index,
                                    &pfx_34_1_1_1_s_32,
                                    FIB_SOURCE_API,
                                    FIB_ENTRY_FLAG_NONE,
@@ -3632,6 +3679,9 @@
                                    FIB_ROUTE_PATH_FLAG_NONE);
     FIB_TEST_REC_FORW(&pfx_34_1_1_1_s_32, &pfx_34_34_1_1_s_32, 0);
     fib_table_entry_delete_index(fei, FIB_SOURCE_API);
+    fib_table_entry_delete(fib_index,
+                           &pfx_34_34_1_1_s_32,
+                           FIB_SOURCE_API);
 
     /*
      * CLEANUP
@@ -5554,6 +5604,459 @@
     return (0);
 }
 
+/*
+ * Test Path Preference
+ */
+static int
+fib_test_pref (void)
+{
+    test_main_t *tm = &test_main;
+
+    const fib_prefix_t pfx_1_1_1_1_s_32 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr = {
+            .ip4 = {
+                .as_u32 = clib_host_to_net_u32(0x01010101),
+            },
+        },
+    };
+
+    /*
+     * 2 high, 2 medium and 2 low preference non-recursive paths
+     */
+    fib_route_path_t nr_path_hi_1 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[0]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 0,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
+        },
+    };
+    fib_route_path_t nr_path_hi_2 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[0]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 0,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02),
+        },
+    };
+    fib_route_path_t nr_path_med_1 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[1]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 1,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0c01),
+        },
+    };
+    fib_route_path_t nr_path_med_2 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[1]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 1,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0c01),
+        },
+    };
+    fib_route_path_t nr_path_low_1 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[2]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 2,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b01),
+        },
+    };
+    fib_route_path_t nr_path_low_2 = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = tm->hw[2]->sw_if_index,
+        .frp_fib_index = ~0,
+        .frp_weight = 1,
+        .frp_preference = 2,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = {
+            .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0b02),
+        },
+    };
+    fib_route_path_t *nr_paths = NULL;
+
+    vec_add1(nr_paths, nr_path_hi_1);
+    vec_add1(nr_paths, nr_path_hi_2);
+    vec_add1(nr_paths, nr_path_med_1);
+    vec_add1(nr_paths, nr_path_med_2);
+    vec_add1(nr_paths, nr_path_low_1);
+    vec_add1(nr_paths, nr_path_low_2);
+
+    adj_index_t ai_hi_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                              VNET_LINK_IP4,
+                                              &nr_path_hi_1.frp_addr,
+                                              nr_path_hi_1.frp_sw_if_index);
+    adj_index_t ai_hi_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                              VNET_LINK_IP4,
+                                              &nr_path_hi_2.frp_addr,
+                                              nr_path_hi_2.frp_sw_if_index);
+    adj_index_t ai_med_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                               VNET_LINK_IP4,
+                                               &nr_path_med_1.frp_addr,
+                                               nr_path_med_1.frp_sw_if_index);
+    adj_index_t ai_med_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                               VNET_LINK_IP4,
+                                               &nr_path_med_2.frp_addr,
+                                               nr_path_med_2.frp_sw_if_index);
+    adj_index_t ai_low_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                               VNET_LINK_IP4,
+                                               &nr_path_low_1.frp_addr,
+                                               nr_path_low_1.frp_sw_if_index);
+    adj_index_t ai_low_2 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
+                                               VNET_LINK_IP4,
+                                               &nr_path_low_2.frp_addr,
+                                               nr_path_low_2.frp_sw_if_index);
+
+    fib_test_lb_bucket_t ip_hi_1 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_hi_1,
+        },
+    };
+    fib_test_lb_bucket_t ip_hi_2 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_hi_2,
+        },
+    };
+    fib_test_lb_bucket_t ip_med_1 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_med_1,
+        },
+    };
+    fib_test_lb_bucket_t ip_med_2 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_med_2,
+        },
+    };
+    fib_test_lb_bucket_t ip_low_1 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_low_1,
+        },
+    };
+    fib_test_lb_bucket_t ip_low_2 = {
+        .type = FT_LB_ADJ,
+        .adj = {
+            .adj = ai_low_2,
+        },
+    };
+
+    fib_node_index_t fei;
+
+    fei = fib_table_entry_path_add2(0,
+                                    &pfx_1_1_1_1_s_32,
+                                    FIB_SOURCE_API,
+                                    FIB_ENTRY_FLAG_NONE,
+                                    nr_paths);
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     2,
+                                     &ip_hi_1,
+                                     &ip_hi_2),
+             "1.1.1.1/32 via high preference paths");
+
+    /*
+     * bring down the interface on which the high preference paths lie
+     */
+    vnet_sw_interface_set_flags(vnet_get_main(),
+                                tm->hw[0]->sw_if_index,
+                                0);
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     2,
+                                     &ip_med_1,
+                                     &ip_med_2),
+             "1.1.1.1/32 via medium preference paths");
+
+    /*
+     * bring down the interface on which the medium preference paths lie
+     */
+    vnet_sw_interface_set_flags(vnet_get_main(),
+                                tm->hw[1]->sw_if_index,
+                                0);
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     2,
+                                     &ip_low_1,
+                                     &ip_low_2),
+             "1.1.1.1/32 via low preference paths");
+
+    /*
+     * bring up the interface on which the high preference paths lie
+     */
+    vnet_sw_interface_set_flags(vnet_get_main(),
+                                tm->hw[0]->sw_if_index,
+                                VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     2,
+                                     &ip_hi_1,
+                                     &ip_hi_2),
+             "1.1.1.1/32 via high preference paths");
+
+    /*
+     * bring up the interface on which the medium preference paths lie
+     */
+    vnet_sw_interface_set_flags(vnet_get_main(),
+                                tm->hw[1]->sw_if_index,
+                                VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+    FIB_TEST(fib_test_validate_entry(fei,
+                                     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                     2,
+                                     &ip_hi_1,
+                                     &ip_hi_2),
+             "1.1.1.1/32 via high preference paths");
+
+    dpo_id_t ip_1_1_1_1 = DPO_INVALID;
+    fib_entry_contribute_forwarding(fei,
+                                    FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                    &ip_1_1_1_1);
+
+    /*
+     * 3 recursive paths of different preference
+     */
+    const fib_prefix_t pfx_1_1_1_2_s_32 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr = {
+            .ip4 = {
+                .as_u32 = clib_host_to_net_u32(0x01010102),
+            },
+        },
+    };
+    const fib_prefix_t pfx_1_1_1_3_s_32 = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr = {
+            .ip4 = {
+                .as_u32 = clib_host_to_net_u32(0x01010103),
+            },
+        },
+    };
+    fei = fib_table_entry_path_add2(0,
+                                    &pfx_1_1_1_2_s_32,
+                                    FIB_SOURCE_API,
+                                    FIB_ENTRY_FLAG_NONE,
+                                    nr_paths);
+    dpo_id_t ip_1_1_1_2 = DPO_INVALID;
+    fib_entry_contribute_forwarding(fei,
+                                    FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                    &ip_1_1_1_2);
+    fei = fib_table_entry_path_add2(0,
+                                    &pfx_1_1_1_3_s_32,
+                                    FIB_SOURCE_API,
+                                    FIB_ENTRY_FLAG_NONE,
+                                    nr_paths);
+    dpo_id_t ip_1_1_1_3 = DPO_INVALID;
+    fib_entry_contribute_forwarding(fei,
+                                    FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                    &ip_1_1_1_3);
+
+    fib_test_lb_bucket_t ip_o_1_1_1_1 = {
+        .type = FT_LB_O_LB,
+        .lb = {
+            .lb = ip_1_1_1_1.dpoi_index,
+        },
+    };
+    fib_test_lb_bucket_t ip_o_1_1_1_2 = {
+        .type = FT_LB_O_LB,
+        .lb = {
+            .lb = ip_1_1_1_2.dpoi_index,
+        },
+    };
+    fib_test_lb_bucket_t ip_o_1_1_1_3 = {
+        .type = FT_LB_O_LB,
+        .lb = {
+            .lb = ip_1_1_1_3.dpoi_index,
+        },
+    };
+    fib_route_path_t r_path_hi = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = ~0,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_preference = 0,
+        .frp_flags = FIB_ROUTE_PATH_RESOLVE_VIA_HOST,
+        .frp_addr = pfx_1_1_1_1_s_32.fp_addr,
+    };
+    fib_route_path_t r_path_med = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = ~0,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_preference = 10,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_addr = pfx_1_1_1_2_s_32.fp_addr,
+    };
+    fib_route_path_t r_path_low = {
+        .frp_proto = FIB_PROTOCOL_IP4,
+        .frp_sw_if_index = ~0,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_preference = 1000,
+        .frp_flags = FIB_ROUTE_PATH_RESOLVE_VIA_HOST,
+        .frp_addr = pfx_1_1_1_3_s_32.fp_addr,
+    };
+    fib_route_path_t *r_paths = NULL;
+
+    vec_add1(r_paths, r_path_hi);
+    vec_add1(r_paths, r_path_low);
+    vec_add1(r_paths, r_path_med);
+
+    /*
+     * add many recursive routes so that the LB map gets created
+     */
+    #define N_PFXS 64
+    fib_prefix_t pfx_r[N_PFXS];
+    uint32_t n_pfxs;
+    for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+    {
+        pfx_r[n_pfxs].fp_len = 32;
+        pfx_r[n_pfxs].fp_proto = FIB_PROTOCOL_IP4;
+        pfx_r[n_pfxs].fp_addr.ip4.as_u32 =
+            clib_host_to_net_u32(0x02000000 + n_pfxs);
+
+        fei = fib_table_entry_path_add2(0,
+                                        &pfx_r[n_pfxs],
+                                        FIB_SOURCE_API,
+                                        FIB_ENTRY_FLAG_NONE,
+                                        r_paths);
+
+        FIB_TEST(fib_test_validate_entry(fei,
+                                         FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                         1,
+                                         &ip_o_1_1_1_1),
+                 "recursive via high preference paths");
+
+        /*
+         * withdraw high preference resolving entry
+         */
+        fib_table_entry_delete(0,
+                               &pfx_1_1_1_1_s_32,
+                               FIB_SOURCE_API);
+
+        /* suspend so the update walk kicks in */
+        vlib_process_suspend(vlib_get_main(), 1e-5);
+
+        FIB_TEST(fib_test_validate_entry(fei,
+                                         FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                         1,
+                                         &ip_o_1_1_1_2),
+                 "recursive via medium preference paths");
+
+        /*
+         * withdraw medium pref resolving entry
+         */
+        fib_table_entry_delete(0,
+                               &pfx_1_1_1_2_s_32,
+                               FIB_SOURCE_API);
+
+        /* suspend so the update walk kicks in */
+        vlib_process_suspend(vlib_get_main(), 1e-5);
+
+        FIB_TEST(fib_test_validate_entry(fei,
+                                         FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                         1,
+                                         &ip_o_1_1_1_3),
+                 "recursive via low preference paths");
+
+        /*
+         * add back paths for next iteration
+         */
+        fei = fib_table_entry_update(0,
+                                     &pfx_1_1_1_2_s_32,
+                                     FIB_SOURCE_API,
+                                     FIB_ENTRY_FLAG_NONE,
+                                     nr_paths);
+        fei = fib_table_entry_update(0,
+                                     &pfx_1_1_1_1_s_32,
+                                     FIB_SOURCE_API,
+                                     FIB_ENTRY_FLAG_NONE,
+                                     nr_paths);
+
+        /* suspend so the update walk kicks in */
+        vlib_process_suspend(vlib_get_main(), 1e-5);
+
+        fei = fib_table_lookup_exact_match(0, &pfx_r[n_pfxs]);
+        FIB_TEST(fib_test_validate_entry(fei,
+                                         FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                         1,
+                                         &ip_o_1_1_1_1),
+                 "recursive via high preference paths");
+    }
+
+
+    fib_table_entry_delete(0,
+                           &pfx_1_1_1_1_s_32,
+                           FIB_SOURCE_API);
+
+    /* suspend so the update walk kicks in */
+    vlib_process_suspend(vlib_get_main(), 1e-5);
+
+    for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+    {
+        fei = fib_table_lookup_exact_match(0, &pfx_r[n_pfxs]);
+
+        FIB_TEST(fib_test_validate_entry(fei,
+                                         FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
+                                         1,
+                                         &ip_o_1_1_1_2),
+                 "recursive via medium preference paths");
+    }
+    for (n_pfxs = 0; n_pfxs < N_PFXS; n_pfxs++)
+    {
+        fib_table_entry_delete(0,
+                               &pfx_r[n_pfxs],
+                               FIB_SOURCE_API);
+    }
+
+    /*
+     * Cleanup
+     */
+    fib_table_entry_delete(0,
+                           &pfx_1_1_1_2_s_32,
+                           FIB_SOURCE_API);
+    fib_table_entry_delete(0,
+                           &pfx_1_1_1_3_s_32,
+                           FIB_SOURCE_API);
+
+    dpo_reset(&ip_1_1_1_1);
+    dpo_reset(&ip_1_1_1_2);
+    dpo_reset(&ip_1_1_1_3);
+    adj_unlock(ai_low_2);
+    adj_unlock(ai_low_1);
+    adj_unlock(ai_med_2);
+    adj_unlock(ai_med_1);
+    adj_unlock(ai_hi_2);
+    adj_unlock(ai_hi_1);
+    return (0);
+}
 
 /*
  * Test the recursive route route handling for GRE tunnels
@@ -7996,8 +8499,8 @@
     FIB_TEST(fib_test_validate_entry(lfe,
     				     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
     				     1,
-    				     &ip_o_1200),
-    	     "2.2.2.2.4/32 LB 1 buckets via: label 1200 EOS");
+    				     &bucket_drop),
+    	     "2.2.2.2.4/32 LB 1 buckets via: drop");
     lfe = fib_table_lookup(fib_index, &pfx_1200);
     FIB_TEST(fib_test_validate_entry(lfe,
 				     FIB_FORW_CHAIN_TYPE_UNICAST_IP4,
@@ -8199,6 +8702,10 @@
     {
 	res += fib_test_ae();
     }
+    else if (unformat (input, "pref"))
+    {
+	res += fib_test_pref();
+    }
     else if (unformat (input, "lfib"))
     {
 	res += lfib_test();
@@ -8217,6 +8724,7 @@
 	res += fib_test_v6();
 	res += fib_test_ae();
 	res += fib_test_bfd();
+	res += fib_test_pref();
 	res += fib_test_label();
 	res += lfib_test();
 
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index 60873c4..a7a23d7 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -380,7 +380,13 @@
     /**
      * [un]equal cost path weight
      */
-    u32 frp_weight;
+    u16 frp_weight;
+    /**
+     * A path preference. 0 is the best.
+     * Only paths of the best preference, that are 'up', are considered
+     * for forwarding.
+     */
+    u16 frp_preference;
     /**
      * flags on the path
      */
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index 7097a13..fa36337 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -31,6 +31,7 @@
 /** \brief FIB path
     @param sw_if_index - index of the interface
     @param weight - The weight, for UCMP
+    @param preference - The preference of the path. The path with the lowest preference value is preferred
     @param is_local - local if non-zero, else remote
     @param is_drop - Drop the packet
     @param is_unreach - Drop the packet and rate limit send ICMP unreachable
@@ -43,7 +44,8 @@
 typeonly manual_print manual_endian define fib_path
 {
   u32 sw_if_index;
-  u32 weight;
+  u8 weight;
+  u8 preference;
   u8 is_local;
   u8 is_drop;
   u8 is_unreach;
@@ -374,6 +376,7 @@
   /* Is last/not-last message in group of multiple add/del messages. */
   u8 not_last;
   u8 next_hop_weight;
+  u8 next_hop_preference;
   u8 dst_address_length;
   u8 dst_address[16];
   u8 next_hop_address[16];
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index 2680d60..d00ae76 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -716,7 +716,8 @@
 			 const ip46_address_t * next_hop,
 			 u32 next_hop_sw_if_index,
 			 u8 next_hop_fib_index,
-			 u32 next_hop_weight,
+			 u16 next_hop_weight,
+			 u16 next_hop_preference,
 			 mpls_label_t next_hop_via_label,
 			 mpls_label_t * next_hop_out_label_stack)
 {
@@ -729,6 +730,7 @@
     .frp_sw_if_index = next_hop_sw_if_index,
     .frp_fib_index = next_hop_fib_index,
     .frp_weight = next_hop_weight,
+    .frp_preference = next_hop_preference,
     .frp_label_stack = next_hop_out_label_stack,
   };
   fib_route_path_t *paths = NULL;
@@ -971,6 +973,7 @@
 				   ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
 				   mp->next_hop_weight,
+				   mp->next_hop_preference,
 				   ntohl (mp->next_hop_via_label),
 				   label_stack));
 }
@@ -1029,6 +1032,7 @@
 				   &nh, ntohl (mp->next_hop_sw_if_index),
 				   next_hop_fib_index,
 				   mp->next_hop_weight,
+				   mp->next_hop_preference,
 				   ntohl (mp->next_hop_via_label),
 				   label_stack));
 }
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index 6547cad..533d010 100755
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -360,6 +360,7 @@
   fib_prefix_t *prefixs = NULL, pfx;
   mpls_label_t out_label, via_label;
   clib_error_t *error = NULL;
+  u32 weight, preference;
   u32 table_id, is_del;
   vnet_main_t *vnm;
   u32 fib_index;
@@ -441,26 +442,6 @@
 	  pfx.fp_proto = FIB_PROTOCOL_IP6;
 	  vec_add1 (prefixs, pfx);
 	}
-      else if (unformat (line_input, "via %U %U weight %u",
-			 unformat_ip4_address,
-			 &rpath.frp_addr.ip4,
-			 unformat_vnet_sw_interface, vnm,
-			 &rpath.frp_sw_if_index, &rpath.frp_weight))
-	{
-	  rpath.frp_proto = FIB_PROTOCOL_IP4;
-	  vec_add1 (rpaths, rpath);
-	}
-
-      else if (unformat (line_input, "via %U %U weight %u",
-			 unformat_ip6_address,
-			 &rpath.frp_addr.ip6,
-			 unformat_vnet_sw_interface, vnm,
-			 &rpath.frp_sw_if_index, &rpath.frp_weight))
-	{
-	  rpath.frp_proto = FIB_PROTOCOL_IP6;
-	  vec_add1 (rpaths, rpath);
-	}
-
       else if (unformat (line_input, "via %U %U",
 			 unformat_ip4_address,
 			 &rpath.frp_addr.ip4,
@@ -482,6 +463,16 @@
 	  rpath.frp_proto = FIB_PROTOCOL_IP6;
 	  vec_add1 (rpaths, rpath);
 	}
+      else if (unformat (line_input, "weight %u", &weight))
+	{
+	  ASSERT (vec_len (rpaths));
+	  rpaths[vec_len (rpaths) - 1].frp_weight = weight;
+	}
+      else if (unformat (line_input, "preference %u", &preference))
+	{
+	  ASSERT (vec_len (rpaths));
+	  rpaths[vec_len (rpaths) - 1].frp_preference = preference;
+	}
       else if (unformat (line_input, "via %U next-hop-table %d",
 			 unformat_ip4_address,
 			 &rpath.frp_addr.ip4, &rpath.frp_fib_index))
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index c8a3ffb..61d7fe6 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -48,6 +48,7 @@
     @param mt_is_multicast - Is the tunnel's underlying LSP multicast
     @param mt_next_hop_proto_is_ip4 - The next-hop is IPV4
     @param mt_next_hop_weight - The weight, for UCMP
+    @param mt_next_hop_preference - The preference of the path; the lowest value is preferred
     @param mt_next_hop[16] - the nextop address
     @param mt_next_hop_sw_if_index - the next-hop SW interface
     @param mt_next_hop_table_id - the next-hop table-id (if appropriate)
@@ -64,6 +65,7 @@
   u8 mt_is_multicast;
   u8 mt_next_hop_proto_is_ip4;
   u8 mt_next_hop_weight;
+  u8 mt_next_hop_preference;
   u8 mt_next_hop[16];
   u8 mt_next_hop_n_out_labels;
   u32 mt_next_hop_sw_if_index;
@@ -181,6 +183,7 @@
   u8 mr_is_rpf_id;
   u8 mr_next_hop_proto_is_ip4;
   u8 mr_next_hop_weight;
+  u8 mr_next_hop_preference;
   u8 mr_next_hop[16];
   u8 mr_next_hop_n_out_labels;
   u32 mr_next_hop_sw_if_index;
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index 6bfc491..22fb7d4 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -206,6 +206,7 @@
 				   &nh, ntohl (mp->mr_next_hop_sw_if_index),
 				   next_hop_fib_index,
 				   mp->mr_next_hop_weight,
+				   mp->mr_next_hop_preference,
 				   ntohl (mp->mr_next_hop_via_label),
 				   label_stack));
 }