MTRIE Optimisations 2
1) 16-8-8 stride. Reduce trie depth walk traded with increased memory in the top PLY.
2) separate the vector of protocol-independent (PI) fib_table_t with the vector of protocol dependent (PD) FIBs. PD FIBs are large structures, we don't want to burn the memory for ech PD type
3) Go straight to the PD FIB in the data-path thus avoiding an indirection through, e.g., a PLY pool.
Change-Id: I800d1ed0b2049040d5da95213f3ed6b12bdd78b7
Signed-off-by: Neale Ranns <nranns@cisco.com>
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
index 413f93e..b430e11 100644
--- a/src/vnet/fib/fib.c
+++ b/src/vnet/fib/fib.c
@@ -28,6 +28,8 @@
return (error);
if ((error = vlib_call_init_function (vm, adj_module_init)))
return (error);
+ if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
+ return (error);
fib_entry_module_init();
fib_entry_src_module_init();
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 25005e1..6ac5461 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -924,10 +924,10 @@
/*
* no more sources left. this entry is toast.
*/
- fib_entry_src_action_uninstall(fib_entry);
fib_entry = fib_entry_post_flag_update_actions(fib_entry,
source,
bflags);
+ fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
}
@@ -1014,10 +1014,10 @@
/*
* no more sources left. this entry is toast.
*/
- fib_entry_src_action_uninstall(fib_entry);
fib_entry = fib_entry_post_flag_update_actions(fib_entry,
source,
bflags);
+ fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
}
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 3ed309f..928a9d4 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -32,6 +32,7 @@
#include <vnet/fib/fib_path_list.h>
#include <vnet/fib/fib_internal.h>
#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/fib/mpls_fib.h>
/**
* Enurmeration of path types
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 7818d02..6c3162e 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -47,7 +47,7 @@
switch (prefix->fp_proto)
{
case FIB_PROTOCOL_IP4:
- return (ip4_fib_table_lookup(&fib_table->v4,
+ return (ip4_fib_table_lookup(ip4_fib_get(fib_table->ft_index),
&prefix->fp_addr.ip4,
prefix->fp_len));
case FIB_PROTOCOL_IP6:
@@ -55,7 +55,7 @@
&prefix->fp_addr.ip6,
prefix->fp_len));
case FIB_PROTOCOL_MPLS:
- return (mpls_fib_table_lookup(&fib_table->mpls,
+ return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
prefix->fp_label,
prefix->fp_eos));
}
@@ -76,7 +76,7 @@
switch (prefix->fp_proto)
{
case FIB_PROTOCOL_IP4:
- return (ip4_fib_table_lookup_exact_match(&fib_table->v4,
+ return (ip4_fib_table_lookup_exact_match(ip4_fib_get(fib_table->ft_index),
&prefix->fp_addr.ip4,
prefix->fp_len));
case FIB_PROTOCOL_IP6:
@@ -84,7 +84,7 @@
&prefix->fp_addr.ip6,
prefix->fp_len));
case FIB_PROTOCOL_MPLS:
- return (mpls_fib_table_lookup(&fib_table->mpls,
+ return (mpls_fib_table_lookup(mpls_fib_get(fib_table->ft_index),
prefix->fp_label,
prefix->fp_eos));
}
@@ -148,7 +148,7 @@
switch (prefix->fp_proto)
{
case FIB_PROTOCOL_IP4:
- ip4_fib_table_entry_remove(&fib_table->v4,
+ ip4_fib_table_entry_remove(ip4_fib_get(fib_table->ft_index),
&prefix->fp_addr.ip4,
prefix->fp_len);
break;
@@ -158,7 +158,7 @@
prefix->fp_len);
break;
case FIB_PROTOCOL_MPLS:
- mpls_fib_table_entry_remove(&fib_table->mpls,
+ mpls_fib_table_entry_remove(mpls_fib_get(fib_table->ft_index),
prefix->fp_label,
prefix->fp_eos);
break;
@@ -208,7 +208,7 @@
switch (prefix->fp_proto)
{
case FIB_PROTOCOL_IP4:
- ip4_fib_table_entry_insert(&fib_table->v4,
+ ip4_fib_table_entry_insert(ip4_fib_get(fib_table->ft_index),
&prefix->fp_addr.ip4,
prefix->fp_len,
fib_entry_index);
@@ -220,7 +220,7 @@
fib_entry_index);
break;
case FIB_PROTOCOL_MPLS:
- mpls_fib_table_entry_insert(&fib_table->mpls,
+ mpls_fib_table_entry_insert(mpls_fib_get(fib_table->ft_index),
prefix->fp_label,
prefix->fp_eos,
fib_entry_index);
@@ -270,7 +270,9 @@
return (ip4_fib_table_fwding_dpo_remove(ip4_fib_get(fib_index),
&prefix->fp_addr.ip4,
prefix->fp_len,
- dpo));
+ dpo,
+ fib_table_get_less_specific(fib_index,
+ prefix)));
case FIB_PROTOCOL_IP6:
return (ip6_fib_table_fwding_dpo_remove(fib_index,
&prefix->fp_addr.ip6,
@@ -1034,13 +1036,13 @@
switch (fib_table->ft_proto)
{
case FIB_PROTOCOL_IP4:
- ip4_fib_table_destroy(&fib_table->v4);
+ ip4_fib_table_destroy(fib_table->ft_index);
break;
case FIB_PROTOCOL_IP6:
ip6_fib_table_destroy(fib_table->ft_index);
break;
case FIB_PROTOCOL_MPLS:
- mpls_fib_table_destroy(&fib_table->mpls);
+ mpls_fib_table_destroy(fib_table->ft_index);
break;
}
}
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index e7e66ac..b310aea 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -29,18 +29,6 @@
typedef struct fib_table_t_
{
/**
- * A union of the protocol specific FIBs that provide the
- * underlying LPM mechanism.
- * This element is first in the struct so that it is in the
- * first cache line.
- */
- union {
- ip4_fib_t v4;
- ip6_fib_t v6;
- mpls_fib_t mpls;
- };
-
- /**
* Which protocol this table serves. Used to switch on the union above.
*/
fib_protocol_t ft_proto;
diff --git a/src/vnet/fib/fib_test.c b/src/vnet/fib/fib_test.c
index 1a9cce2..92141dd 100644
--- a/src/vnet/fib/fib_test.c
+++ b/src/vnet/fib/fib_test.c
@@ -40,8 +40,6 @@
fformat(stderr, "FAIL:%d: " _comment "\n", \
__LINE__, ##_args); \
} else { \
- fformat(stderr, "PASS:%d: " _comment "\n", \
- __LINE__, ##_args); \
} \
_evald; \
})
@@ -5727,7 +5725,7 @@
&a_o_10_10_11_1,
&adj_o_10_10_11_2),
"1.1.1.1/32 LB 2 buckets via: "
- "adj over 10.10.11.1",
+ "adj over 10.10.11.1, "
"adj-v4 over 10.10.11.2");
fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
@@ -5738,7 +5736,7 @@
&a_o_10_10_11_1,
&adj_o_10_10_11_2),
"24001/eos LB 2 buckets via: "
- "adj over 10.10.11.1",
+ "adj over 10.10.11.1, "
"adj-v4 over 10.10.11.2");
fei = fib_table_lookup(MPLS_FIB_DEFAULT_TABLE_ID,
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index a791562..98d4e52 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -104,29 +104,35 @@
ip4_create_fib_with_table_id (u32 table_id)
{
fib_table_t *fib_table;
+ ip4_fib_t *v4_fib;
pool_get_aligned(ip4_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
memset(fib_table, 0, sizeof(*fib_table));
+ pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
+
+ ASSERT((fib_table - ip4_main.fibs) ==
+ (v4_fib - ip4_main.v4_fibs));
+
fib_table->ft_proto = FIB_PROTOCOL_IP4;
fib_table->ft_index =
- fib_table->v4.index =
+ v4_fib->index =
(fib_table - ip4_main.fibs);
hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index);
fib_table->ft_table_id =
- fib_table->v4.table_id =
+ v4_fib->table_id =
table_id;
fib_table->ft_flow_hash_config =
- fib_table->v4.flow_hash_config =
+ v4_fib->flow_hash_config =
IP_FLOW_HASH_DEFAULT;
- fib_table->v4.fwd_classify_table_index = ~0;
- fib_table->v4.rev_classify_table_index = ~0;
+ v4_fib->fwd_classify_table_index = ~0;
+ v4_fib->rev_classify_table_index = ~0;
fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4);
- ip4_mtrie_init(&fib_table->v4.mtrie);
+ ip4_mtrie_init(&v4_fib->mtrie);
/*
* add the special entries into the new FIB
@@ -151,9 +157,10 @@
}
void
-ip4_fib_table_destroy (ip4_fib_t *fib)
+ip4_fib_table_destroy (u32 fib_index)
{
- fib_table_t *fib_table = (fib_table_t*)fib;
+ fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index);
+ ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index);
int ii;
/*
@@ -185,6 +192,10 @@
{
hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+
+ ip4_mtrie_free(&v4_fib->mtrie);
+
+ pool_put(ip4_main.v4_fibs, v4_fib);
pool_put(ip4_main.fibs, fib_table);
}
@@ -367,16 +378,33 @@
u32 len,
const dpo_id_t *dpo)
{
- ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 0); // ADD
+ ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
}
void
ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
const ip4_address_t *addr,
u32 len,
- const dpo_id_t *dpo)
+ const dpo_id_t *dpo,
+ u32 cover_index)
{
- ip4_fib_mtrie_add_del_route(fib, *addr, len, dpo->dpoi_index, 1); // DELETE
+ fib_prefix_t cover_prefix = {
+ .fp_len = 0,
+ };
+ const dpo_id_t *cover_dpo;
+
+ /*
+ * We need to pass the MTRIE the LB index and address length of the
+ * covering prefix, so it can fill the plys with the correct replacement
+ * for the entry being removed
+ */
+ fib_entry_get_prefix(cover_index, &cover_prefix);
+ cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+ ip4_fib_mtrie_route_del(&fib->mtrie,
+ addr, len, dpo->dpoi_index,
+ cover_prefix.fp_len,
+ cover_dpo->dpoi_index);
}
void
@@ -498,7 +526,7 @@
pool_foreach (fib_table, im4->fibs,
({
- ip4_fib_t *fib = &fib_table->v4;
+ ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
if (table_id >= 0 && table_id != (int)fib->table_id)
continue;
@@ -523,6 +551,11 @@
}
continue;
}
+ if (mtrie)
+ {
+ vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
+ continue;
+ }
if (!matching)
{
@@ -532,9 +565,6 @@
{
ip4_fib_table_show_one(fib, vm, &matching_address, matching_mask);
}
-
- if (mtrie)
- vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie);
}));
return 0;
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index 243fd77..4cf9e58 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -34,6 +34,33 @@
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_t_
+{
+ /**
+ * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+ * First member so it's in the first cacheline.
+ */
+ ip4_fib_mtrie_t mtrie;
+
+ /* Hash table for each prefix length mapping. */
+ uword *fib_entry_by_dst_address[33];
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+
+ /* flow hash configuration */
+ flow_hash_config_t flow_hash_config;
+
+ /* N-tuple classifier indices */
+ u32 fwd_classify_table_index;
+ u32 rev_classify_table_index;
+
+} ip4_fib_t;
extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
const ip4_address_t *addr,
@@ -50,7 +77,7 @@
const ip4_address_t *addr,
u32 len,
fib_node_index_t fib_entry_index);
-extern void ip4_fib_table_destroy(ip4_fib_t *fib);
+extern void ip4_fib_table_destroy(u32 fib_index);
extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
const ip4_address_t *addr,
@@ -60,7 +87,8 @@
extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
const ip4_address_t *addr,
u32 len,
- const dpo_id_t *dpo);
+ const dpo_id_t *dpo,
+ fib_node_index_t cover_index);
extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
const ip4_address_t * dst);
@@ -79,7 +107,7 @@
static inline ip4_fib_t *
ip4_fib_get (u32 index)
{
- return (&(pool_elt_at_index(ip4_main.fibs, index)->v4));
+ return (pool_elt_at_index(ip4_main.v4_fibs, index));
}
always_inline u32
@@ -134,7 +162,6 @@
mtrie = &ip4_fib_get(fib_index)->mtrie;
leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
- leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 1);
leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 343ff55..0ee029d 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -55,22 +55,29 @@
create_fib_with_table_id (u32 table_id)
{
fib_table_t *fib_table;
+ ip6_fib_t *v6_fib;
pool_get_aligned(ip6_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
- memset(fib_table, 0, sizeof(*fib_table));
+ pool_get_aligned(ip6_main.v6_fibs, v6_fib, CLIB_CACHE_LINE_BYTES);
+ memset(fib_table, 0, sizeof(*fib_table));
+ memset(v6_fib, 0, sizeof(*v6_fib));
+
+ ASSERT((fib_table - ip6_main.fibs) ==
+ (v6_fib - ip6_main.v6_fibs));
+
fib_table->ft_proto = FIB_PROTOCOL_IP6;
fib_table->ft_index =
- fib_table->v6.index =
- (fib_table - ip6_main.fibs);
+ v6_fib->index =
+ (fib_table - ip6_main.fibs);
hash_set(ip6_main.fib_index_by_table_id, table_id, fib_table->ft_index);
fib_table->ft_table_id =
- fib_table->v6.table_id =
+ v6_fib->table_id =
table_id;
fib_table->ft_flow_hash_config =
- fib_table->v6.flow_hash_config =
+ v6_fib->flow_hash_config =
IP_FLOW_HASH_DEFAULT;
vnet_ip6_fib_init(fib_table->ft_index);
@@ -188,6 +195,7 @@
{
hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+ pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
pool_put(ip6_main.fibs, fib_table);
}
@@ -620,7 +628,7 @@
pool_foreach (fib_table, im6->fibs,
({
- fib = &(fib_table->v6);
+ fib = pool_elt_at_index(im6->v6_fibs, fib_table->ft_index);
if (table_id >= 0 && table_id != (int)fib->table_id)
continue;
if (fib_index != ~0 && fib_index != (int)fib->index)
diff --git a/src/vnet/fib/ip6_fib.h b/src/vnet/fib/ip6_fib.h
index af864a7..e2f2845 100644
--- a/src/vnet/fib/ip6_fib.h
+++ b/src/vnet/fib/ip6_fib.h
@@ -115,7 +115,7 @@
ip6_fib_get (fib_node_index_t index)
{
ASSERT(!pool_is_free_index(ip6_main.fibs, index));
- return (&pool_elt_at_index (ip6_main.fibs, index)->v6);
+ return (pool_elt_at_index (ip6_main.v6_fibs, index));
}
static inline
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 5cd0fd2..4b2b76e 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -97,11 +97,15 @@
int i;
pool_get_aligned(mpls_main.fibs, fib_table, CLIB_CACHE_LINE_BYTES);
+ pool_get_aligned(mpls_main.mpls_fibs, mf, CLIB_CACHE_LINE_BYTES);
+
+ ASSERT((fib_table - mpls_main.fibs) ==
+ (mf - mpls_main.mpls_fibs));
+
memset(fib_table, 0, sizeof(*fib_table));
fib_table->ft_proto = FIB_PROTOCOL_MPLS;
- fib_table->ft_index =
- (fib_table - mpls_main.fibs);
+ fib_table->ft_index = (fib_table - mpls_main.fibs);
hash_set (mpls_main.fib_index_by_table_id, table_id, fib_table->ft_index);
@@ -109,8 +113,6 @@
table_id;
fib_table->ft_flow_hash_config =
MPLS_FLOW_HASH_DEFAULT;
- fib_table->v4.fwd_classify_table_index = ~0;
- fib_table->v4.rev_classify_table_index = ~0;
fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_MPLS);
@@ -122,7 +124,6 @@
drop_dpo_get(DPO_PROTO_MPLS));
}
- mf = &fib_table->mpls;
mf->mf_entries = hash_create(0, sizeof(fib_node_index_t));
for (i = 0; i < MPLS_FIB_DB_SIZE; i++)
{
@@ -241,9 +242,10 @@
}
void
-mpls_fib_table_destroy (mpls_fib_t *mf)
+mpls_fib_table_destroy (u32 fib_index)
{
- fib_table_t *fib_table = (fib_table_t*)mf;
+ fib_table_t *fib_table = pool_elt_at_index(mpls_main.fibs, fib_index);
+ mpls_fib_t *mf = pool_elt_at_index(mpls_main.mpls_fibs, fib_index);
fib_prefix_t prefix = {
.fp_proto = FIB_PROTOCOL_MPLS,
};
@@ -274,6 +276,7 @@
}
hash_free(mf->mf_entries);
+ pool_put(mpls_main.mpls_fibs, mf);
pool_put(mpls_main.fibs, fib_table);
}
@@ -436,11 +439,11 @@
if (MPLS_LABEL_INVALID == label)
{
- mpls_fib_table_show_all(&(fib_table->mpls), vm);
+ mpls_fib_table_show_all(mpls_fib_get(fib_table->ft_index), vm);
}
else
{
- mpls_fib_table_show_one(&(fib_table->mpls), label, vm);
+ mpls_fib_table_show_one(mpls_fib_get(fib_table->ft_index), label, vm);
}
}));
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index 779deca..78a61a1 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -25,10 +25,33 @@
#include <vnet/mpls/mpls.h>
#include <vnet/fib/fib_table.h>
+#define MPLS_FIB_DEFAULT_TABLE_ID 0
+
+/**
+ * Type exposure is to allow the DP fast/inlined access
+ */
+#define MPLS_FIB_KEY_SIZE 21
+#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+
+typedef struct mpls_fib_t_
+{
+ /**
+ * A hash table of entries. 21 bit key
+ * Hash table for reduced memory footprint
+ */
+ uword * mf_entries;
+
+ /**
+ * The load-balance indices keyed by 21 bit label+eos bit.
+ * A flat array for maximum lookup performace.
+ */
+ index_t mf_lbs[MPLS_FIB_DB_SIZE];
+} mpls_fib_t;
+
static inline mpls_fib_t*
mpls_fib_get (fib_node_index_t index)
{
- return (&(pool_elt_at_index(mpls_main.fibs, index)->mpls));
+ return (pool_elt_at_index(mpls_main.mpls_fibs, index));
}
extern u32 mpls_fib_table_find_or_create_and_lock(u32 table_id);
@@ -56,8 +79,7 @@
mpls_label_t label,
mpls_eos_bit_t eos,
fib_node_index_t fei);
-extern void mpls_fib_table_destroy(mpls_fib_t *mf);
-
+extern void mpls_fib_table_destroy(u32 fib_index);
extern void mpls_fib_forwarding_table_update(mpls_fib_t *mf,