ip: [re]introduce the 8-8-8-8 stride MTRIE
Type: improvement
There's a time-space trade-off between the 16-8-8 and 8-8-8-8 strides.
FIB continues to use the 16-8-8. Other features are now free to make the
choice.
Signed-off-by: Neale Ranns <neale@graphiant.com>
Change-Id: I6691a163486ce62e75e629f6ef0c990f253df8e5
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index 1f0c168..46bfcaf 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -166,8 +166,8 @@
mtrie = &ip4_fib_get(fib_index)->mtrie;
leaf = ip4_mtrie_16_lookup_step_one (mtrie, addr);
- leaf = ip4_mtrie_16_lookup_step (mtrie, leaf, addr, 2);
- leaf = ip4_mtrie_16_lookup_step (mtrie, leaf, addr, 3);
+ leaf = ip4_mtrie_16_lookup_step (leaf, addr, 2);
+ leaf = ip4_mtrie_16_lookup_step (leaf, addr, 3);
return (ip4_mtrie_leaf_get_adj_index(leaf));
}
@@ -188,10 +188,10 @@
leaf[0] = ip4_mtrie_16_lookup_step_one (mtrie[0], addr0);
leaf[1] = ip4_mtrie_16_lookup_step_one (mtrie[1], addr1);
- leaf[0] = ip4_mtrie_16_lookup_step (mtrie[0], leaf[0], addr0, 2);
- leaf[1] = ip4_mtrie_16_lookup_step (mtrie[1], leaf[1], addr1, 2);
- leaf[0] = ip4_mtrie_16_lookup_step (mtrie[0], leaf[0], addr0, 3);
- leaf[1] = ip4_mtrie_16_lookup_step (mtrie[1], leaf[1], addr1, 3);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 2);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 3);
*lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
*lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
@@ -224,15 +224,15 @@
leaf[2] = ip4_mtrie_16_lookup_step_one (mtrie[2], addr2);
leaf[3] = ip4_mtrie_16_lookup_step_one (mtrie[3], addr3);
- leaf[0] = ip4_mtrie_16_lookup_step (mtrie[0], leaf[0], addr0, 2);
- leaf[1] = ip4_mtrie_16_lookup_step (mtrie[1], leaf[1], addr1, 2);
- leaf[2] = ip4_mtrie_16_lookup_step (mtrie[2], leaf[2], addr2, 2);
- leaf[3] = ip4_mtrie_16_lookup_step (mtrie[3], leaf[3], addr3, 2);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 2);
+ leaf[2] = ip4_mtrie_16_lookup_step (leaf[2], addr2, 2);
+ leaf[3] = ip4_mtrie_16_lookup_step (leaf[3], addr3, 2);
- leaf[0] = ip4_mtrie_16_lookup_step (mtrie[0], leaf[0], addr0, 3);
- leaf[1] = ip4_mtrie_16_lookup_step (mtrie[1], leaf[1], addr1, 3);
- leaf[2] = ip4_mtrie_16_lookup_step (mtrie[2], leaf[2], addr2, 3);
- leaf[3] = ip4_mtrie_16_lookup_step (mtrie[3], leaf[3], addr3, 3);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 3);
+ leaf[2] = ip4_mtrie_16_lookup_step (leaf[2], addr2, 3);
+ leaf[3] = ip4_mtrie_16_lookup_step (leaf[3], addr3, 3);
*lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
*lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 7eaac59..0f4c47f 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -170,8 +170,7 @@
}
static ip4_mtrie_leaf_t
-ply_create (ip4_mtrie_16_t *m, ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len,
- u32 ply_base_len)
+ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len)
{
ip4_mtrie_8_ply_t *p;
/* Get cache aligned ply. */
@@ -183,7 +182,7 @@
}
always_inline ip4_mtrie_8_ply_t *
-get_next_ply_for_leaf (ip4_mtrie_16_t *m, ip4_mtrie_leaf_t l)
+get_next_ply_for_leaf (ip4_mtrie_leaf_t l)
{
uword n = ip4_mtrie_leaf_get_next_ply_index (l);
@@ -212,6 +211,37 @@
ply_16_init (&m->root_ply, IP4_MTRIE_LEAF_EMPTY, 0);
}
+void
+ip4_mtrie_8_free (ip4_mtrie_8_t *m)
+{
+ /* the root ply is allocated from the ply pool and is returned to it
+ * below; the assumption being that the owner has emptied the trie
+ * before deletion.
+ */
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+#if CLIB_DEBUG > 0
+ int i;
+ for (i = 0; i < ARRAY_LEN (root->leaves); i++)
+ {
+ ASSERT (!ip4_mtrie_leaf_is_next_ply (root->leaves[i]));
+ }
+#endif
+
+ pool_put (ip4_ply_pool, root);
+}
+
+void
+ip4_mtrie_8_init (ip4_mtrie_8_t *m)
+{
+ ip4_mtrie_8_ply_t *root;
+
+ pool_get (ip4_ply_pool, root);
+ m->root_ply = root - ip4_ply_pool;
+
+ ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0);
+}
+
typedef struct
{
ip4_address_t dst_address;
@@ -222,7 +252,7 @@
} ip4_mtrie_set_unset_leaf_args_t;
static void
-set_ply_with_more_specific_leaf (ip4_mtrie_16_t *m, ip4_mtrie_8_ply_t *ply,
+set_ply_with_more_specific_leaf (ip4_mtrie_8_ply_t *ply,
ip4_mtrie_leaf_t new_leaf,
uword new_leaf_dst_address_bits)
{
@@ -238,8 +268,8 @@
/* Recurse into sub plies. */
if (!ip4_mtrie_leaf_is_terminal (old_leaf))
{
- ip4_mtrie_8_ply_t *sub_ply = get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
+ ip4_mtrie_8_ply_t *sub_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (sub_ply, new_leaf,
new_leaf_dst_address_bits);
}
@@ -255,8 +285,8 @@
}
static void
-set_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a,
- u32 old_ply_index, u32 dst_address_byte_index)
+set_leaf (const ip4_mtrie_set_unset_leaf_args_t *a, u32 old_ply_index,
+ u32 dst_address_byte_index)
{
ip4_mtrie_leaf_t old_leaf, new_leaf;
i32 n_dst_bits_next_plies;
@@ -321,8 +351,8 @@
{
/* Existing leaf points to another ply. We need to place
* new_leaf into all more specific slots. */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (new_ply, new_leaf,
a->dst_address_length);
}
}
@@ -330,9 +360,8 @@
{
/* The current leaf is less specific and not termial (i.e. a ply),
* recurse on down the trie */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool,
- dst_address_byte_index + 1);
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
}
/*
* else
@@ -358,11 +387,10 @@
old_ply->n_non_empty_leafs -=
ip4_mtrie_leaf_is_non_empty (old_ply, dst_byte);
- new_leaf =
- ply_create (m, old_leaf,
- old_ply->dst_address_bits_of_leaves[dst_byte],
- ply_base_len);
- new_ply = get_next_ply_for_leaf (m, new_leaf);
+ new_leaf = ply_create (old_leaf,
+ old_ply->dst_address_bits_of_leaves[dst_byte],
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (new_leaf);
/* Refetch since ply_create may move pool. */
old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
@@ -375,9 +403,9 @@
ASSERT (old_ply->n_non_empty_leafs >= 0);
}
else
- new_ply = get_next_ply_for_leaf (m, old_leaf);
+ new_ply = get_next_ply_for_leaf (old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+ set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
}
}
@@ -443,8 +471,8 @@
{
/* Existing leaf points to another ply. We need to place
* new_leaf into all more specific slots. */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (new_ply, new_leaf,
a->dst_address_length);
}
}
@@ -452,8 +480,8 @@
{
/* The current leaf is less specific and not termial (i.e. a ply),
* recurse on down the trie */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_leaf (a, new_ply - ip4_ply_pool, 2);
}
/*
* else
@@ -476,24 +504,23 @@
if (ip4_mtrie_leaf_is_terminal (old_leaf))
{
/* There is a leaf occupying the slot. Replace it with a new ply */
- new_leaf =
- ply_create (m, old_leaf,
- old_ply->dst_address_bits_of_leaves[dst_byte],
- ply_base_len);
- new_ply = get_next_ply_for_leaf (m, new_leaf);
+ new_leaf = ply_create (old_leaf,
+ old_ply->dst_address_bits_of_leaves[dst_byte],
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (new_leaf);
clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
}
else
- new_ply = get_next_ply_for_leaf (m, old_leaf);
+ new_ply = get_next_ply_for_leaf (old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ set_leaf (a, new_ply - ip4_ply_pool, 2);
}
}
static uword
-unset_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a,
+unset_leaf (const ip4_mtrie_set_unset_leaf_args_t *a,
ip4_mtrie_8_ply_t *old_ply, u32 dst_address_byte_index)
{
ip4_mtrie_leaf_t old_leaf, del_leaf;
@@ -522,10 +549,10 @@
old_leaf = old_ply->leaves[i];
old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
- if (old_leaf == del_leaf
- || (!old_leaf_is_terminal
- && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
- dst_address_byte_index + 1)))
+ if (old_leaf == del_leaf ||
+ (!old_leaf_is_terminal &&
+ unset_leaf (a, get_next_ply_for_leaf (old_leaf),
+ dst_address_byte_index + 1)))
{
old_ply->n_non_empty_leafs -=
ip4_mtrie_leaf_is_non_empty (old_ply, i);
@@ -597,9 +624,9 @@
old_leaf = old_ply->leaves[slot];
old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
- if (old_leaf == del_leaf
- || (!old_leaf_is_terminal
- && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
+ if (old_leaf == del_leaf ||
+ (!old_leaf_is_terminal &&
+ unset_leaf (a, get_next_ply_for_leaf (old_leaf), 2)))
{
clib_atomic_store_rel_n (
&old_ply->leaves[slot],
@@ -626,6 +653,24 @@
}
void
+ip4_mtrie_8_route_add (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index)
+{
+ ip4_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 =
+ (dst_address->as_u32 & im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ set_leaf (&a, root - ip4_ply_pool, 0);
+}
+
+void
ip4_mtrie_16_route_del (ip4_mtrie_16_t *m, const ip4_address_t *dst_address,
u32 dst_address_length, u32 adj_index,
u32 cover_address_length, u32 cover_adj_index)
@@ -645,9 +690,32 @@
unset_root_leaf (m, &a);
}
+void
+ip4_mtrie_8_route_del (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index)
+{
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ ip4_mtrie_set_unset_leaf_args_t a = {
+ .dst_address.as_u32 =
+ (dst_address->as_u32 & im->fib_masks[dst_address_length]),
+ .dst_address_length = dst_address_length,
+ .adj_index = adj_index,
+ .cover_adj_index = cover_adj_index,
+ .cover_address_length = cover_address_length,
+ };
+
+ /* the top level ply is never removed */
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ unset_leaf (&a, root, 0);
+}
+
/* Returns number of bytes of memory used by mtrie. */
static uword
-mtrie_ply_memory_usage (ip4_mtrie_16_t *m, ip4_mtrie_8_ply_t *p)
+mtrie_ply_memory_usage (ip4_mtrie_8_ply_t *p)
{
uword bytes, i;
@@ -656,7 +724,7 @@
{
ip4_mtrie_leaf_t l = p->leaves[i];
if (ip4_mtrie_leaf_is_next_ply (l))
- bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
}
return bytes;
@@ -673,7 +741,23 @@
{
ip4_mtrie_leaf_t l = m->root_ply.leaves[i];
if (ip4_mtrie_leaf_is_next_ply (l))
- bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
+ }
+
+ return bytes;
+}
+uword
+ip4_mtrie_8_memory_usage (ip4_mtrie_8_t *m)
+{
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+ uword bytes, i;
+
+ bytes = sizeof (*m);
+ for (i = 0; i < ARRAY_LEN (root->leaves); i++)
+ {
+ ip4_mtrie_leaf_t l = root->leaves[i];
+ if (ip4_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
}
return bytes;
@@ -695,7 +779,7 @@
({ \
u32 a, ia_length; \
ip4_address_t ia; \
- ip4_mtrie_leaf_t _l = p->leaves[(_i)]; \
+ ip4_mtrie_leaf_t _l = (_p)->leaves[(_i)]; \
\
a = (_base_address) + ((_a) << (32 - (_ply_max_len))); \
ia.as_u32 = clib_host_to_net_u32 (a); \
@@ -745,9 +829,9 @@
u32 base_address = 0;
int i;
- s = format (s, "%d plies, memory usage %U\n", pool_elts (ip4_ply_pool),
- format_memory_size, ip4_mtrie_16_memory_usage (m));
- s = format (s, "root-ply");
+ s =
+ format (s, "16-8-8: %d plies, memory usage %U\n", pool_elts (ip4_ply_pool),
+ format_memory_size, ip4_mtrie_16_memory_usage (m));
p = &m->root_ply;
if (verbose)
@@ -771,6 +855,37 @@
return s;
}
+u8 *
+format_ip4_mtrie_8 (u8 *s, va_list *va)
+{
+ ip4_mtrie_8_t *m = va_arg (*va, ip4_mtrie_8_t *);
+ int verbose = va_arg (*va, int);
+ ip4_mtrie_8_ply_t *root;
+ u32 base_address = 0;
+ u16 slot;
+
+ root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ s = format (s, "8-8-8-8; %d plies, memory usage %U\n",
+ pool_elts (ip4_ply_pool), format_memory_size,
+ ip4_mtrie_8_memory_usage (m));
+
+ if (verbose)
+ {
+ s = format (s, "root-ply");
+
+ for (slot = 0; slot < ARRAY_LEN (root->leaves); slot++)
+ {
+ if (root->dst_address_bits_of_leaves[slot] > 0)
+ {
+ s = FORMAT_PLY (s, root, slot, slot, base_address, 8, 0);
+ }
+ }
+ }
+
+ return s;
+}
+
/** Default heap size for the IPv4 mtries */
#define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32<<20)
#ifndef MAP_HUGE_SHIFT
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 332e34a..ec417c9 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -122,7 +122,7 @@
"IP4 Mtrie ply cache line");
/**
- * @brief The mutiway-TRIE.
+ * @brief The multiway-TRIE with a 16-8-8 stride.
* There is no data associated with the mtrie apart from the top PLY
*/
typedef struct
@@ -136,14 +136,26 @@
} ip4_mtrie_16_t;
/**
+ * @brief The multiway-TRIE with an 8-8-8-8 stride.
+ * The only data associated with the mtrie is the pool index of the root PLY
+ */
+typedef struct
+{
+ /* pool index of the root ply */
+ u32 root_ply;
+} ip4_mtrie_8_t;
+
+/**
* @brief Initialise an mtrie
*/
void ip4_mtrie_16_init (ip4_mtrie_16_t *m);
+void ip4_mtrie_8_init (ip4_mtrie_8_t *m);
/**
- * @brief Free an mtrie, It must be emty when free'd
+ * @brief Free an mtrie. It must be empty when freed
*/
void ip4_mtrie_16_free (ip4_mtrie_16_t *m);
+void ip4_mtrie_8_free (ip4_mtrie_8_t *m);
/**
* @brief Add a route/entry to the mtrie
@@ -151,6 +163,9 @@
void ip4_mtrie_16_route_add (ip4_mtrie_16_t *m,
const ip4_address_t *dst_address,
u32 dst_address_length, u32 adj_index);
+void ip4_mtrie_8_route_add (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index);
+
/**
* @brief remove a route/entry to the mtrie
*/
@@ -158,16 +173,21 @@
const ip4_address_t *dst_address,
u32 dst_address_length, u32 adj_index,
u32 cover_address_length, u32 cover_adj_index);
+void ip4_mtrie_8_route_del (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
/**
* @brief return the memory used by the table
*/
uword ip4_mtrie_16_memory_usage (ip4_mtrie_16_t *m);
+uword ip4_mtrie_8_memory_usage (ip4_mtrie_8_t *m);
/**
* @brief Format/display the contents of the mtrie
*/
format_function_t format_ip4_mtrie_16;
+format_function_t format_ip4_mtrie_8;
/**
* @brief A global pool of 8bit stride plys
@@ -197,8 +217,7 @@
* @brief Lookup step. Processes 1 byte of 4 byte ip4 address.
*/
always_inline ip4_mtrie_leaf_t
-ip4_mtrie_16_lookup_step (const ip4_mtrie_16_t *m,
- ip4_mtrie_leaf_t current_leaf,
+ip4_mtrie_16_lookup_step (ip4_mtrie_leaf_t current_leaf,
const ip4_address_t *dst_address,
u32 dst_address_byte_index)
{
@@ -229,6 +248,37 @@
return next_leaf;
}
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_8_lookup_step (ip4_mtrie_leaf_t current_leaf,
+ const ip4_address_t *dst_address,
+ u32 dst_address_byte_index)
+{
+ ip4_mtrie_8_ply_t *ply;
+
+ uword current_is_terminal = ip4_mtrie_leaf_is_terminal (current_leaf);
+
+ if (!current_is_terminal)
+ {
+ ply = ip4_ply_pool + (current_leaf >> 1);
+ return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
+ }
+
+ return current_leaf;
+}
+
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_8_lookup_step_one (const ip4_mtrie_8_t *m,
+ const ip4_address_t *dst_address)
+{
+ ip4_mtrie_leaf_t next_leaf;
+ ip4_mtrie_8_ply_t *ply;
+
+ ply = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+ next_leaf = ply->leaves[dst_address->as_u8[0]];
+
+ return next_leaf;
+}
+
#endif /* included_ip_ip4_fib_h */
/*