MPLS performance improvements.
1 - Quad loop lookup and label imposition.
2 - optimise imposition for the 1 label case
3 - input gets TTL from header directly (no byte swap)
Change-Id: I59204c9e5d134b0df75d7afa43e360f946d1ffe7
Signed-off-by: Neale Ranns <nranns@cisco.com>
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index bbdc966..be9b285 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -160,6 +160,33 @@
mpls_unicast_header_t hdr;
} mpls_label_imposition_trace_t;
+/**
+ * @brief Write an MPLS label DPO's header stack in front of a buffer's
+ *        current data.
+ *
+ * The buffer is advanced by minus mld_n_hdr_bytes, the DPO's headers are
+ * copied to the new current-data position, and byte 3 of the last header
+ * written (the inner most label) is overwritten with ttl0.
+ *
+ * @param b0   buffer receiving the label stack
+ * @param mld0 label DPO supplying mld_hdr, mld_n_labels and mld_n_hdr_bytes
+ * @param ttl0 TTL stored in the inner most label
+ * @return pointer to the inner most label header within the buffer
+ */
+always_inline mpls_unicast_header_t *
+mpls_label_paint (vlib_buffer_t * b0,
+                  mpls_label_dpo_t *mld0,
+                  u8 ttl0)
+{
+    mpls_unicast_header_t *inner;
+
+    /* step back far enough to hold the complete label stack */
+    vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
+    inner = vlib_buffer_get_current(b0);
+
+    if (PREDICT_FALSE(1 != mld0->mld_n_labels))
+    {
+        /* several labels: copy the whole stack, then move to its last entry */
+        clib_memcpy(inner, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
+        inner += mld0->mld_n_labels - 1;
+    }
+    else
+    {
+        /* the common single-label case needs only one header assignment */
+        inner[0] = mld0->mld_hdr[0];
+    }
+
+    /* the TTL is the 4th byte of the inner most label's header */
+    ((char*)inner)[3] = ttl0;
+
+    return (inner);
+}
+
always_inline uword
mpls_label_imposition_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -180,45 +207,59 @@
vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
+ while (n_left_from >= 8 && n_left_to_next >= 4)
{
- mpls_unicast_header_t *hdr0, *hdr1;
- mpls_label_dpo_t *mld0, *mld1;
- u32 bi0, mldi0, bi1, mldi1;
- vlib_buffer_t * b0, *b1;
- u32 next0, next1;
- u8 ttl0, ttl1;
+ u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3;
+ mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3;
+ mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3;
+ vlib_buffer_t * b0, *b1, * b2, *b3;
+ u32 next0, next1, next2, next3;
+ u8 ttl0, ttl1,ttl2, ttl3 ;
bi0 = to_next[0] = from[0];
bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3;
+ vlib_buffer_t * p2, * p3, *p4, *p5;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
vlib_prefetch_buffer_header (p2, STORE);
vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
}
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
+ from += 4;
+ to_next += 4;
+ n_left_from -= 4;
+ n_left_to_next -= 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
/* dst lookup was done by ip4 lookup */
mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+ mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
+ mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
mld0 = mpls_label_dpo_get(mldi0);
mld1 = mpls_label_dpo_get(mldi1);
+ mld2 = mpls_label_dpo_get(mldi2);
+ mld3 = mpls_label_dpo_get(mldi3);
if (payload_is_ip4)
{
@@ -227,23 +268,37 @@
*/
ip4_header_t * ip0 = vlib_buffer_get_current(b0);
ip4_header_t * ip1 = vlib_buffer_get_current(b1);
+ ip4_header_t * ip2 = vlib_buffer_get_current(b2);
+ ip4_header_t * ip3 = vlib_buffer_get_current(b3);
u32 checksum0;
u32 checksum1;
+ u32 checksum2;
+ u32 checksum3;
checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+ checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100);
+ checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100);
checksum0 += checksum0 >= 0xffff;
checksum1 += checksum1 >= 0xffff;
+ checksum2 += checksum2 >= 0xffff;
+ checksum3 += checksum3 >= 0xffff;
ip0->checksum = checksum0;
ip1->checksum = checksum1;
+ ip2->checksum = checksum2;
+ ip3->checksum = checksum3;
ip0->ttl -= 1;
ip1->ttl -= 1;
+ ip2->ttl -= 1;
+ ip3->ttl -= 1;
ttl1 = ip1->ttl;
ttl0 = ip0->ttl;
+ ttl3 = ip3->ttl;
+ ttl2 = ip2->ttl;
}
else if (payload_is_ip6)
{
@@ -252,13 +307,18 @@
*/
ip6_header_t * ip0 = vlib_buffer_get_current(b0);
ip6_header_t * ip1 = vlib_buffer_get_current(b1);
-
+ ip6_header_t * ip2 = vlib_buffer_get_current(b2);
+ ip6_header_t * ip3 = vlib_buffer_get_current(b3);
ip0->hop_limit -= 1;
ip1->hop_limit -= 1;
+ ip2->hop_limit -= 1;
+ ip3->hop_limit -= 1;
ttl0 = ip0->hop_limit;
ttl1 = ip1->hop_limit;
+ ttl2 = ip2->hop_limit;
+ ttl3 = ip3->hop_limit;
}
else
{
@@ -294,30 +354,45 @@
{
ttl1 = 255;
}
+ if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first))
+ {
+ /* a TTL of 1 would be decremented to 0 by the assignment below */
+ ASSERT(1 != vnet_buffer (b2)->mpls.ttl);
+ ttl2 = vnet_buffer(b2)->mpls.ttl - 1;
+ }
+ else
+ {
+ ttl2 = 255;
+ }
+ if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first))
+ {
+ ASSERT(1 != vnet_buffer (b3)->mpls.ttl);
+ ttl3 = vnet_buffer(b3)->mpls.ttl - 1;
+ }
+ else
+ {
+ ttl3 = 255;
+ }
}
vnet_buffer(b0)->mpls.first = 0;
vnet_buffer(b1)->mpls.first = 0;
+ vnet_buffer(b2)->mpls.first = 0;
+ vnet_buffer(b3)->mpls.first = 0;
/* Paint the MPLS header */
- vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
- vlib_buffer_advance(b1, -(mld1->mld_n_hdr_bytes));
-
- hdr0 = vlib_buffer_get_current(b0);
- hdr1 = vlib_buffer_get_current(b1);
-
- clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
- clib_memcpy(hdr1, mld1->mld_hdr, mld1->mld_n_hdr_bytes);
-
- /* fixup the TTL for the inner most label */
- hdr0 = hdr0 + (mld0->mld_n_labels - 1);
- hdr1 = hdr1 + (mld1->mld_n_labels - 1);
- ((char*)hdr0)[3] = ttl0;
- ((char*)hdr1)[3] = ttl1;
+ hdr0 = mpls_label_paint(b0, mld0, ttl0);
+ hdr1 = mpls_label_paint(b1, mld1, ttl1);
+ hdr2 = mpls_label_paint(b2, mld2, ttl2);
+ hdr3 = mpls_label_paint(b3, mld3, ttl3);
next0 = mld0->mld_dpo.dpoi_next_node;
next1 = mld1->mld_dpo.dpoi_next_node;
+ next2 = mld2->mld_dpo.dpoi_next_node;
+ next3 = mld3->mld_dpo.dpoi_next_node;
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
+ vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index;
+ vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index;
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -331,10 +406,23 @@
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->hdr = *hdr1;
}
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof (*tr));
+ tr->hdr = *hdr2;
+ }
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_label_imposition_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->hdr = *hdr3;
+ }
- vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
+ vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next,
n_left_to_next,
- bi0, bi1, next0, next1);
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
}
while (n_left_from > 0 && n_left_to_next > 0)
diff --git a/src/vnet/dpo/mpls_label_dpo.h b/src/vnet/dpo/mpls_label_dpo.h
index 89bcb09..e23f3d2 100644
--- a/src/vnet/dpo/mpls_label_dpo.h
+++ b/src/vnet/dpo/mpls_label_dpo.h
@@ -61,8 +61,8 @@
* Should this get any bigger then we will need to reconsider how many labels
* can be pushed in one object.
*/
-_Static_assert((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
- "MPLS label DPO is larger than one cache line.");
+STATIC_ASSERT((sizeof(mpls_label_dpo_t) <= CLIB_CACHE_LINE_BYTES),
+ "MPLS label DPO is larger than one cache line.");
/**
* @brief Create an MPLS label object
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
index de8b966..34a4652 100644
--- a/src/vnet/mpls/error.def
+++ b/src/vnet/mpls/error.def
@@ -18,11 +18,11 @@
mpls_error (NONE, "no error")
mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
mpls_error (UNSUPPORTED_VERSION, "unsupported version")
-mpls_error (PKTS_DECAP, "MPLS-GRE input packets decapsulated")
-mpls_error (PKTS_ENCAP, "MPLS-GRE output packets encapsulated")
-mpls_error (NO_LABEL, "MPLS-GRE no label for fib/dst")
-mpls_error (TTL_EXPIRED, "MPLS-GRE ttl expired")
-mpls_error (S_NOT_SET, "MPLS-GRE s-bit not set")
+mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
+mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
+mpls_error (NO_LABEL, "MPLS no label for fib/dst")
+mpls_error (TTL_EXPIRED, "MPLS ttl expired")
+mpls_error (S_NOT_SET, "MPLS s-bit not set")
mpls_error (BAD_LABEL, "invalid FIB id in label")
mpls_error (NOT_IP4, "non-ip4 packets dropped")
mpls_error (DISALLOWED_FIB, "disallowed FIB id")
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 7ae4aa0..482577b 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -161,6 +161,14 @@
&h_host);
}
+/*
+ * Record of five u32 values describing one MPLS FIB entry.
+ * NOTE(review): presumably one row collected for "show mpls fib" style
+ * output, judging by the type name - confirm against the users of this
+ * type, which are not visible in this hunk.
+ */
+typedef struct {
+ u32 fib_index;
+ u32 entry_index;
+ u32 dest;
+ u32 s_bit;
+ u32 label;
+} show_mpls_fib_t;
+
int
mpls_dest_cmp(void * a1, void * a2)
{
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index b6fdbce..300f2cf 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -86,16 +86,12 @@
extern clib_error_t * mpls_feature_init(vlib_main_t * vm);
-format_function_t format_mpls_protocol;
-format_function_t format_mpls_encap_index;
-
format_function_t format_mpls_eos_bit;
format_function_t format_mpls_unicast_header_net_byte_order;
format_function_t format_mpls_unicast_label;
format_function_t format_mpls_header;
extern vlib_node_registration_t mpls_input_node;
-extern vlib_node_registration_t mpls_policy_encap_node;
extern vlib_node_registration_t mpls_output_node;
extern vlib_node_registration_t mpls_midchain_node;
@@ -118,48 +114,6 @@
int mpls_fib_reset_labels (u32 fib_id);
-#define foreach_mpls_input_next \
-_(DROP, "error-drop") \
-_(LOOKUP, "mpls-lookup")
-
-typedef enum {
-#define _(s,n) MPLS_INPUT_NEXT_##s,
- foreach_mpls_input_next
-#undef _
- MPLS_INPUT_N_NEXT,
-} mpls_input_next_t;
-
-#define foreach_mpls_lookup_next \
-_(DROP, "error-drop") \
-_(IP4_INPUT, "ip4-input") \
-_(L2_OUTPUT, "l2-output")
-
-// FIXME remove.
-typedef enum {
-#define _(s,n) MPLS_LOOKUP_NEXT_##s,
- foreach_mpls_lookup_next
-#undef _
- MPLS_LOOKUP_N_NEXT,
-} mpls_lookup_next_t;
-
-#define foreach_mpls_output_next \
-_(DROP, "error-drop")
-
-typedef enum {
-#define _(s,n) MPLS_OUTPUT_NEXT_##s,
- foreach_mpls_output_next
-#undef _
- MPLS_OUTPUT_N_NEXT,
-} mpls_output_next_t;
-
-typedef struct {
- u32 fib_index;
- u32 entry_index;
- u32 dest;
- u32 s_bit;
- u32 label;
-} show_mpls_fib_t;
-
int
mpls_dest_cmp(void * a1, void * a2);
diff --git a/src/vnet/mpls/node.c b/src/vnet/mpls/mpls_input.c
similarity index 82%
rename from src/vnet/mpls/node.c
rename to src/vnet/mpls/mpls_input.c
index 5b407fa..893c451 100644
--- a/src/vnet/mpls/node.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -22,9 +22,20 @@
typedef struct {
u32 next_index;
- u32 label_host_byte_order;
+ u32 label_net_byte_order;
} mpls_input_trace_t;
+/*
+ * X-macro list of the next indices used by mpls-input: each entry is
+ * _(SYMBOL, "name string"). Callers define _() before expanding the list;
+ * format_mpls_input_trace expands it to map a next index to its name.
+ */
+#define foreach_mpls_input_next \
+_(DROP, "error-drop") \
+_(LOOKUP, "mpls-lookup")
+
+/*
+ * Expands the list above into MPLS_INPUT_NEXT_DROP and
+ * MPLS_INPUT_NEXT_LOOKUP; MPLS_INPUT_N_NEXT follows them and so equals
+ * the number of entries in the list.
+ */
+typedef enum {
+#define _(s,n) MPLS_INPUT_NEXT_##s,
+ foreach_mpls_input_next
+#undef _
+ MPLS_INPUT_N_NEXT,
+} mpls_input_next_t;
+
static u8 *
format_mpls_input_trace (u8 * s, va_list * args)
{
@@ -32,8 +43,9 @@
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_input_trace_t * t = va_arg (*args, mpls_input_trace_t *);
char * next_name;
-
+ u32 label;
next_name = "BUG!";
+ label = clib_net_to_host_u32(t->label_net_byte_order);
#define _(a,b) if (t->next_index == MPLS_INPUT_NEXT_##a) next_name = b;
foreach_mpls_input_next;
@@ -41,8 +53,8 @@
s = format (s, "MPLS: next %s[%d] label %d ttl %d",
next_name, t->next_index,
- vnet_mpls_uc_get_label(t->label_host_byte_order),
- vnet_mpls_uc_get_ttl(t->label_host_byte_order));
+ vnet_mpls_uc_get_label(label),
+ vnet_mpls_uc_get_ttl(label));
return s;
}
@@ -88,30 +100,29 @@
u32 n_left_to_next;
vlib_get_next_frame (vm, node, next_index,
- to_next, n_left_to_next);
+ to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
{
- u32 label0, bi0, next0, sw_if_index0;
- u32 label1, bi1, next1, sw_if_index1;
- mpls_unicast_header_t *h0, *h1;
+ u32 bi0, next0, sw_if_index0;
+ u32 bi1, next1, sw_if_index1;
vlib_buffer_t *b0, *b1;
+ char *h0, *h1;
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3;
+ vlib_buffer_t * p2, * p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
- CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
+ CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (h1[0]), STORE);
}
-
bi0 = to_next[0] = from[0];
bi1 = to_next[1] = from[1];
@@ -129,62 +140,59 @@
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
- label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
- label1 = clib_net_to_host_u32 (h1->label_exp_s_ttl);
-
/* TTL expired? */
- if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
- {
+ if (PREDICT_FALSE(h0[3] == 0))
+ {
next0 = MPLS_INPUT_NEXT_DROP;
b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
- }
+ }
else
- {
+ {
next0 = MPLS_INPUT_NEXT_LOOKUP;
- vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index0, &next0, b0);
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index0, &next0, b0);
vlib_increment_simple_counter (cm, cpu_index, sw_if_index0, 1);
- }
+ }
- if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label1) == 0))
- {
+ if (PREDICT_FALSE(h1[3] == 0))
+ {
next1 = MPLS_INPUT_NEXT_DROP;
b1->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
- }
+ }
else
- {
+ {
next1 = MPLS_INPUT_NEXT_LOOKUP;
- vnet_feature_arc_start(mm->input_feature_arc_index, sw_if_index1, &next1, b1);
+ vnet_feature_arc_start(mm->input_feature_arc_index,
+ sw_if_index1, &next1, b1);
vlib_increment_simple_counter (cm, cpu_index, sw_if_index1, 1);
- }
+ }
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
+ {
mpls_input_trace_t *tr = vlib_add_trace (vm, node,
b0, sizeof (*tr));
tr->next_index = next0;
- tr->label_host_byte_order = label0;
- }
+ tr->label_net_byte_order = *((u32*)h0);
+ }
if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
- {
+ {
mpls_input_trace_t *tr = vlib_add_trace (vm, node,
b1, sizeof (*tr));
tr->next_index = next1;
- tr->label_host_byte_order = label1;
- }
+ tr->label_net_byte_order = *((u32*)h1);
+ }
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
- bi0, bi1, next0, next1);
+ bi0, bi1,
+ next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
{
- u32 bi0;
+ u32 sw_if_index0, next0, bi0;
vlib_buffer_t * b0;
- mpls_unicast_header_t * h0;
- u32 label0;
- u32 next0 = 0;
- u32 sw_if_index0;
+ char * h0;
bi0 = from[0];
to_next[0] = bi0;
@@ -197,9 +205,8 @@
h0 = vlib_buffer_get_current (b0);
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- label0 = clib_net_to_host_u32 (h0->label_exp_s_ttl);
/* TTL expired? */
- if (PREDICT_FALSE(vnet_mpls_uc_get_ttl (label0) == 0))
+ if (PREDICT_FALSE(h0[3] == 0))
{
next0 = MPLS_INPUT_NEXT_DROP;
b0->error = node->errors[MPLS_ERROR_TTL_EXPIRED];
@@ -216,7 +223,7 @@
mpls_input_trace_t *tr = vlib_add_trace (vm, node,
b0, sizeof (*tr));
tr->next_index = next0;
- tr->label_host_byte_order = label0;
+ tr->label_net_byte_order = *(u32*)h0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 2d34cbd..475bb20 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -80,7 +80,7 @@
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
+ while (n_left_from >= 8 && n_left_to_next >= 4)
{
u32 lbi0, next0, lfib_index0, bi0, hash_c0;
const mpls_unicast_header_t * h0;
@@ -92,46 +92,79 @@
const load_balance_t *lb1;
const dpo_id_t *dpo1;
vlib_buffer_t * b1;
+ u32 lbi2, next2, lfib_index2, bi2, hash_c2;
+ const mpls_unicast_header_t * h2;
+ const load_balance_t *lb2;
+ const dpo_id_t *dpo2;
+ vlib_buffer_t * b2;
+ u32 lbi3, next3, lfib_index3, bi3, hash_c3;
+ const mpls_unicast_header_t * h3;
+ const load_balance_t *lb3;
+ const dpo_id_t *dpo3;
+ vlib_buffer_t * b3;
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3;
+ vlib_buffer_t * p2, * p3, *p4, *p5;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
vlib_prefetch_buffer_header (p2, STORE);
vlib_prefetch_buffer_header (p3, STORE);
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
CLIB_PREFETCH (p2->data, sizeof (h0[0]), STORE);
CLIB_PREFETCH (p3->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p4->data, sizeof (h0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (h0[0]), STORE);
}
bi0 = to_next[0] = from[0];
bi1 = to_next[1] = from[1];
+ bi2 = to_next[2] = from[2];
+ bi3 = to_next[3] = from[3];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
+ from += 4;
+ n_left_from -= 4;
+ to_next += 4;
+ n_left_to_next -= 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
h0 = vlib_buffer_get_current (b0);
h1 = vlib_buffer_get_current (b1);
+ h2 = vlib_buffer_get_current (b2);
+ h3 = vlib_buffer_get_current (b3);
lfib_index0 = vec_elt(mm->fib_index_by_sw_if_index,
vnet_buffer(b0)->sw_if_index[VLIB_RX]);
lfib_index1 = vec_elt(mm->fib_index_by_sw_if_index,
vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ lfib_index2 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b2)->sw_if_index[VLIB_RX]);
+ lfib_index3 = vec_elt(mm->fib_index_by_sw_if_index,
+ vnet_buffer(b3)->sw_if_index[VLIB_RX]);
lbi0 = mpls_fib_table_forwarding_lookup (lfib_index0, h0);
lbi1 = mpls_fib_table_forwarding_lookup (lfib_index1, h1);
+ lbi2 = mpls_fib_table_forwarding_lookup (lfib_index2, h2);
+ lbi3 = mpls_fib_table_forwarding_lookup (lfib_index3, h3);
+
lb0 = load_balance_get(lbi0);
lb1 = load_balance_get(lbi1);
+ lb2 = load_balance_get(lbi2);
+ lb3 = load_balance_get(lbi3);
hash_c0 = vnet_buffer(b0)->ip.flow_hash = 0;
hash_c1 = vnet_buffer(b1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer(b2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer(b3)->ip.flow_hash = 0;
if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
{
@@ -143,11 +176,25 @@
hash_c1 = vnet_buffer (b1)->ip.flow_hash =
mpls_compute_flow_hash(h1, lb1->lb_hash_config);
}
+ if (PREDICT_FALSE(lb2->lb_n_buckets > 1))
+ {
+ hash_c2 = vnet_buffer (b2)->ip.flow_hash =
+ mpls_compute_flow_hash(h2, lb2->lb_hash_config);
+ }
+ if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
+ {
+ hash_c3 = vnet_buffer (b3)->ip.flow_hash =
+ mpls_compute_flow_hash(h3, lb3->lb_hash_config);
+ }
ASSERT (lb0->lb_n_buckets > 0);
ASSERT (is_pow2 (lb0->lb_n_buckets));
ASSERT (lb1->lb_n_buckets > 0);
ASSERT (is_pow2 (lb1->lb_n_buckets));
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
dpo0 = load_balance_get_bucket_i(lb0,
(hash_c0 &
@@ -155,12 +202,22 @@
dpo1 = load_balance_get_bucket_i(lb1,
(hash_c1 &
(lb1->lb_n_buckets_minus_1)));
+ dpo2 = load_balance_get_bucket_i(lb2,
+ (hash_c2 &
+ (lb2->lb_n_buckets_minus_1)));
+ dpo3 = load_balance_get_bucket_i(lb3,
+ (hash_c3 &
+ (lb3->lb_n_buckets_minus_1)));
next0 = dpo0->dpoi_next_node;
next1 = dpo1->dpoi_next_node;
+ next2 = dpo2->dpoi_next_node;
+ next3 = dpo3->dpoi_next_node;
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ vnet_buffer (b2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ vnet_buffer (b3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
vlib_increment_combined_counter
(cm, cpu_index, lbi0, 1,
@@ -168,6 +225,12 @@
vlib_increment_combined_counter
(cm, cpu_index, lbi1, 1,
vlib_buffer_length_in_chain (vm, b1));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi2, 1,
+ vlib_buffer_length_in_chain (vm, b2));
+ vlib_increment_combined_counter
+ (cm, cpu_index, lbi3, 1,
+ vlib_buffer_length_in_chain (vm, b3));
/*
* before we pop the label copy th values we need to maintain.
@@ -181,12 +244,20 @@
vnet_buffer (b1)->mpls.ttl = ((char*)h1)[3];
vnet_buffer (b1)->mpls.exp = (((char*)h1)[2] & 0xe) >> 1;
vnet_buffer (b1)->mpls.first = 1;
+ vnet_buffer (b2)->mpls.ttl = ((char*)h2)[3];
+ vnet_buffer (b2)->mpls.exp = (((char*)h2)[2] & 0xe) >> 1;
+ vnet_buffer (b2)->mpls.first = 1;
+ vnet_buffer (b3)->mpls.ttl = ((char*)h3)[3];
+ vnet_buffer (b3)->mpls.exp = (((char*)h3)[2] & 0xe) >> 1;
+ vnet_buffer (b3)->mpls.first = 1;
/*
* pop the label that was just used in the lookup
*/
vlib_buffer_advance(b0, sizeof(*h0));
vlib_buffer_advance(b1, sizeof(*h1));
+ vlib_buffer_advance(b2, sizeof(*h2));
+ vlib_buffer_advance(b3, sizeof(*h3));
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -210,9 +281,32 @@
tr->label_net_byte_order = h1->label_exp_s_ttl;
}
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b2, sizeof (*tr));
+ tr->next_index = next2;
+ tr->lb_index = lbi2;
+ tr->lfib_index = lfib_index2;
+ tr->hash = hash_c2;
+ tr->label_net_byte_order = h2->label_exp_s_ttl;
+ }
+
+ if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_lookup_trace_t *tr = vlib_add_trace (vm, node,
+ b3, sizeof (*tr));
+ tr->next_index = next3;
+ tr->lb_index = lbi3;
+ tr->lfib_index = lfib_index3;
+ tr->hash = hash_c3;
+ tr->label_net_byte_order = h3->label_exp_s_ttl;
+ }
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
- bi0, bi1, next0, next1);
+ bi0, bi1, bi2, bi3,
+ next0, next1, next2, next3);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -361,10 +455,9 @@
while (n_left_from >= 4 && n_left_to_next >= 2)
{
- mpls_lookup_next_t next0, next1;
const load_balance_t *lb0, *lb1;
vlib_buffer_t * p0, *p1;
- u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1, next0, next1;
const mpls_unicast_header_t *mpls0, *mpls1;
const dpo_id_t *dpo0, *dpo1;
@@ -465,10 +558,9 @@
while (n_left_from > 0 && n_left_to_next > 0)
{
- mpls_lookup_next_t next0;
const load_balance_t *lb0;
vlib_buffer_t * p0;
- u32 pi0, lbi0, hc0;
+ u32 pi0, lbi0, hc0, next0;
const mpls_unicast_header_t *mpls0;
const dpo_id_t *dpo0;
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index cf35400..2d8bd0c 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -29,6 +29,16 @@
u8 packet_data[64 - 1*sizeof(u32)];
} mpls_output_trace_t;
+/*
+ * X-macro list of the next indices used by mpls-output: each entry is
+ * _(SYMBOL, "name string"). Callers define _() before expanding the list.
+ */
+#define foreach_mpls_output_next \
+_(DROP, "error-drop")
+
+/*
+ * Expands the list above into MPLS_OUTPUT_NEXT_DROP; MPLS_OUTPUT_N_NEXT
+ * follows it and so equals the number of entries in the list.
+ */
+typedef enum {
+#define _(s,n) MPLS_OUTPUT_NEXT_##s,
+ foreach_mpls_output_next
+#undef _
+ MPLS_OUTPUT_N_NEXT,
+} mpls_output_next_t;
+
static u8 *
format_mpls_output_trace (u8 * s, va_list * args)
{