/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/ip/ip.h>
#include <vnet/dpo/mpls_label_dpo.h>
#include <vnet/mpls/mpls.h>
#include <vnet/dpo/drop_dpo.h>
/*
* pool of all MPLS Label DPOs
*/
mpls_label_dpo_t *mpls_label_dpo_pool;
static mpls_label_dpo_t *
mpls_label_dpo_alloc (void)
{
mpls_label_dpo_t *mld;
pool_get_aligned(mpls_label_dpo_pool, mld, CLIB_CACHE_LINE_BYTES);
memset(mld, 0, sizeof(*mld));
dpo_reset(&mld->mld_dpo);
return (mld);
}
static index_t
mpls_label_dpo_get_index (mpls_label_dpo_t *mld)
{
return (mld - mpls_label_dpo_pool);
}
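/**
 * @brief Create an MPLS label DPO that imposes the given label stack.
 *
 * The TTL, EXP and EOS values supplied apply to the innermost label;
 * outer labels are written with TTL 255, EXP 0 and the non-EOS bit.
 * The new DPO is stacked on the parent 'dpo' passed in.
 */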
index_t
mpls_label_dpo_create (mpls_label_t *label_stack,
mpls_eos_bit_t eos,
u8 ttl,
u8 exp,
dpo_proto_t payload_proto,
const dpo_id_t *dpo)
{
mpls_label_dpo_t *mld;
u32 ii;
mld = mpls_label_dpo_alloc();
if (MPLS_LABEL_DPO_MAX_N_LABELS < vec_len(label_stack))
{
clib_warning("Label stack size exceeded");
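/* stack on a drop DPO so that packets using this label object are discarded */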
dpo_stack(DPO_MPLS_LABEL,
mld->mld_payload_proto,
&mld->mld_dpo,
drop_dpo_get(DPO_PROTO_MPLS));
return (mpls_label_dpo_get_index(mld));
}
mld->mld_n_labels = vec_len(label_stack);
mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
mld->mld_payload_proto = payload_proto;
/*
* construct label rewrite headers for each value passed.
* get the header in network byte order since we will paint it
* on a packet in the data-plane
*/
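/*
 * Each label stack entry is a 32-bit word, per RFC 3032:
 *   label (20 bits) | EXP (3 bits) | S (bottom-of-stack, 1 bit) | TTL (8 bits)
 * The vnet_mpls_uc_set_* helpers build the entry in host byte order and the
 * final clib_host_to_net_u32() converts it to network byte order.
 */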
for (ii = 0; ii < mld->mld_n_labels-1; ii++)
{
vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]);
vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, 255);
vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, 0);
vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, MPLS_NON_EOS);
mld->mld_hdr[ii].label_exp_s_ttl =
clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl);
}
/*
* the innermost label
*/
ii = mld->mld_n_labels-1;
vnet_mpls_uc_set_label(&mld->mld_hdr[ii].label_exp_s_ttl, label_stack[ii]);
vnet_mpls_uc_set_ttl(&mld->mld_hdr[ii].label_exp_s_ttl, ttl);
vnet_mpls_uc_set_exp(&mld->mld_hdr[ii].label_exp_s_ttl, exp);
vnet_mpls_uc_set_s(&mld->mld_hdr[ii].label_exp_s_ttl, eos);
mld->mld_hdr[ii].label_exp_s_ttl =
clib_host_to_net_u32(mld->mld_hdr[ii].label_exp_s_ttl);
/*
* stack this label object on its parent.
*/
dpo_stack(DPO_MPLS_LABEL,
mld->mld_payload_proto,
&mld->mld_dpo,
dpo);
return (mpls_label_dpo_get_index(mld));
}
u8*
format_mpls_label_dpo (u8 *s, va_list *args)
{
index_t index = va_arg (*args, index_t);
u32 indent = va_arg (*args, u32);
mpls_unicast_header_t hdr;
mpls_label_dpo_t *mld;
u32 ii;
s = format(s, "mpls-label:[%d]:", index);
if (pool_is_free_index(mpls_label_dpo_pool, index))
{
/*
* the packet trace can be printed after the DPO has been deleted
*/
return (s);
}
mld = mpls_label_dpo_get(index);
for (ii = 0; ii < mld->mld_n_labels; ii++)
{
hdr.label_exp_s_ttl =
clib_net_to_host_u32(mld->mld_hdr[ii].label_exp_s_ttl);
s = format(s, "%U", format_mpls_header, hdr);
}
s = format(s, "\n%U", format_white_space, indent);
s = format(s, "%U", format_dpo_id, &mld->mld_dpo, indent+2);
return (s);
}
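/*
 * Lock/unlock hooks invoked by the DPO infrastructure for reference counting;
 * the label object is returned to the pool when the last lock is released.
 */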
static void
mpls_label_dpo_lock (dpo_id_t *dpo)
{
mpls_label_dpo_t *mld;
mld = mpls_label_dpo_get(dpo->dpoi_index);
mld->mld_locks++;
}
static void
mpls_label_dpo_unlock (dpo_id_t *dpo)
{
mpls_label_dpo_t *mld;
mld = mpls_label_dpo_get(dpo->dpoi_index);
mld->mld_locks--;
if (0 == mld->mld_locks)
{
dpo_reset(&mld->mld_dpo);
pool_put(mpls_label_dpo_pool, mld);
}
}
/**
* @brief A struct to hold tracing information for the MPLS label imposition
* node.
*/
typedef struct mpls_label_imposition_trace_t_
{
/**
* The MPLS header imposed
*/
mpls_unicast_header_t hdr;
} mpls_label_imposition_trace_t;
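/**
 * @brief Write the pre-built label stack onto the front of the packet.
 *
 * The buffer's current data pointer is moved back by the size of the label
 * stack, the network-order label entries are copied in, the TTL of the
 * innermost label is set to ttl0, and a pointer to that innermost entry is
 * returned (it is the entry recorded in the packet trace).
 */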
always_inline mpls_unicast_header_t *
mpls_label_paint (vlib_buffer_t * b0,
mpls_label_dpo_t *mld0,
u8 ttl0)
{
mpls_unicast_header_t *hdr0;
vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
hdr0 = vlib_buffer_get_current(b0);
if (1 == mld0->mld_n_labels)
{
/* optimise for the common case of one label */
*hdr0 = mld0->mld_hdr[0];
}
else
{
clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
hdr0 = hdr0 + (mld0->mld_n_labels - 1);
}
/* fix up the TTL for the innermost label: the TTL is the last byte of the network-order label entry */
((char*)hdr0)[3] = ttl0;
return (hdr0);
}
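/**
 * @brief Label imposition node function, specialised at compile time on the
 * payload type: IPv4/IPv6 payloads have their TTL/hop-limit decremented on
 * entry to the LSP and copied into the imposed label, Ethernet payloads get
 * a default TTL of 255, and MPLS payloads use the (decremented) TTL stashed
 * by the lookup node for the first imposed label and 255 for the rest.
 */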
always_inline uword
mpls_label_imposition_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame,
u8 payload_is_ip4,
u8 payload_is_ip6,
u8 payload_is_ethernet)
{
u32 n_left_from, next_index, * from, * to_next;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
while (n_left_from > 0)
{
u32 n_left_to_next;
vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
while (n_left_from >= 8 && n_left_to_next >= 4)
{
u32 bi0, mldi0, bi1, mldi1, bi2, mldi2, bi3, mldi3;
mpls_unicast_header_t *hdr0, *hdr1, *hdr2, *hdr3;
mpls_label_dpo_t *mld0, *mld1, *mld2, *mld3;
vlib_buffer_t * b0, *b1, * b2, *b3;
u32 next0, next1, next2, next3;
u8 ttl0, ttl1, ttl2, ttl3;
bi0 = to_next[0] = from[0];
bi1 = to_next[1] = from[1];
bi2 = to_next[2] = from[2];
bi3 = to_next[3] = from[3];
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3, *p4, *p5;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
p4 = vlib_get_buffer (vm, from[4]);
p5 = vlib_get_buffer (vm, from[5]);
vlib_prefetch_buffer_header (p2, STORE);
vlib_prefetch_buffer_header (p3, STORE);
vlib_prefetch_buffer_header (p4, STORE);
vlib_prefetch_buffer_header (p5, STORE);
CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
}
from += 4;
to_next += 4;
n_left_from -= 4;
n_left_to_next -= 4;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
b2 = vlib_get_buffer (vm, bi2);
b3 = vlib_get_buffer (vm, bi3);
/* dst lookup was done by ip4 lookup */
mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
mldi1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
mldi2 = vnet_buffer(b2)->ip.adj_index[VLIB_TX];
mldi3 = vnet_buffer(b3)->ip.adj_index[VLIB_TX];
mld0 = mpls_label_dpo_get(mldi0);
mld1 = mpls_label_dpo_get(mldi1);
mld2 = mpls_label_dpo_get(mldi2);
mld3 = mpls_label_dpo_get(mldi3);
if (payload_is_ip4)
{
/*
* decrement the TTL on ingress to the LSP
*/
ip4_header_t * ip0 = vlib_buffer_get_current(b0);
ip4_header_t * ip1 = vlib_buffer_get_current(b1);
ip4_header_t * ip2 = vlib_buffer_get_current(b2);
ip4_header_t * ip3 = vlib_buffer_get_current(b3);
u32 checksum0;
u32 checksum1;
u32 checksum2;
u32 checksum3;
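/*
 * Incremental checksum update (RFC 1141/1624): decrementing the TTL lowers
 * the 16-bit TTL/protocol word by 0x0100, so the one's-complement checksum
 * is adjusted by adding 0x0100 (in network byte order) and folding the
 * carry back in.
 */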
checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
checksum2 = ip2->checksum + clib_host_to_net_u16 (0x0100);
checksum3 = ip3->checksum + clib_host_to_net_u16 (0x0100);
checksum0 += checksum0 >= 0xffff;
checksum1 += checksum1 >= 0xffff;
checksum2 += checksum2 >= 0xffff;
checksum3 += checksum3 >= 0xffff;
ip0->checksum = checksum0;
ip1->checksum = checksum1;
ip2->checksum = checksum2;
ip3->checksum = checksum3;
ip0->ttl -= 1;
ip1->ttl -= 1;
ip2->ttl -= 1;
ip3->ttl -= 1;
ttl1 = ip1->ttl;
ttl0 = ip0->ttl;
ttl3 = ip3->ttl;
ttl2 = ip2->ttl;
}
else if (payload_is_ip6)
{
/*
* decrement the TTL on ingress to the LSP
*/
ip6_header_t * ip0 = vlib_buffer_get_current(b0);
ip6_header_t * ip1 = vlib_buffer_get_current(b1);
ip6_header_t * ip2 = vlib_buffer_get_current(b2);
ip6_header_t * ip3 = vlib_buffer_get_current(b3);
ip0->hop_limit -= 1;
ip1->hop_limit -= 1;
ip2->hop_limit -= 1;
ip3->hop_limit -= 1;
ttl0 = ip0->hop_limit;
ttl1 = ip1->hop_limit;
ttl2 = ip2->hop_limit;
ttl3 = ip3->hop_limit;
}
else if (payload_is_ethernet)
{
/*
* nothing to change in the ethernet header
*/
ttl0 = ttl1 = ttl2 = ttl3 = 255;
}
else
{
/*
* else, the packet to be encapsulated is an MPLS packet
*/
if (PREDICT_TRUE(vnet_buffer(b0)->mpls.first))
{
/*
* The first label to be imposed on the packet. This is a label swap,
* in which case the TTL and EXP bits were stashed in the packet by
* the lookup node.
*/
ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
ttl0 = vnet_buffer(b0)->mpls.ttl - 1;
}
else
{
/*
* not the first label, implying we are recursing down a chain of
* output labels.
* Each layer is considered a new LSP, hence the TTL is reset.
*/
ttl0 = 255;
}
if (PREDICT_TRUE(vnet_buffer(b1)->mpls.first))
{
ASSERT(0 != vnet_buffer (b1)->mpls.ttl);
ttl1 = vnet_buffer(b1)->mpls.ttl - 1;
}
else
{
ttl1 = 255;
}
if (PREDICT_TRUE(vnet_buffer(b2)->mpls.first))
{
ASSERT(0 != vnet_buffer (b2)->mpls.ttl);
ttl2 = vnet_buffer(b2)->mpls.ttl - 1;
}
else
{
ttl2 = 255;
}
if (PREDICT_TRUE(vnet_buffer(b3)->mpls.first))
{
ASSERT(0 != vnet_buffer (b3)->mpls.ttl);
ttl3 = vnet_buffer(b3)->mpls.ttl - 1;
}
else
{
ttl3 = 255;
}
}
vnet_buffer(b0)->mpls.first = 0;
vnet_buffer(b1)->mpls.first = 0;
vnet_buffer(b2)->mpls.first = 0;
vnet_buffer(b3)->mpls.first = 0;
/* Paint the MPLS header */
hdr0 = mpls_label_paint(b0, mld0, ttl0);
hdr1 = mpls_label_paint(b1, mld1, ttl1);
hdr2 = mpls_label_paint(b2, mld2, ttl2);
hdr3 = mpls_label_paint(b3, mld3, ttl3);
next0 = mld0->mld_dpo.dpoi_next_node;
next1 = mld1->mld_dpo.dpoi_next_node;
next2 = mld2->mld_dpo.dpoi_next_node;
next3 = mld3->mld_dpo.dpoi_next_node;
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
vnet_buffer(b1)->ip.adj_index[VLIB_TX] = mld1->mld_dpo.dpoi_index;
vnet_buffer(b2)->ip.adj_index[VLIB_TX] = mld2->mld_dpo.dpoi_index;
vnet_buffer(b3)->ip.adj_index[VLIB_TX] = mld3->mld_dpo.dpoi_index;
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_label_imposition_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->hdr = *hdr0;
}
if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_label_imposition_trace_t *tr =
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->hdr = *hdr1;
}
if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_label_imposition_trace_t *tr =
vlib_add_trace (vm, node, b2, sizeof (*tr));
tr->hdr = *hdr2;
}
if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_label_imposition_trace_t *tr =
vlib_add_trace (vm, node, b3, sizeof (*tr));
tr->hdr = *hdr3;
}
vlib_validate_buffer_enqueue_x4(vm, node, next_index, to_next,
n_left_to_next,
bi0, bi1, bi2, bi3,
next0, next1, next2, next3);
}
while (n_left_from > 0 && n_left_to_next > 0)
{
mpls_unicast_header_t *hdr0;
mpls_label_dpo_t *mld0;
vlib_buffer_t * b0;
u32 bi0, mldi0;
u32 next0;
u8 ttl;
bi0 = from[0];
to_next[0] = bi0;
from += 1;
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
/* dst lookup was done by ip4 lookup */
mldi0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
mld0 = mpls_label_dpo_get(mldi0);
if (payload_is_ip4)
{
/*
* decrement the TTL on ingress to the LSP
*/
ip4_header_t * ip0 = vlib_buffer_get_current(b0);
u32 checksum0;
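/* incremental checksum update for the TTL decrement, as in the quad loop above */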
checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
checksum0 += checksum0 >= 0xffff;
ip0->checksum = checksum0;
ip0->ttl -= 1;
ttl = ip0->ttl;
}
else if (payload_is_ip6)
{
/*
* decrement the TTL on ingress to the LSP
*/
ip6_header_t * ip0 = vlib_buffer_get_current(b0);
ip0->hop_limit -= 1;
ttl = ip0->hop_limit;
}
else
{
/*
* else, the packet to be encapsulated is an MPLS packet
*/
if (vnet_buffer(b0)->mpls.first)
{
/*
* The first label to be imposed on the packet. This is a label swap,
* in which case the TTL and EXP bits were stashed in the packet by
* the lookup node.
*/
ASSERT(0 != vnet_buffer (b0)->mpls.ttl);
ttl = vnet_buffer(b0)->mpls.ttl - 1;
}
else
{
/*
* not the first label, implying we are recursing down a chain of
* output labels.
* Each layer is considered a new LSP, hence the TTL is reset.
*/
ttl = 255;
}
}
vnet_buffer(b0)->mpls.first = 0;
/* Paint the MPLS header */
vlib_buffer_advance(b0, -(mld0->mld_n_hdr_bytes));
hdr0 = vlib_buffer_get_current(b0);
clib_memcpy(hdr0, mld0->mld_hdr, mld0->mld_n_hdr_bytes);
/* fix up the TTL for the innermost label: the TTL is the last byte of the network-order label entry */
hdr0 = hdr0 + (mld0->mld_n_labels - 1);
((char*)hdr0)[3] = ttl;
next0 = mld0->mld_dpo.dpoi_next_node;
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = mld0->mld_dpo.dpoi_index;
if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_label_imposition_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->hdr = *hdr0;
}
vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
return from_frame->n_vectors;
}
static u8 *
format_mpls_label_imposition_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_label_imposition_trace_t * t;
mpls_unicast_header_t hdr;
u32 indent;
t = va_arg (*args, mpls_label_imposition_trace_t *);
indent = format_get_indent (s);
hdr.label_exp_s_ttl = clib_net_to_host_u32(t->hdr.label_exp_s_ttl);
s = format (s, "%Umpls-header:%U",
format_white_space, indent,
format_mpls_header, hdr);
return (s);
}
static uword
mpls_label_imposition (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 0));
}
VLIB_REGISTER_NODE (mpls_label_imposition_node) = {
.function = mpls_label_imposition,
.name = "mpls-label-imposition",
.vector_size = sizeof (u32),
.format_trace = format_mpls_label_imposition_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "mpls-drop",
}
};
VLIB_NODE_FUNCTION_MULTIARCH (mpls_label_imposition_node,
mpls_label_imposition)
static uword
ip4_mpls_label_imposition (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (mpls_label_imposition_inline(vm, node, frame, 1, 0, 0));
}
VLIB_REGISTER_NODE (ip4_mpls_label_imposition_node) = {
.function = ip4_mpls_label_imposition,
.name = "ip4-mpls-label-imposition",
.vector_size = sizeof (u32),
.format_trace = format_mpls_label_imposition_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "ip4-drop",
}
};
VLIB_NODE_FUNCTION_MULTIARCH (ip4_mpls_label_imposition_node,
ip4_mpls_label_imposition)
static uword
ip6_mpls_label_imposition (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (mpls_label_imposition_inline(vm, node, frame, 0, 1, 0));
}
VLIB_REGISTER_NODE (ip6_mpls_label_imposition_node) = {
.function = ip6_mpls_label_imposition,
.name = "ip6-mpls-label-imposition",
.vector_size = sizeof (u32),
.format_trace = format_mpls_label_imposition_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "ip6-drop",
}
};
VLIB_NODE_FUNCTION_MULTIARCH (ip6_mpls_label_imposition_node,
ip6_mpls_label_imposition)
static uword
ethernet_mpls_label_imposition (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (mpls_label_imposition_inline(vm, node, frame, 0, 0, 1));
}
VLIB_REGISTER_NODE (ethernet_mpls_label_imposition_node) = {
.function = ethernet_mpls_label_imposition,
.name = "ethernet-mpls-label-imposition",
.vector_size = sizeof (u32),
.format_trace = format_mpls_label_imposition_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "error-drop",
}
};
VLIB_NODE_FUNCTION_MULTIARCH (ethernet_mpls_label_imposition_node,
ethernet_mpls_label_imposition)
static void
mpls_label_dpo_mem_show (void)
{
fib_show_memory_usage("MPLS label",
pool_elts(mpls_label_dpo_pool),
pool_len(mpls_label_dpo_pool),
sizeof(mpls_label_dpo_t));
}
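/*
 * Virtual function table registered with the DPO infrastructure for the
 * MPLS label DPO type.
 */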
const static dpo_vft_t mld_vft = {
.dv_lock = mpls_label_dpo_lock,
.dv_unlock = mpls_label_dpo_unlock,
.dv_format = format_mpls_label_dpo,
.dv_mem_show = mpls_label_dpo_mem_show,
};
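/*
 * Graph nodes that perform the label imposition, listed per payload
 * protocol; these are passed to the DPO infrastructure at registration so
 * that the correct next-node arcs can be created when an MPLS label DPO
 * is stacked.
 */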
const static char* const mpls_label_imp_ip4_nodes[] =
{
"ip4-mpls-label-imposition",
NULL,
};
const static char* const mpls_label_imp_ip6_nodes[] =
{
"ip6-mpls-label-imposition",
NULL,
};
const static char* const mpls_label_imp_mpls_nodes[] =
{
"mpls-label-imposition",
NULL,
};
const static char* const mpls_label_imp_ethernet_nodes[] =
{
"ethernet-mpls-label-imposition",
NULL,
};
const static char* const * const mpls_label_imp_nodes[DPO_PROTO_NUM] =
{
[DPO_PROTO_IP4] = mpls_label_imp_ip4_nodes,
[DPO_PROTO_IP6] = mpls_label_imp_ip6_nodes,
[DPO_PROTO_MPLS] = mpls_label_imp_mpls_nodes,
[DPO_PROTO_ETHERNET] = mpls_label_imp_ethernet_nodes,
};
void
mpls_label_dpo_module_init (void)
{
dpo_register(DPO_MPLS_LABEL, &mld_vft, mpls_label_imp_nodes);
}