/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/ip/lookup.h>
#include <vnet/dpo/replicate_dpo.h>
#include <vnet/dpo/drop_dpo.h>
#include <vnet/adj/adj.h>
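
/**
 * @file
 * @brief The replicate DPO: a data-path object whose buckets each
 * receive their own clone of the packet, in contrast to a load-balance,
 * which selects a single bucket per packet.
 */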
#undef REP_DEBUG
#ifdef REP_DEBUG
#define REP_DBG(_rep, _fmt, _args...)                                  \
{                                                                       \
    u8* _tmp = NULL;                                                    \
    clib_warning("rep:[%s]:" _fmt,                                      \
                 replicate_format(replicate_get_index((_rep)),          \
                                  REPLICATE_FORMAT_NONE, 0, _tmp),      \
                 ##_args);                                              \
    vec_free(_tmp);                                                     \
}
#else
#define REP_DBG(_p, _fmt, _args...)
#endif
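/*
 * REP_DBG compiles away unless REP_DEBUG is defined; remove the #undef
 * above to enable per-replicate debug logging.
 */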
#define foreach_replicate_dpo_error \
_(BUFFER_ALLOCATION_FAILURE, "Buffer Allocation Failure")
typedef enum {
#define _(sym,str) REPLICATE_DPO_ERROR_##sym,
foreach_replicate_dpo_error
#undef _
REPLICATE_DPO_N_ERROR,
} replicate_dpo_error_t;
static char * replicate_dpo_error_strings[] = {
#define _(sym,string) string,
foreach_replicate_dpo_error
#undef _
};
/**
 * Pool of all replicate DPOs. It's not static so the data-plane can
 * have fast access.
 */
replicate_t *replicate_pool;
/**
* The one instance of replicate main
*/
replicate_main_t replicate_main;
static inline index_t
replicate_get_index (const replicate_t *rep)
{
return (rep - replicate_pool);
}
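/**
 * @brief Return the bucket array of a replicate.
 *
 * Small replicates (up to REP_NUM_INLINE_BUCKETS) keep their buckets
 * inline in the replicate_t itself to stay cache friendly; larger ones
 * spill to the separately allocated rep_buckets vector.
 */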
static inline dpo_id_t*
replicate_get_buckets (replicate_t *rep)
{
if (REP_HAS_INLINE_BUCKETS(rep))
{
return (rep->rep_buckets_inline);
}
else
{
return (rep->rep_buckets);
}
}
static replicate_t *
replicate_alloc_i (void)
{
replicate_t *rep;
pool_get_aligned(replicate_pool, rep, CLIB_CACHE_LINE_BYTES);
memset(rep, 0, sizeof(*rep));
vlib_validate_combined_counter(&(replicate_main.repm_counters),
replicate_get_index(rep));
vlib_zero_combined_counter(&(replicate_main.repm_counters),
replicate_get_index(rep));
return (rep);
}
static u8*
replicate_format (index_t repi,
replicate_format_flags_t flags,
u32 indent,
u8 *s)
{
vlib_counter_t to;
replicate_t *rep;
dpo_id_t *buckets;
u32 i;
rep = replicate_get(repi);
vlib_get_combined_counter(&(replicate_main.repm_counters), repi, &to);
buckets = replicate_get_buckets(rep);
s = format(s, "%U: ", format_dpo_type, DPO_REPLICATE);
s = format(s, "[index:%d buckets:%d ", repi, rep->rep_n_buckets);
s = format(s, "to:[%Ld:%Ld]]", to.packets, to.bytes);
for (i = 0; i < rep->rep_n_buckets; i++)
{
s = format(s, "\n%U", format_white_space, indent+2);
s = format(s, "[%d]", i);
s = format(s, " %U", format_dpo_id, &buckets[i], indent+6);
}
return (s);
}
u8*
format_replicate (u8 * s, va_list * args)
{
index_t repi = va_arg(*args, index_t);
replicate_format_flags_t flags = va_arg(*args, replicate_format_flags_t);
return (replicate_format(repi, flags, 0, s));
}
static u8*
format_replicate_dpo (u8 * s, va_list * args)
{
index_t repi = va_arg(*args, index_t);
u32 indent = va_arg(*args, u32);
return (replicate_format(repi, REPLICATE_FORMAT_DETAIL, indent, s));
}
static replicate_t *
replicate_create_i (u32 num_buckets,
dpo_proto_t rep_proto)
{
replicate_t *rep;
rep = replicate_alloc_i();
rep->rep_n_buckets = num_buckets;
rep->rep_proto = rep_proto;
if (!REP_HAS_INLINE_BUCKETS(rep))
{
vec_validate_aligned(rep->rep_buckets,
rep->rep_n_buckets - 1,
CLIB_CACHE_LINE_BYTES);
}
REP_DBG(rep, "create");
return (rep);
}
index_t
replicate_create (u32 n_buckets,
dpo_proto_t rep_proto)
{
return (replicate_get_index(replicate_create_i(n_buckets, rep_proto)));
}
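/*
 * Illustrative usage (a sketch, not code from this file): build a
 * two-way replicate and point each bucket at an existing child DPO.
 * child0/child1 are assumed to be previously constructed dpo_id_t's.
 *
 *    index_t repi = replicate_create(2, DPO_PROTO_IP4);
 *    replicate_set_bucket(repi, 0, &child0);
 *    replicate_set_bucket(repi, 1, &child1);
 */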
static inline void
replicate_set_bucket_i (replicate_t *rep,
u32 bucket,
dpo_id_t *buckets,
const dpo_id_t *next)
{
dpo_stack(DPO_REPLICATE, rep->rep_proto, &buckets[bucket], next);
}
void
replicate_set_bucket (index_t repi,
u32 bucket,
const dpo_id_t *next)
{
replicate_t *rep;
dpo_id_t *buckets;
rep = replicate_get(repi);
buckets = replicate_get_buckets(rep);
ASSERT(bucket < rep->rep_n_buckets);
replicate_set_bucket_i(rep, bucket, buckets, next);
}
int
replicate_is_drop (const dpo_id_t *dpo)
{
replicate_t *rep;
if (DPO_REPLICATE != dpo->dpoi_type)
return (0);
rep = replicate_get(dpo->dpoi_index);
if (1 == rep->rep_n_buckets)
{
return (dpo_is_drop(replicate_get_bucket_i(rep, 0)));
}
return (0);
}
const dpo_id_t *
replicate_get_bucket (index_t repi,
u32 bucket)
{
replicate_t *rep;
rep = replicate_get(repi);
return (replicate_get_bucket_i(rep, bucket));
}
static load_balance_path_t *
replicate_multipath_next_hop_fixup (load_balance_path_t *nhs,
dpo_proto_t drop_proto)
{
if (0 == vec_len(nhs))
{
load_balance_path_t *nh;
        /*
         * the replicate needs at least one bucket, so use the drop
         */
vec_add2(nhs, nh, 1);
nh->path_weight = 1;
dpo_copy(&nh->path_dpo, drop_dpo_get(drop_proto));
}
return (nhs);
}
/*
 * Fill the replicate's buckets with the DPOs of the corresponding
 * next-hops.
 */
static void
replicate_fill_buckets (replicate_t *rep,
load_balance_path_t *nhs,
dpo_id_t *buckets,
u32 n_buckets)
{
load_balance_path_t * nh;
u16 ii, bucket;
bucket = 0;
/*
* the next-hops have normalised weights. that means their sum is the number
* of buckets we need to fill.
*/
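    /*
     * e.g. (illustrative) two next-hops with normalised weights 1 and 2
     * over n_buckets == 3 fill the array as [nh0, nh1, nh1].
     */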
vec_foreach (nh, nhs)
{
for (ii = 0; ii < nh->path_weight; ii++)
{
ASSERT(bucket < n_buckets);
replicate_set_bucket_i(rep, bucket++, buckets, &nh->path_dpo);
}
}
}
static inline void
replicate_set_n_buckets (replicate_t *rep,
u32 n_buckets)
{
rep->rep_n_buckets = n_buckets;
}
void
replicate_multipath_update (const dpo_id_t *dpo,
load_balance_path_t * next_hops)
{
load_balance_path_t * nh, * nhs;
dpo_id_t *tmp_dpo;
u32 ii, n_buckets;
replicate_t *rep;
ASSERT(DPO_REPLICATE == dpo->dpoi_type);
rep = replicate_get(dpo->dpoi_index);
nhs = replicate_multipath_next_hop_fixup(next_hops,
rep->rep_proto);
n_buckets = vec_len(nhs);
if (0 == rep->rep_n_buckets)
{
/*
* first time initialisation. no packets inflight, so we can write
* at leisure.
*/
replicate_set_n_buckets(rep, n_buckets);
if (!REP_HAS_INLINE_BUCKETS(rep))
vec_validate_aligned(rep->rep_buckets,
rep->rep_n_buckets - 1,
CLIB_CACHE_LINE_BYTES);
replicate_fill_buckets(rep, nhs,
replicate_get_buckets(rep),
n_buckets);
}
else
{
        /*
         * This is a modification of an existing replicate.
         * Packets in flight must see a consistent state: the number of
         * buckets the REP reports must never exceed the number its bucket
         * array actually holds, otherwise a worker could index past the
         * end of the array. So if the number of buckets is increasing, we
         * must update the bucket array first, then the reported number;
         * vice-versa if the number of buckets goes down.
         */
if (n_buckets == rep->rep_n_buckets)
{
/*
* no change in the number of buckets. we can simply fill what
* is new over what is old.
*/
replicate_fill_buckets(rep, nhs,
replicate_get_buckets(rep),
n_buckets);
}
else if (n_buckets > rep->rep_n_buckets)
{
/*
* we have more buckets. the old replicate map (if there is one)
* will remain valid, i.e. mapping to indices within range, so we
* update it last.
*/
if (n_buckets > REP_NUM_INLINE_BUCKETS &&
rep->rep_n_buckets <= REP_NUM_INLINE_BUCKETS)
{
            /*
             * the new increased number of buckets is crossing the threshold
             * from inline storage to out-of-line. Alloc the out-of-line
             * buckets first, then fix up the number, then reset the inlines.
             */
ASSERT(NULL == rep->rep_buckets);
vec_validate_aligned(rep->rep_buckets,
n_buckets - 1,
CLIB_CACHE_LINE_BYTES);
replicate_fill_buckets(rep, nhs,
rep->rep_buckets,
n_buckets);
CLIB_MEMORY_BARRIER();
replicate_set_n_buckets(rep, n_buckets);
CLIB_MEMORY_BARRIER();
for (ii = 0; ii < REP_NUM_INLINE_BUCKETS; ii++)
{
dpo_reset(&rep->rep_buckets_inline[ii]);
}
}
else
{
if (n_buckets <= REP_NUM_INLINE_BUCKETS)
{
                /*
                 * we are not crossing the threshold and the buckets remain
                 * inline, so we can write the new buckets over the old.
                 */
replicate_fill_buckets(rep, nhs,
replicate_get_buckets(rep),
n_buckets);
CLIB_MEMORY_BARRIER();
replicate_set_n_buckets(rep, n_buckets);
}
else
{
                /*
                 * we are not crossing the threshold: both the old and new
                 * bucket arrays are out-of-line. We need a new, larger
                 * bucket array to hold the increased number of buckets.
                 */
dpo_id_t *new_buckets, *old_buckets, *tmp_dpo;
new_buckets = NULL;
old_buckets = replicate_get_buckets(rep);
vec_validate_aligned(new_buckets,
n_buckets - 1,
CLIB_CACHE_LINE_BYTES);
replicate_fill_buckets(rep, nhs, new_buckets, n_buckets);
CLIB_MEMORY_BARRIER();
rep->rep_buckets = new_buckets;
CLIB_MEMORY_BARRIER();
replicate_set_n_buckets(rep, n_buckets);
vec_foreach(tmp_dpo, old_buckets)
{
dpo_reset(tmp_dpo);
}
vec_free(old_buckets);
}
}
}
else
{
            /*
             * the number of buckets is shrinking
             */
if (n_buckets <= REP_NUM_INLINE_BUCKETS &&
rep->rep_n_buckets > REP_NUM_INLINE_BUCKETS)
{
            /*
             * the new decreased number of buckets is crossing the threshold
             * from out-of-line storage to inline:
             *  1 - fill the inline buckets,
             *  2 - fix up the number (at this point the inline buckets
             *      are in use),
             *  3 - free the out-of-line buckets.
             */
replicate_fill_buckets(rep, nhs,
rep->rep_buckets_inline,
n_buckets);
CLIB_MEMORY_BARRIER();
replicate_set_n_buckets(rep, n_buckets);
CLIB_MEMORY_BARRIER();
vec_foreach(tmp_dpo, rep->rep_buckets)
{
dpo_reset(tmp_dpo);
}
vec_free(rep->rep_buckets);
}
else
{
/*
* not crossing the threshold.
* 1 - update the number to the smaller size
* 2 - write the new buckets
* 3 - reset those no longer used.
*/
dpo_id_t *buckets;
u32 old_n_buckets;
old_n_buckets = rep->rep_n_buckets;
buckets = replicate_get_buckets(rep);
replicate_set_n_buckets(rep, n_buckets);
CLIB_MEMORY_BARRIER();
replicate_fill_buckets(rep, nhs,
buckets,
n_buckets);
for (ii = n_buckets; ii < old_n_buckets; ii++)
{
dpo_reset(&buckets[ii]);
}
}
}
}
vec_foreach (nh, nhs)
{
dpo_reset(&nh->path_dpo);
}
vec_free(nhs);
}
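/**
 * @brief Take a reference on a replicate.
 *
 * The object is destroyed when the last reference is released; see
 * replicate_unlock() below.
 */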
static void
replicate_lock (dpo_id_t *dpo)
{
replicate_t *rep;
rep = replicate_get(dpo->dpoi_index);
rep->rep_locks++;
}
static void
replicate_destroy (replicate_t *rep)
{
dpo_id_t *buckets;
int i;
buckets = replicate_get_buckets(rep);
for (i = 0; i < rep->rep_n_buckets; i++)
{
dpo_reset(&buckets[i]);
}
REP_DBG(rep, "destroy");
if (!REP_HAS_INLINE_BUCKETS(rep))
{
vec_free(rep->rep_buckets);
}
pool_put(replicate_pool, rep);
}
static void
replicate_unlock (dpo_id_t *dpo)
{
replicate_t *rep;
rep = replicate_get(dpo->dpoi_index);
rep->rep_locks--;
if (0 == rep->rep_locks)
{
replicate_destroy(rep);
}
}
static void
replicate_mem_show (void)
{
fib_show_memory_usage("replicate",
pool_elts(replicate_pool),
pool_len(replicate_pool),
sizeof(replicate_t));
}
const static dpo_vft_t rep_vft = {
.dv_lock = replicate_lock,
.dv_unlock = replicate_unlock,
.dv_format = format_replicate_dpo,
.dv_mem_show = replicate_mem_show,
};
/**
 * @brief The per-protocol VLIB graph nodes that are assigned to a replicate
 *        object.
 *
 * That is, these are the graph nodes for which a replicate is the parent
 * object in the DPO graph.
 */
const static char* const replicate_ip4_nodes[] =
{
"ip4-replicate",
NULL,
};
const static char* const replicate_ip6_nodes[] =
{
"ip6-replicate",
NULL,
};
const static char* const replicate_mpls_nodes[] =
{
"mpls-replicate",
NULL,
};
const static char* const * const replicate_nodes[DPO_PROTO_NUM] =
{
[DPO_PROTO_IP4] = replicate_ip4_nodes,
[DPO_PROTO_IP6] = replicate_ip6_nodes,
[DPO_PROTO_MPLS] = replicate_mpls_nodes,
};
void
replicate_module_init (void)
{
dpo_register(DPO_REPLICATE, &rep_vft, replicate_nodes);
}
static clib_error_t *
replicate_show (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
index_t repi = INDEX_INVALID;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "%d", &repi))
;
else
break;
}
if (INDEX_INVALID != repi)
{
vlib_cli_output (vm, "%U", format_replicate, repi,
REPLICATE_FORMAT_DETAIL);
}
else
{
replicate_t *rep;
pool_foreach(rep, replicate_pool,
({
vlib_cli_output (vm, "%U", format_replicate,
replicate_get_index(rep),
REPLICATE_FORMAT_NONE);
}));
}
return 0;
}
VLIB_CLI_COMMAND (replicate_show_command, static) = {
.path = "show replicate",
.short_help = "show replicate [<index>]",
.function = replicate_show,
};
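/*
 * Example session (illustrative; output abbreviated):
 *
 *    vpp# show replicate 0
 *    replicate: [index:0 buckets:2 to:[0:0]]
 *      [0] dpo-drop ip4
 *      [1] dpo-drop ip4
 */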
typedef struct replicate_trace_t_
{
index_t rep_index;
dpo_id_t dpo;
} replicate_trace_t;
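/**
 * @brief The replicate node function.
 *
 * For each input buffer: charge the per-replicate counters, clone the
 * buffer once per bucket, then enqueue each clone to the graph node of
 * its bucket's DPO.
 */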
static uword
replicate_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
vlib_combined_counter_main_t * cm = &replicate_main.repm_counters;
replicate_main_t * rm = &replicate_main;
u32 n_left_from, * from, * to_next, next_index;
u32 cpu_index = os_get_cpu_number();
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
while (n_left_from > 0)
{
u32 n_left_to_next;
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 next0, ci0, bi0, bucket, repi0;
const replicate_t *rep0;
vlib_buffer_t * b0, *c0;
const dpo_id_t *dpo0;
          u16 num_cloned; /* vlib_buffer_clone() returns a u16 count;
                             a u8 would truncate for > 255 buckets */
bi0 = from[0];
from += 1;
n_left_from -= 1;
b0 = vlib_get_buffer (vm, bi0);
repi0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
rep0 = replicate_get(repi0);
vlib_increment_combined_counter(
cm, cpu_index, repi0, 1,
vlib_buffer_length_in_chain(vm, b0));
vec_validate (rm->clones[cpu_index], rep0->rep_n_buckets - 1);
          num_cloned = vlib_buffer_clone (vm, bi0, rm->clones[cpu_index],
                                          rep0->rep_n_buckets,
                                          128 /* head_end_offset */);
if (num_cloned != rep0->rep_n_buckets)
{
vlib_node_increment_counter
(vm, node->node_index,
REPLICATE_DPO_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
}
for (bucket = 0; bucket < num_cloned; bucket++)
{
ci0 = rm->clones[cpu_index][bucket];
c0 = vlib_get_buffer(vm, ci0);
to_next[0] = ci0;
to_next += 1;
n_left_to_next -= 1;
dpo0 = replicate_get_bucket_i(rep0, bucket);
next0 = dpo0->dpoi_next_node;
vnet_buffer (c0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
if (PREDICT_FALSE(c0->flags & VLIB_BUFFER_IS_TRACED))
{
replicate_trace_t *t = vlib_add_trace (vm, node, c0, sizeof (*t));
t->rep_index = repi0;
t->dpo = *dpo0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
ci0, next0);
if (PREDICT_FALSE (n_left_to_next == 0))
{
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
}
}
vec_reset_length (rm->clones[cpu_index]);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
return frame->n_vectors;
}
static u8 *
format_replicate_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
replicate_trace_t *t = va_arg (*args, replicate_trace_t *);
s = format (s, "replicate: %d via %U",
t->rep_index,
format_dpo_id, &t->dpo);
return s;
}
static uword
ip4_replicate (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (replicate_inline (vm, node, frame));
}
/**
 * @brief The IPv4 replicate graph node function.
 */
VLIB_REGISTER_NODE (ip4_replicate_node) = {
.function = ip4_replicate,
.name = "ip4-replicate",
.vector_size = sizeof (u32),
.n_errors = ARRAY_LEN(replicate_dpo_error_strings),
.error_strings = replicate_dpo_error_strings,
.format_trace = format_replicate_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "error-drop",
},
};
static uword
ip6_replicate (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
return (replicate_inline (vm, node, frame));
}
/**
 * @brief The IPv6 replicate graph node function.
 */
VLIB_REGISTER_NODE (ip6_replicate_node) = {
.function = ip6_replicate,
.name = "ip6-replicate",
.vector_size = sizeof (u32),
.n_errors = ARRAY_LEN(replicate_dpo_error_strings),
.error_strings = replicate_dpo_error_strings,
.format_trace = format_replicate_trace,
.n_next_nodes = 1,
.next_nodes = {
[0] = "error-drop",
},
};
clib_error_t *
replicate_dpo_init (vlib_main_t * vm)
{
replicate_main_t * rm = &replicate_main;
vec_validate (rm->clones, vlib_num_workers());
return 0;
}
VLIB_INIT_FUNCTION (replicate_dpo_init);