blob: 81cb9f55fba56db87c9d179c6e43bff727a25dce [file] [log] [blame]
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vnet/vnet.h>
#include <vppinfra/xxhash.h>
#include <vlib/threads.h>
#include <vnet/handoff.h>
#include <vnet/feature/feature.h>
/*
 * Per-interface handoff configuration, stored in handoff_main.if_data and
 * indexed by sw_if_index.
 */
typedef struct
{
/* Bitmap of enabled workers, as handed to interface_handoff_enable_disable() */
uword *workers_bitmap;
/* Vector of the worker indices set in workers_bitmap; the data path picks
 * one entry by packet hash and adds it to first_worker_index */
u32 *workers;
} per_inteface_handoff_data_t;
/* Global state of the handoff feature (single instance: handoff_main). */
typedef struct
{
/* NOTE(review): not referenced anywhere in the visible code of this file */
u32 cached_next_index;
/* Number of threads in the "workers" thread registration (set in handoff_init) */
u32 num_workers;
/* Thread index of the first worker (set in handoff_init) */
u32 first_worker_index;
/* Vector of per-interface data, indexed by sw_if_index */
per_inteface_handoff_data_t *if_data;
/* Frame queue index returned by vlib_frame_queue_main_init() for the
 * handoff-dispatch node; ~0 until handoff is first configured */
u32 frame_queue_index;
/* convenience variables */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
/* Produces the key that is hashed to select a worker; set to eth_get_key by
 * default and switchable to eth_get_sym_key from the CLI */
u64 (*hash_fn) (ethernet_header_t *);
} handoff_main_t;
/* The single global instance of the handoff state */
handoff_main_t handoff_main;
/* Forward declaration of the handoff-dispatch node registration (defined
 * further down); its index is needed when the frame queue is created */
vlib_node_registration_t handoff_dispatch_node;
/* Packet trace record written by the worker-handoff node */
typedef struct
{
/* RX software interface index of the traced buffer */
u32 sw_if_index;
/* Selected worker, relative to handoff_main.first_worker_index */
u32 next_worker_index;
/* Buffer index of the traced packet */
u32 buffer_index;
} worker_handoff_trace_t;
/*
 * Format callback for worker-handoff packet traces.
 *
 * The variadic argument list carries, in order: the vlib main pointer, the
 * node pointer and the worker_handoff_trace_t record.  Only the trace record
 * is used; the formatted text is appended to s and the result returned.
 */
static u8 *
format_worker_handoff_trace (u8 * s, va_list * args)
{
  worker_handoff_trace_t *trace;

  /* vm and node come first in the argument list; consume and ignore them */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  trace = va_arg (*args, worker_handoff_trace_t *);

  return format (s,
		 "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
		 trace->sw_if_index, trace->next_worker_index,
		 trace->buffer_index);
}
/* NOTE(review): no node is registered under this name in the visible part of
 * this file (the registrations are worker_handoff_node and
 * handoff_dispatch_node); possibly a leftover - confirm before removing. */
vlib_node_registration_t handoff_node;
/*
 * worker-handoff node function: spread received packets over worker threads.
 *
 * For every buffer in the frame, a key derived from the ethernet header
 * (hm->hash_fn, mixed with clib_xxhash) selects one entry of the worker list
 * configured for the buffer's RX interface.  The buffer index is appended to
 * a frame-queue element destined for that worker.  Elements are shipped as
 * soon as they are full and, unconditionally, once the whole input frame has
 * been processed.
 *
 * Returns the number of buffers in the incoming frame.
 */
static uword
worker_handoff_node_fn (vlib_main_t * vm,
			vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  handoff_main_t *hm = &handoff_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  u32 n_left_from, *from;
  /* Per-thread vector of in-progress queue elements, indexed by thread index */
  static __thread vlib_frame_queue_elt_t **handoff_queue_elt_by_worker_index;
  vlib_frame_queue_elt_t *hf = 0;
  u32 i;
  u32 n_left_to_next_worker = 0, *to_next_worker = 0;
  u32 next_worker_index = 0;
  u32 current_worker_index = ~0;

  /* First call on this thread: allocate one slot per vlib main */
  if (PREDICT_FALSE (handoff_queue_elt_by_worker_index == 0))
    vec_validate (handoff_queue_elt_by_worker_index, tm->n_vlib_mains - 1);

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 bi0;
      vlib_buffer_t *b0;
      u32 sw_if_index0;
      u32 hash;
      u64 hash_key;
      per_inteface_handoff_data_t *ihd0;
      u32 n_workers0;
      u32 index0;

      bi0 = from[0];
      from += 1;
      n_left_from -= 1;

      b0 = vlib_get_buffer (vm, bi0);
      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      ASSERT (hm->if_data);
      ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);

      /*
       * The feature is only enabled together with a non-empty worker list;
       * an empty list would make the index computation below invalid.
       */
      n_workers0 = vec_len (ihd0->workers);
      ASSERT (n_workers0 > 0);

      /* Compute ingress LB hash */
      hash_key = hm->hash_fn ((ethernet_header_t *) b0->data);
      hash = (u32) clib_xxhash (hash_key);

      /* if input node did not specify next index, then packet
         should go to ethernet-input */
      if (PREDICT_FALSE ((b0->flags & VNET_BUFFER_F_HANDOFF_NEXT_VALID) == 0))
	vnet_buffer (b0)->handoff.next_index =
	  HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT;
      else if (vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_IP4_INPUT
	       || vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_IP6_INPUT
	       || vnet_buffer (b0)->handoff.next_index ==
	       HANDOFF_DISPATCH_NEXT_MPLS_INPUT)
	/* L3 next nodes expect the buffer to start past the ethernet header */
	vlib_buffer_advance (b0, (sizeof (ethernet_header_t)));

      /* Map the hash onto the interface's worker list */
      if (PREDICT_TRUE (is_pow2 (n_workers0)))
	index0 = hash & (n_workers0 - 1);
      else
	index0 = hash % n_workers0;

      next_worker_index = hm->first_worker_index + ihd0->workers[index0];

      /*
       * Trace before the buffer is enqueued: once a full queue element has
       * been shipped the buffer belongs to the destination thread and must
       * not be touched from here any more.
       */
      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			 && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	{
	  worker_handoff_trace_t *t =
	    vlib_add_trace (vm, node, b0, sizeof (*t));
	  t->sw_if_index = sw_if_index0;
	  t->next_worker_index = next_worker_index - hm->first_worker_index;
	  t->buffer_index = bi0;
	}

      if (next_worker_index != current_worker_index)
	{
	  /* Switching workers: record the fill level of the element we leave */
	  if (hf)
	    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

	  hf = vlib_get_worker_handoff_queue_elt (hm->frame_queue_index,
						  next_worker_index,
						  handoff_queue_elt_by_worker_index);

	  n_left_to_next_worker = VLIB_FRAME_SIZE - hf->n_vectors;
	  to_next_worker = &hf->buffer_index[hf->n_vectors];
	  current_worker_index = next_worker_index;
	}

      /* enqueue to correct worker thread */
      to_next_worker[0] = bi0;
      to_next_worker++;
      n_left_to_next_worker--;

      if (n_left_to_next_worker == 0)
	{
	  /* Element is full: ship it and forget it */
	  hf->n_vectors = VLIB_FRAME_SIZE;
	  vlib_put_frame_queue_elt (hf);
	  current_worker_index = ~0;
	  handoff_queue_elt_by_worker_index[next_worker_index] = 0;
	  hf = 0;
	}
    }

  if (hf)
    hf->n_vectors = VLIB_FRAME_SIZE - n_left_to_next_worker;

  /*
   * Ship frames to the worker nodes.
   * It works better to let the handoff node rate-adapt, so every pending
   * handoff queue element is always shipped, full or not.
   */
  for (i = 0; i < vec_len (handoff_queue_elt_by_worker_index); i++)
    {
      hf = handoff_queue_elt_by_worker_index[i];
      if (hf == 0)
	continue;

      vlib_put_frame_queue_elt (hf);
      handoff_queue_elt_by_worker_index[i] = 0;
    }

  return frame->n_vectors;
}
/* *INDENT-OFF* */
/*
 * Registration of the "worker-handoff" internal node, driven by
 * worker_handoff_node_fn.  A single next node (error-drop) is declared; the
 * node function above hands buffers to frame queues and does not enqueue to
 * a next frame itself.
 */
VLIB_REGISTER_NODE (worker_handoff_node) = {
.function = worker_handoff_node_fn,
.name = "worker-handoff",
.vector_size = sizeof (u32),
.format_trace = format_worker_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
.n_next_nodes = 1,
.next_nodes = {
[0] = "error-drop",
},
};
VLIB_NODE_FUNCTION_MULTIARCH (worker_handoff_node, worker_handoff_node_fn)
/* *INDENT-ON* */
/*
 * Enable or disable worker handoff on a hardware interface.
 *
 * vm              - vlib main (unused here)
 * sw_if_index     - interface to configure; must exist and be of type
 *                   VNET_SW_INTERFACE_TYPE_HARDWARE
 * bitmap          - set of worker indices (relative to the first worker) that
 *                   may receive traffic from this interface.  Only consulted
 *                   when enabling; may be 0 when disabling.
 * enable_disable  - non-zero to enable, zero to disable
 *
 * Ownership: on a successful enable the bitmap pointer is stored in the
 * per-interface data and freed by a later call for the same interface.  In
 * every other case (disable, or any error return) the bitmap is not retained
 * and stays with the caller.
 *
 * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX for an unknown or
 * non-hardware interface, VNET_API_ERROR_INVALID_WORKER when enabling with a
 * worker index that is not below the number of workers.
 */
int
interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
				  uword * bitmap, int enable_disable)
{
  handoff_main_t *hm = &handoff_main;
  vnet_sw_interface_t *sw;
  vnet_main_t *vnm = vnet_get_main ();
  per_inteface_handoff_data_t *d;
  int i;

  if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  sw = vnet_get_sw_interface (vnm, sw_if_index);
  if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
    return VNET_API_ERROR_INVALID_SW_IF_INDEX;

  /*
   * The worker list is only used when enabling, so only validate it then;
   * disabling must not depend on the caller supplying a dummy list.
   * NOTE(review): an empty bitmap is expected to be rejected here as well,
   * presuming clib_bitmap_last_set() yields ~0 for it - confirm.
   */
  if (enable_disable && clib_bitmap_last_set (bitmap) >= hm->num_workers)
    return VNET_API_ERROR_INVALID_WORKER;

  /* Create the frame queue towards handoff-dispatch on first use */
  if (hm->frame_queue_index == ~0)
    hm->frame_queue_index =
      vlib_frame_queue_main_init (handoff_dispatch_node.index, 0);

  vec_validate (hm->if_data, sw_if_index);
  d = vec_elt_at_index (hm->if_data, sw_if_index);

  /* Drop any previous configuration for this interface */
  vec_free (d->workers);
  vec_free (d->workers_bitmap);

  if (enable_disable)
    {
      /* Take ownership of the bitmap and expand it into a worker vector */
      d->workers_bitmap = bitmap;
      /* *INDENT-OFF* */
      clib_bitmap_foreach (i, bitmap,
        ({
          vec_add1(d->workers, i);
        }));
      /* *INDENT-ON* */
    }

  vnet_feature_enable_disable ("device-input", "worker-handoff",
			       sw_if_index, enable_disable, 0, 0);
  return 0;
}
/*
 * CLI handler for "set interface handoff".
 *
 * Accepted tokens, in any order: an interface name, "workers <list>",
 * "symmetrical" / "asymmetrical" (selects the hash key function, which is
 * global rather than per interface) and "disable".  Parsing stops at the
 * first token that is not recognised.
 *
 * Returns 0 on success or a clib error describing the failure.  The hash
 * function is only changed when the enable/disable call succeeded.
 */
static clib_error_t *
set_interface_handoff_command_fn (vlib_main_t * vm,
				  unformat_input_t * input,
				  vlib_cli_command_t * cmd)
{
  handoff_main_t *hm = &handoff_main;
  clib_error_t *error = 0;
  u32 sw_if_index = ~0;
  int enable_disable = 1;
  uword *bitmap = 0;
  u32 sym = ~0;
  int rv = 0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "disable"))
	enable_disable = 0;
      else if (unformat (input, "workers %U", unformat_bitmap_list, &bitmap))
	;
      else if (unformat (input, "%U", unformat_vnet_sw_interface,
			 vnet_get_main (), &sw_if_index))
	;
      else if (unformat (input, "symmetrical"))
	sym = 1;
      else if (unformat (input, "asymmetrical"))
	sym = 0;
      else
	break;
    }

  if (sw_if_index == ~0)
    {
      error = clib_error_return (0, "Please specify an interface...");
      goto done;
    }

  if (bitmap == 0)
    {
      error = clib_error_return (0, "Please specify list of workers...");
      goto done;
    }

  rv =
    interface_handoff_enable_disable (vm, sw_if_index, bitmap,
				      enable_disable);

  switch (rv)
    {
    case 0:
      /*
       * A successful enable stores the bitmap in the per-interface data,
       * so it must not be freed here.  On disable it is not retained.
       */
      if (enable_disable)
	bitmap = 0;
      break;

    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
      error = clib_error_return (0, "Invalid interface");
      break;

    case VNET_API_ERROR_INVALID_WORKER:
      error = clib_error_return (0, "Invalid worker(s)");
      break;

    case VNET_API_ERROR_UNIMPLEMENTED:
      error = clib_error_return (0,
				 "Device driver doesn't support redirection");
      break;

    default:
      error = clib_error_return (0, "unknown return value %d", rv);
      break;
    }

  if (error == 0)
    {
      if (sym == 1)
	hm->hash_fn = eth_get_sym_key;
      else if (sym == 0)
	hm->hash_fn = eth_get_key;
    }

done:
  /* Release the parsed worker list unless ownership was handed over above */
  vec_free (bitmap);
  return error;
}
/* *INDENT-OFF* */
/*
 * CLI registration for "set interface handoff".  The handler also accepts a
 * "disable" keyword, so it is listed in the help text; the handler requires
 * the interface and the worker list in either case.
 */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
  .path = "set interface handoff",
  .short_help =
  "set interface handoff <interface-name> workers <workers-list> [symmetrical|asymmetrical] [disable]",
  .function = set_interface_handoff_command_fn,
};
/* *INDENT-ON* */
/* Packet trace record written by the handoff-dispatch node */
typedef struct
{
/* Buffer index of the traced packet */
u32 buffer_index;
/* Next node index taken from the buffer's handoff.next_index */
u32 next_index;
/* RX software interface index of the traced buffer */
u32 sw_if_index;
} handoff_dispatch_trace_t;
/*
 * Format callback for handoff-dispatch packet traces.
 *
 * The variadic argument list carries, in order: the vlib main pointer, the
 * node pointer and the handoff_dispatch_trace_t record.  Only the trace
 * record is used; the formatted text is appended to s and returned.
 */
static u8 *
format_handoff_dispatch_trace (u8 * s, va_list * args)
{
  handoff_dispatch_trace_t *trace;

  /* vm and node come first in the argument list; consume and ignore them */
  (void) va_arg (*args, vlib_main_t *);
  (void) va_arg (*args, vlib_node_t *);
  trace = va_arg (*args, handoff_dispatch_trace_t *);

  return format (s,
		 "handoff-dispatch: sw_if_index %d next_index %d buffer 0x%x",
		 trace->sw_if_index, trace->next_index, trace->buffer_index);
}
/*
 * Error counters of the handoff-dispatch node, declared once as an X-macro
 * list of (symbol, description) pairs and expanded twice below: into the
 * enum of counter indices and into the matching array of strings.
 * The only entry is a placeholder; the visible node function never
 * increments it.
 */
#define foreach_handoff_dispatch_error \
_(EXAMPLE, "example packets")
/* Counter indices: HANDOFF_DISPATCH_ERROR_<sym>, terminated by the count */
typedef enum
{
#define _(sym,str) HANDOFF_DISPATCH_ERROR_##sym,
foreach_handoff_dispatch_error
#undef _
HANDOFF_DISPATCH_N_ERROR,
} handoff_dispatch_error_t;
/* Counter descriptions, in the same order as the enum above */
static char *handoff_dispatch_error_strings[] = {
#define _(sym,string) string,
foreach_handoff_dispatch_error
#undef _
};
static uword
handoff_dispatch_node_fn (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
u32 n_left_from, *from, *to_next;
handoff_dispatch_next_t next_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
while (n_left_from > 0)
{
u32 n_left_to_next;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
{
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
u32 next0, next1;
u32 sw_if_index0, sw_if_index1;
/* Prefetch next iteration. */
{
vlib_buffer_t *p2, *p3;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
vlib_prefetch_buffer_header (p2, LOAD);
vlib_prefetch_buffer_header (p3, LOAD);
}
/* speculatively enqueue b0 and b1 to the current next frame */
to_next[0] = bi0 = from[0];
to_next[1] = bi1 = from[1];
from += 2;
to_next += 2;
n_left_from -= 2;
n_left_to_next -= 2;
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
next0 = vnet_buffer (b0)->handoff.next_index;
next1 = vnet_buffer (b1)->handoff.next_index;
if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */
0);
handoff_dispatch_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
t->sw_if_index = sw_if_index0;
t->next_index = next0;
t->buffer_index = bi0;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
vlib_trace_buffer (vm, node, next1, b1, /* follow_chain */
0);
handoff_dispatch_trace_t *t =
vlib_add_trace (vm, node, b1, sizeof (*t));
sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
t->sw_if_index = sw_if_index1;
t->next_index = next1;
t->buffer_index = bi1;
}
}
/* verify speculative enqueues, maybe switch current next frame */
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
bi0, bi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 bi0;
vlib_buffer_t *b0;
u32 next0;
u32 sw_if_index0;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
to_next += 1;
n_left_from -= 1;
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
next0 = vnet_buffer (b0)->handoff.next_index;
if (PREDICT_FALSE (vm->trace_main.trace_active_hint))
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */
0);
handoff_dispatch_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
t->sw_if_index = sw_if_index0;
t->next_index = next0;
t->buffer_index = bi0;
}
}
/* verify speculative enqueue, maybe switch current next frame */
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
return frame->n_vectors;
}
/* *INDENT-OFF* */
/*
 * Registration of the "handoff-dispatch" internal node, driven by
 * handoff_dispatch_node_fn.  Its next-node table maps each
 * HANDOFF_DISPATCH_NEXT_* value (as stored in a buffer's handoff.next_index)
 * to the graph node that continues processing.
 * NOTE(review): VLIB_NODE_FLAG_IS_HANDOFF presumably marks the node as the
 * target of a handoff frame queue - confirm against vlib.
 */
VLIB_REGISTER_NODE (handoff_dispatch_node) = {
.function = handoff_dispatch_node_fn,
.name = "handoff-dispatch",
.vector_size = sizeof (u32),
.format_trace = format_handoff_dispatch_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
.flags = VLIB_NODE_FLAG_IS_HANDOFF,
.n_errors = ARRAY_LEN(handoff_dispatch_error_strings),
.error_strings = handoff_dispatch_error_strings,
.n_next_nodes = HANDOFF_DISPATCH_N_NEXT,
.next_nodes = {
[HANDOFF_DISPATCH_NEXT_DROP] = "error-drop",
[HANDOFF_DISPATCH_NEXT_ETHERNET_INPUT] = "ethernet-input",
/* IPv4 packets go to the input variant that skips checksum verification */
[HANDOFF_DISPATCH_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
[HANDOFF_DISPATCH_NEXT_IP6_INPUT] = "ip6-input",
[HANDOFF_DISPATCH_NEXT_MPLS_INPUT] = "mpls-input",
},
};
VLIB_NODE_FUNCTION_MULTIARCH (handoff_dispatch_node, handoff_dispatch_node_fn)
/* *INDENT-ON* */
/*
 * Init function of the handoff feature.
 *
 * Runs after threads_init, then records how many threads the "workers"
 * registration provides and the index of the first one, selects the default
 * hash key function and marks the frame queue as not yet created.
 *
 * Returns 0, or the error reported by threads_init.
 */
clib_error_t *
handoff_init (vlib_main_t * vm)
{
  handoff_main_t *hm = &handoff_main;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vlib_thread_registration_t *workers_reg = 0;
  clib_error_t *err;
  uword *entry;

  err = vlib_call_init_function (vm, threads_init);
  if (err)
    return err;

  /* Only the standard vnet worker threads are supported */
  entry = hash_get_mem (tm->thread_registrations_by_name, "workers");
  if (entry)
    workers_reg = (vlib_thread_registration_t *) entry[0];

  if (workers_reg)
    {
      hm->num_workers = workers_reg->count;
      hm->first_worker_index = workers_reg->first_index;
    }

  /* Asymmetrical key by default; the CLI can switch to the symmetrical one */
  hm->hash_fn = eth_get_key;

  hm->vlib_main = vm;
  hm->vnet_main = &vnet_main;

  /* The frame queue is created lazily when handoff is first configured */
  hm->frame_queue_index = ~0;

  return 0;
}

VLIB_INIT_FUNCTION (handoff_init);
/*
* fd.io coding-style-patch-verification: ON
*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
*/