blob: b3de37d7bf37b711063f790b1c3cff4e3bacf5df [file] [log] [blame]
/* SPDX-License-Identifier: Apache-2.0
* Copyright (c) 2023 Cisco Systems, Inc.
*/
#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/dev/dev.h>
#include <vnet/dev/counters.h>
#include <vnet/dev/log.h>
/* Log class for this file: class "dev", subclass "port".
 * NOTE(review): presumably this is the class the log_debug()/log_notice()
 * macros from vnet/dev/log.h pick up - confirm against that header. */
VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
.class_name = "dev",
.subclass_name = "port",
};
/* Placeholder node function for the "port-rx-eth" node registration.
 * It asserts unconditionally and returns 0; the node it is attached to is
 * registered in the disabled state, so it is not meant to be dispatched. */
static uword
dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame)
{
ASSERT (0);
return 0;
}
/* "port-rx-eth": a disabled input node whose function is the asserting
 * placeholder above. Per-port rx nodes created in vnet_dev_port_if_create()
 * are registered as siblings of this node. */
VLIB_REGISTER_NODE (port_rx_eth_node) = {
.function = dummy_input_fn,
.name = "port-rx-eth",
.runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_DISABLED,
.n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
/* next-node table is generated from foreach_vnet_dev_port_rx_next */
.next_nodes = {
#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
foreach_vnet_dev_port_rx_next
#undef _
},
};
/* Default rx next index, indexed by port type. Only the ethernet port type
 * has an explicit entry; it maps to the ETH_INPUT next of "port-rx-eth".
 * Read by vnet_dev_port_if_create() to seed port->intf.rx_next_index. */
u16 vnet_dev_default_next_index_by_port_type[] = {
[VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
};
/* Feature arc "port-rx-eth": starts at the "port-rx-eth" node and ends at
 * "ethernet-input". The arc index is stored in
 * vnet_dev_main.eth_port_rx_feature_arc_index. */
VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
.arc_name = "port-rx-eth",
.start_nodes = VNET_FEATURES ("port-rx-eth"),
.last_in_arc = "ethernet-input",
.arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
};
/* Features registered on the arc. Each of the next four is ordered before
 * "ethernet-input". */
VNET_FEATURE_INIT (l2_patch, static) = {
.arc_name = "port-rx-eth",
.node_name = "l2-patch",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
VNET_FEATURE_INIT (worker_handoff, static) = {
.arc_name = "port-rx-eth",
.node_name = "worker-handoff",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
VNET_FEATURE_INIT (span_input, static) = {
.arc_name = "port-rx-eth",
.node_name = "span-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
.arc_name = "port-rx-eth",
.node_name = "p2p-ethernet-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
/* Terminal feature of the arc (matches .last_in_arc above). */
VNET_FEATURE_INIT (ethernet_input, static) = {
.arc_name = "port-rx-eth",
.node_name = "ethernet-input",
.runs_before = 0, /* not before any other features */
};
/* Release a port and the resources it owns.
 *
 * The port must already be stopped (asserted). The driver's optional
 * port_ops.free callback runs first; afterwards the secondary hw address
 * pool and the rx/tx queue pools are freed, the port's slot in the owning
 * device's port pool is returned and the port memory itself is freed.
 * The port pointer must not be used after this call. */
void
vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *owner = port->dev;

  vnet_dev_port_validate (vm, port);
  ASSERT (port->started == 0);

  log_debug (owner, "port %u", port->port_id);

  /* give the driver a chance to release its private state first */
  if (port->port_ops.free)
    {
      port->port_ops.free (vm, port);
    }

  pool_free (port->secondary_hw_addr);
  pool_free (port->rx_queues);
  pool_free (port->tx_queues);

  /* drop the device's reference before the memory goes away */
  pool_put_index (owner->ports, port->index);
  clib_mem_free (port);
}
/* Refresh the tx node runtime data on every thread a tx queue is assigned
 * to: for each (queue, assigned thread) pair, the tx node runtime of that
 * thread gets the port's hw_if_index and a pointer to the queue. */
void
vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_validate (vm, port);

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      u32 thread_index;

      clib_bitmap_foreach (thread_index, txq->assigned_threads)
        {
          vlib_main_t *thread_vm = vlib_get_main_by_index (thread_index);
          vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (
            vlib_node_get_runtime (thread_vm, port->intf.tx_node_index));

          rt->hw_if_index = port->intf.hw_if_index;
          rt->tx_queue = txq;
        }
    }
}
/* Stop a port.
 *
 * Builds one runtime op per thread and hands them to vnet_dev_rt_exec_ops(),
 * then calls the driver's port_ops.stop callback (called unconditionally),
 * and finally clears the 'started' flag on every rx and tx queue and on the
 * port itself. */
void
vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *dev = port->dev;
  u16 thread_count = vlib_get_n_threads ();
  vnet_dev_rt_op_t *rt_ops = 0;
  u16 ti;

  log_debug (dev, "stopping port %u", port->port_id);

  /* one runtime op for each thread */
  for (ti = 0; ti < thread_count; ti++)
    {
      vnet_dev_rt_op_t thread_op = { .thread_index = ti, .port = port };
      vec_add1 (rt_ops, thread_op);
    }
  vnet_dev_rt_exec_ops (vm, dev, rt_ops, vec_len (rt_ops));
  vec_free (rt_ops);

  port->port_ops.stop (vm, port);

  foreach_vnet_dev_port_rx_queue (rxq, port)
    {
      rxq->started = 0;
      log_debug (dev, "port %u rx queue %u stopped", port->port_id,
                 rxq->queue_id);
    }

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      txq->started = 0;
      log_debug (dev, "port %u tx queue %u stopped", port->port_id,
                 txq->queue_id);
    }

  log_debug (dev, "port %u stopped", port->port_id);
  port->started = 0;
}
/* Start every rx queue of the port via vnet_dev_rx_queue_start().
 *
 * Returns the first non-OK result and stops iterating at that point (queues
 * started before the failure are not rolled back here); returns VNET_DEV_OK
 * when all queues started or the port has no rx queues. */
vnet_dev_rv_t
vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_validate (vm, port);

  foreach_vnet_dev_port_rx_queue (rxq, port)
    {
      vnet_dev_rv_t rv = vnet_dev_rx_queue_start (vm, rxq);

      if (rv != VNET_DEV_OK)
        return rv;
    }

  return VNET_DEV_OK;
}
/* Start every tx queue of the port via vnet_dev_tx_queue_start().
 *
 * Returns the first non-OK result and stops iterating at that point (queues
 * started before the failure are not rolled back here); returns VNET_DEV_OK
 * when all queues started or the port has no tx queues. */
vnet_dev_rv_t
vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_validate (vm, port);

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      vnet_dev_rv_t rv = vnet_dev_tx_queue_start (vm, txq);

      if (rv != VNET_DEV_OK)
        return rv;
    }

  return VNET_DEV_OK;
}
/* Start a port.
 *
 * Refreshes the tx node runtime data, calls the driver's port_ops.start
 * callback (called unconditionally) and, if that fails, stops the port again
 * and returns the driver's error. On success one runtime op per thread is
 * executed through vnet_dev_rt_exec_ops(), every *enabled* rx/tx queue is
 * flagged as started and the port itself is flagged as started.
 *
 * Returns VNET_DEV_OK or the error reported by the driver start callback. */
vnet_dev_rv_t
vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_t *dev = port->dev;
  u16 thread_count = vlib_get_n_threads ();
  vnet_dev_rt_op_t *rt_ops = 0;
  vnet_dev_rv_t rv;
  u16 ti;

  vnet_dev_port_validate (vm, port);
  log_debug (dev, "starting port %u", port->port_id);

  vnet_dev_port_update_tx_node_runtime (vm, port);

  rv = port->port_ops.start (vm, port);
  if (rv != VNET_DEV_OK)
    {
      /* undo whatever the driver managed to bring up */
      vnet_dev_port_stop (vm, port);
      return rv;
    }

  /* one runtime op for each thread */
  for (ti = 0; ti < thread_count; ti++)
    {
      vnet_dev_rt_op_t thread_op = { .thread_index = ti, .port = port };
      vec_add1 (rt_ops, thread_op);
    }
  vnet_dev_rt_exec_ops (vm, dev, rt_ops, vec_len (rt_ops));
  vec_free (rt_ops);

  foreach_vnet_dev_port_rx_queue (rxq, port)
    {
      if (rxq->enabled)
        {
          log_debug (dev, "port %u rx queue %u started", port->port_id,
                     rxq->queue_id);
          rxq->started = 1;
        }
    }

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      if (txq->enabled)
        {
          log_debug (dev, "port %u tx queue %u started", port->port_id,
                     txq->queue_id);
          txq->started = 1;
        }
    }

  port->started = 1;
  log_debug (dev, "port %u started", port->port_id);

  return VNET_DEV_OK;
}
/* Allocate a new port on a device and populate it from 'args'.
 *
 * The port type must be known and max_supported_frame_size must be non-zero
 * (both asserted). The port is allocated together with args->port.data_size
 * bytes of driver data (copied from args->port.initial_data), linked into
 * dev->ports, and given its attributes, queue configs, ops and rx/tx node
 * descriptions. Max frame size and primary hw address default to the values
 * found in the port attributes.
 *
 * If the driver provides port_ops.alloc it is called last; the port is
 * flagged 'initialized' only when that callback is absent or succeeds.
 * Returns VNET_DEV_OK or the error from port_ops.alloc (in which case the
 * port stays in dev->ports; nothing is released here). */
vnet_dev_rv_t
vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
                   vnet_dev_port_add_args_t *args)
{
  vnet_dev_port_t **slot, *port;

  ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
  ASSERT (args->port.attr.max_supported_frame_size);

  port =
    vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);

  pool_get (dev->ports, slot);
  *slot = port;

  clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
               args->port.data_size);

  /* identity */
  port->dev = dev;
  port->port_id = id;
  port->index = slot - dev->ports;

  /* configuration and callbacks supplied by the driver */
  port->attr = args->port.attr;
  port->port_ops = args->port.ops;
  port->rx_queue_config = args->rx_queue.config;
  port->rx_queue_ops = args->rx_queue.ops;
  port->tx_queue_config = args->tx_queue.config;
  port->tx_queue_ops = args->tx_queue.ops;
  port->rx_node = *args->rx_node;
  port->tx_node = *args->tx_node;

  /* defaults out of port attributes */
  port->max_frame_size = args->port.attr.max_supported_frame_size;
  port->primary_hw_addr = args->port.attr.hw_addr;

  if (port->port_ops.alloc)
    {
      vnet_dev_rv_t rv = port->port_ops.alloc (vm, port);

      if (rv != VNET_DEV_OK)
        return rv;
    }

  port->initialized = 1;
  return VNET_DEV_OK;
}
/* Validate a port configuration change request.
 *
 * A request already flagged 'validated' is accepted immediately. Otherwise
 * generic checks are applied per request type:
 *   - max frame size: must not exceed the supported maximum
 *     (VNET_DEV_ERR_INVALID_VALUE) and must differ from the current value
 *     (VNET_DEV_ERR_NO_CHANGE);
 *   - promisc mode / primary hw address: must differ from the current value
 *     (VNET_DEV_ERR_NO_CHANGE);
 *   - add secondary hw address: must not be present yet
 *     (VNET_DEV_ERR_ALREADY_EXISTS);
 *   - remove secondary hw address: must be present
 *     (VNET_DEV_ERR_NO_SUCH_ENTRY).
 * Other request types get no generic check. If the driver provides
 * port_ops.config_change_validate it is consulted next and its error, if
 * any, is returned. On success the request is flagged 'validated' and
 * VNET_DEV_OK is returned. */
vnet_dev_rv_t
vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
                                       vnet_dev_port_cfg_change_req_t *req)
{
  vnet_dev_hw_addr_t *hw_addr;
  int have_match = 0;

  if (req->validated)
    return VNET_DEV_OK;

  switch (req->type)
    {
    case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
      if (req->max_frame_size > port->attr.max_supported_frame_size)
        return VNET_DEV_ERR_INVALID_VALUE;
      if (req->max_frame_size == port->max_frame_size)
        return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_PROMISC_MODE:
      if (req->promisc == port->promisc)
        return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
      if (clib_memcmp (&req->addr, &port->primary_hw_addr,
                       sizeof (vnet_dev_hw_addr_t)) == 0)
        return VNET_DEV_ERR_NO_CHANGE;
      break;

    case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
      pool_foreach (hw_addr, port->secondary_hw_addr)
        {
          if (clib_memcmp (hw_addr, &req->addr, sizeof (*hw_addr)) == 0)
            return VNET_DEV_ERR_ALREADY_EXISTS;
        }
      break;

    case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
      pool_foreach (hw_addr, port->secondary_hw_addr)
        {
          if (clib_memcmp (hw_addr, &req->addr, sizeof (*hw_addr)) == 0)
            have_match = 1;
        }
      if (have_match == 0)
        return VNET_DEV_ERR_NO_SUCH_ENTRY;
      break;

    default:
      break;
    }

  /* driver-specific validation is optional */
  if (port->port_ops.config_change_validate)
    {
      vnet_dev_rv_t rv =
        port->port_ops.config_change_validate (vm, port, req);

      if (rv != VNET_DEV_OK)
        return rv;
    }

  req->validated = 1;
  return VNET_DEV_OK;
}
/* Apply a configuration change request to a port.
 *
 * Steps, in order:
 *   1. for per-queue rx interrupt mode requests, look up the rx queue by id;
 *      an unknown queue id yields VNET_DEV_ERR_BUG;
 *   2. validate the request (vnet_dev_port_cfg_change_req_validate);
 *   3. call the driver's optional port_ops.config_change callback;
 *   4. on success, mirror the new setting into the port/queue state.
 *
 * Returns the validation error, the driver's error, VNET_DEV_ERR_BUG as
 * described above, or VNET_DEV_OK. */
vnet_dev_rv_t
vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
vnet_dev_port_cfg_change_req_t *req)
{
vnet_dev_rv_t rv = VNET_DEV_OK;
vnet_dev_hw_addr_t *a;
vnet_dev_rx_queue_t *rxq = 0;
u8 enable = 0;
vnet_dev_port_validate (vm, port);
/* single-queue interrupt mode request: resolve the queue up front so an
 * unknown queue id is rejected before anything is changed */
if (req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE ||
req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE)
{
if (req->all_queues == 0)
{
rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id);
if (rxq == 0)
return VNET_DEV_ERR_BUG;
}
}
if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, req)))
return rv;
/* driver callback is optional; rv stays VNET_DEV_OK when it is absent */
if (port->port_ops.config_change)
rv = port->port_ops.config_change (vm, port, req);
if (rv != VNET_DEV_OK)
return rv;
/* driver accepted the change: update the framework's view of the port */
switch (req->type)
{
case VNET_DEV_PORT_CFG_MAX_FRAME_SIZE:
port->max_frame_size = req->max_frame_size;
break;
case VNET_DEV_PORT_CFG_PROMISC_MODE:
port->promisc = req->promisc;
break;
case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
enable = 1;
/* fallthrough - ENABLE and DISABLE share the code below, 'enable' tells
 * them apart */
case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
if (req->all_queues)
{
clib_bitmap_t *bmp = 0;
vnet_dev_rt_op_t *ops = 0;
u32 i;
/* update every rx queue and collect the set of rx threads involved, so
 * that each such thread gets exactly one runtime op */
foreach_vnet_dev_port_rx_queue (q, port)
{
q->interrupt_mode = enable;
bmp = clib_bitmap_set (bmp, q->rx_thread_index, 1);
}
clib_bitmap_foreach (i, bmp)
{
vnet_dev_rt_op_t op = { .port = port, .thread_index = i };
vec_add1 (ops, op);
}
vnet_dev_rt_exec_ops (vm, port->dev, ops, vec_len (ops));
clib_bitmap_free (bmp);
vec_free (ops);
}
else
{
/* rxq was resolved (and checked for NULL) at the top of the function */
rxq->interrupt_mode = enable;
vnet_dev_rt_exec_ops (vm, port->dev,
&(vnet_dev_rt_op_t){
.port = port,
.thread_index = rxq->rx_thread_index,
},
1);
}
break;
case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
clib_memcpy (&port->primary_hw_addr, &req->addr,
sizeof (vnet_dev_hw_addr_t));
break;
case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
pool_get (port->secondary_hw_addr, a);
clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
break;
case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
/* remove the first matching entry; the inner break is expected to leave
 * the pool_foreach loop, the outer one leaves the switch */
pool_foreach (a, port->secondary_hw_addr)
if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
{
pool_put (port->secondary_hw_addr, a);
break;
}
break;
default:
break;
}
return VNET_DEV_OK;
}
/* Record a link speed and/or link state change reported for a port.
 *
 * For each item flagged in changes.change the new value is stored in the
 * port and, when the interface has already been created, pushed to the
 * corresponding hw interface (link speed, or the LINK_UP flag). Each applied
 * change is logged at debug level. */
void
vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
                            vnet_dev_port_state_changes_t changes)
{
  vnet_main_t *vnm = vnet_get_main ();

  vnet_dev_port_validate (vm, port);

  if (changes.change.link_speed)
    {
      port->speed = changes.link_speed;

      if (port->interface_created)
        {
          vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
                                            changes.link_speed);
        }

      log_debug (port->dev, "port speed changed to %u", changes.link_speed);
    }

  if (changes.change.link_state)
    {
      port->link_up = changes.link_state;

      if (port->interface_created)
        {
          if (changes.link_state)
            vnet_hw_interface_set_flags (vnm, port->intf.hw_if_index,
                                         VNET_HW_INTERFACE_FLAG_LINK_UP);
          else
            vnet_hw_interface_set_flags (vnm, port->intf.hw_if_index, 0);
        }

      if (changes.link_state)
        log_debug (port->dev, "port link state changed to %s", "up");
      else
        log_debug (port->dev, "port link state changed to %s", "down");
    }
}
/* Allocate the port-level counter set.
 *
 * 'counters' / 'n_counters' describe the counters to create; the result of
 * vnet_dev_counters_alloc() is stored in port->counter_main. The name passed
 * to the allocator is built from the device id and the port id.
 * NOTE(review): a previously stored port->counter_main is overwritten without
 * being released - callers presumably invoke this once per port; confirm. */
void
vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
vnet_dev_counter_t *counters, u16 n_counters)
{
vnet_dev_port_validate (vm, port);
port->counter_main =
vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
port->dev->device_id, port->port_id);
}
/* Release the port-level counter set, if one was allocated.
 *
 * The pointer is cleared after the counters are handed to
 * vnet_dev_counters_free(), so that later checks of port->counter_main
 * (e.g. in vnet_dev_port_clear_counters() or a repeated call of this
 * function) do not operate on a stale pointer. Safe to call when no
 * counters exist. */
void
vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_dev_port_validate (vm, port);

  if (port->counter_main == 0)
    return;

  vnet_dev_counters_free (vm, port->counter_main);
  port->counter_main = 0;
}
/* Create the vnet interface for a port.
 *
 * Steps, in order:
 *   1. if no interface name was configured, derive "<driver><dev>/<port>"
 *      (VNET_DEV_ERR_BUG if it does not fit into port->intf.name);
 *   2. allocate the requested number of rx and tx queues;
 *   3. assign tx queues to threads, one queue per thread starting with
 *      thread 0, until either queues or threads run out;
 *   4. reserve a dev_instance in dm->ports_by_dev_instance;
 *   5. for ethernet ports: register the ethernet interface, then create a
 *      per-port rx node or reuse one from dm->free_rx_node_indices;
 *   6. initialise each rx queue's buffer template / next index / interrupt
 *      mode and request a runtime update;
 *   7. refresh tx node runtime data and call the optional port_ops.init.
 *
 * On any failure after step 1 the partially created interface is torn down
 * with vnet_dev_port_if_remove() and the error is returned.
 * Returns VNET_DEV_OK on success. */
vnet_dev_rv_t
vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_main_t *vnm = vnet_get_main ();
  u16 n_threads = vlib_get_n_threads ();
  vnet_dev_main_t *dm = &vnet_dev_main;
  vnet_dev_t *dev = port->dev;
  vnet_dev_port_t **pp;
  /* must start as OK: with zero queues requested and no port_ops.init
   * callback nothing else assigns rv before it is read at 'error:' */
  vnet_dev_rv_t rv = VNET_DEV_OK;
  u16 ti = 0;

  if (port->intf.name[0] == 0)
    {
      u8 *s;
      s = format (0, "%s%u/%u",
                  dm->drivers[port->dev->driver_index].registration->name,
                  port->dev->index, port->index);
      u32 n = vec_len (s);

      /* need room for the terminating NUL as well */
      if (n >= sizeof (port->intf.name))
        {
          vec_free (s);
          return VNET_DEV_ERR_BUG;
        }
      clib_memcpy (port->intf.name, s, n);
      port->intf.name[n] = 0;
      vec_free (s);
    }

  log_debug (
    dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
    port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
    port->intf.txq_sz);

  for (u32 i = 0; i < port->intf.num_rx_queues; i++)
    if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
        VNET_DEV_OK)
      goto error;

  for (u32 i = 0; i < port->intf.num_tx_queues; i++)
    if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
        VNET_DEV_OK)
      goto error;

  /* one tx queue per thread, starting with thread 0 */
  foreach_vnet_dev_port_tx_queue (q, port)
    {
      q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
      log_debug (dev, "port %u tx queue %u assigned to thread %u",
                 port->port_id, q->queue_id, ti);
      if (++ti >= n_threads)
        break;
    }

  /* pool of port pointers helps us to assign unique dev_instance */
  pool_get (dm->ports_by_dev_instance, pp);
  port->intf.dev_instance = pp - dm->ports_by_dev_instance;
  pp[0] = port;

  if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
    {
      vnet_device_class_t *dev_class;
      vnet_dev_driver_t *driver;
      vnet_sw_interface_t *sw;
      vnet_hw_interface_t *hw;
      u32 rx_node_index;

      driver = pool_elt_at_index (dm->drivers, dev->driver_index);

      /* hack to provide per-port tx node function */
      dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
      dev_class->tx_fn_registrations = port->tx_node.registrations;
      dev_class->format_tx_trace = port->tx_node.format_trace;
      dev_class->tx_function_error_counters = port->tx_node.error_counters;
      dev_class->tx_function_n_errors = port->tx_node.n_error_counters;

      /* create new interface including tx and output nodes */
      port->intf.hw_if_index = vnet_eth_register_interface (
        vnm, &(vnet_eth_interface_registration_t){
               .address = port->primary_hw_addr.eth_mac,
               .max_frame_size = port->max_frame_size,
               .dev_class_index = driver->dev_class_index,
               .dev_instance = port->intf.dev_instance,
               .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
               .cb.flag_change = vnet_dev_port_eth_flag_change,
             });

      sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
      hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
      port->intf.sw_if_index = sw->sw_if_index;

      /* propagate link state / speed learned before the interface existed */
      vnet_hw_interface_set_flags (
        vnm, port->intf.hw_if_index,
        port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
      if (port->speed)
        vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
                                          port->speed);

      port->intf.tx_node_index = hw->tx_node_index;

      /* create / reuse rx node */
      if (vec_len (dm->free_rx_node_indices))
        {
          vlib_node_t *n;
          rx_node_index = vec_pop (dm->free_rx_node_indices);
          vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
          n = vlib_get_node (vm, rx_node_index);
          n->function = vlib_node_get_preferred_node_fn_variant (
            vm, port->rx_node.registrations);
          n->format_trace = port->rx_node.format_trace;
          vlib_register_errors (vm, rx_node_index,
                                port->rx_node.n_error_counters, 0,
                                port->rx_node.error_counters);
        }
      else
        {
          /* NOTE(review): these three assignments repeat the ones made
           * before vnet_eth_register_interface() above and look redundant;
           * kept as-is - confirm before removing */
          dev_class->format_tx_trace = port->tx_node.format_trace;
          dev_class->tx_function_error_counters = port->tx_node.error_counters;
          dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
          vlib_node_registration_t rx_node_reg = {
            .sibling_of = "port-rx-eth",
            .type = VLIB_NODE_TYPE_INPUT,
            .state = VLIB_NODE_STATE_DISABLED,
            .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
            .node_fn_registrations = port->rx_node.registrations,
            .format_trace = port->rx_node.format_trace,
            .error_counters = port->rx_node.error_counters,
            .n_errors = port->rx_node.n_error_counters,
          };
          rx_node_index =
            vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
        }

      port->rx_node_assigned = 1;
      port->intf.rx_node_index = rx_node_index;
      port->intf.rx_next_index =
        vnet_dev_default_next_index_by_port_type[port->attr.type];

      vlib_worker_thread_node_runtime_update ();
      log_debug (dev,
                 "ethernet interface created, hw_if_index %u sw_if_index %u "
                 "rx_node_index %u tx_node_index %u",
                 port->intf.hw_if_index, port->intf.sw_if_index,
                 port->intf.rx_node_index, port->intf.tx_node_index);
    }

  port->interface_created = 1;

  foreach_vnet_dev_port_rx_queue (q, port)
    {
      vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
        port->intf.sw_if_index;
      /* poison to catch node not calling runtime update function */
      q->next_index = ~0;
      q->interrupt_mode = port->intf.default_is_intr_mode;
      vnet_dev_rx_queue_rt_request (
        vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
    }

  vnet_dev_port_update_tx_node_runtime (vm, port);

  /* driver-specific initialisation is optional */
  if (port->port_ops.init)
    rv = port->port_ops.init (vm, port);

error:
  if (rv != VNET_DEV_OK)
    vnet_dev_port_if_remove (vm, port);
  return rv;
}
/* Tear down the vnet interface of a port.
 *
 * Stops the port if it is running, parks its rx node (renamed to
 * "deleted-<index>" and pushed onto dm->free_rx_node_indices for reuse),
 * deletes the hw interface under the worker thread barrier and releases the
 * dev_instance slot. The whole port->intf block is then zeroed, the driver's
 * optional port_ops.deinit callback is invoked, all tx and rx queues are
 * freed and the port counters are released.
 *
 * Each stage is guarded by its own flag (started, rx_node_assigned,
 * interface_created), so this is also used to unwind a partially completed
 * vnet_dev_port_if_create(). Always returns VNET_DEV_OK. */
vnet_dev_rv_t
vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_dev_main_t *dm = &vnet_dev_main;

  vnet_dev_port_validate (vm, port);

  if (port->started)
    {
      vnet_dev_port_stop (vm, port);
    }

  /* rx nodes are recycled rather than deleted */
  if (port->rx_node_assigned)
    {
      vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
                        port->intf.rx_node_index);
      vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
      port->rx_node_assigned = 0;
    }

  if (port->interface_created)
    {
      vlib_worker_thread_barrier_sync (vm);
      vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
      vlib_worker_thread_barrier_release (vm);

      pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
      port->interface_created = 0;
    }

  /* forget all interface-related state */
  port->intf = (typeof (port->intf)){};

  if (port->port_ops.deinit)
    {
      port->port_ops.deinit (vm, port);
    }

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      vnet_dev_tx_queue_free (vm, txq);
    }

  foreach_vnet_dev_port_rx_queue (rxq, port)
    {
      vnet_dev_rx_queue_free (vm, rxq);
    }

  vnet_dev_port_free_counters (vm, port);

  return VNET_DEV_OK;
}
/* Clear the port-level counters and the counters of every rx and tx queue
 * of the port. Counter sets that were never allocated are skipped. A notice
 * is logged once everything has been cleared. */
void
vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
{
  if (port->counter_main)
    {
      vnet_dev_counters_clear (vm, port->counter_main);
    }

  foreach_vnet_dev_port_rx_queue (rxq, port)
    {
      if (rxq->counter_main)
        vnet_dev_counters_clear (vm, rxq->counter_main);
    }

  foreach_vnet_dev_port_tx_queue (txq, port)
    {
      if (txq->counter_main)
        vnet_dev_counters_clear (vm, txq->counter_main);
    }

  log_notice (port->dev, "counters cleared on port %u", port->port_id);
}