vnet: add device-input threadplacement infra
This change adds two new debug CLI commands:
- "show interface placement" to display which
thread (main or worker) is responsible for processing
interface rx queue
vpp# show interface placement
Thread 0 (vpp_main):
node af-packet-input:
host-vpp1 queue 0
Thread 1 (vpp_wk_0):
node af-packet-input:
host-virbr0 queue 0
Thread 2 (vpp_wk_1):
node af-packet-input:
host-vpp2 queue 0
host-lxcbr0 queue 0
- "set interface placmenet" to assign thread (main or worker)
which process specific interface rx queue
vpp# set interface placement host-vpp1 queue 0 main
Change-Id: Id4dd00cf2b05e10fae2125ac7cb4411b446c5e9c
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 40789f5..ef3a24d 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -685,9 +685,6 @@
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
- else if (CLIB_DEBUG > 0)
- memset (rt->runtime_data, 0xfe,
- VLIB_NODE_RUNTIME_DATA_SIZE);
}
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
@@ -701,9 +698,6 @@
clib_memcpy (rt->runtime_data, n->runtime_data,
clib_min (VLIB_NODE_RUNTIME_DATA_SIZE,
n->runtime_data_bytes));
- else if (CLIB_DEBUG > 0)
- memset (rt->runtime_data, 0xfe,
- VLIB_NODE_RUNTIME_DATA_SIZE);
}
nm_clone->processes = vec_dup (nm->processes);
@@ -1405,15 +1399,15 @@
clib_time_init (&vm->clib_time);
clib_mem_set_heap (w->thread_mheap);
+ /* Wait until the dpdk init sequence is complete */
+ while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
+ vlib_worker_thread_barrier_check ();
+
e = vlib_call_init_exit_functions
(vm, vm->worker_init_function_registrations, 1 /* call_once */ );
if (e)
clib_error_report (e);
- /* Wait until the dpdk init sequence is complete */
- while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
- vlib_worker_thread_barrier_check ();
-
vlib_worker_loop (vm);
}
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
index e491ba4..5fdc59f 100644
--- a/src/vnet/devices/af_packet/af_packet.c
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -67,15 +67,16 @@
static clib_error_t *
af_packet_fd_read_ready (unix_file_t * uf)
{
- vlib_main_t *vm = vlib_get_main ();
af_packet_main_t *apm = &af_packet_main;
+ vnet_main_t *vnm = vnet_get_main ();
u32 idx = uf->private_data;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
apm->pending_input_bitmap =
clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
/* Schedule the rx node */
- vlib_node_set_interrupt_pending (vm, af_packet_input_node.index);
+ vnet_device_input_set_interrupt_pending (vnm, apif->hw_if_index, 0);
return 0;
}
@@ -171,31 +172,6 @@
return ret;
}
-static void
-af_packet_worker_thread_enable ()
-{
- /* If worker threads are enabled, switch to polling mode */
- foreach_vlib_main ((
- {
- vlib_node_set_state (this_vlib_main,
- af_packet_input_node.index,
- VLIB_NODE_STATE_POLLING);
- }));
-
-}
-
-static void
-af_packet_worker_thread_disable ()
-{
- foreach_vlib_main ((
- {
- vlib_node_set_state (this_vlib_main,
- af_packet_input_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }));
-
-}
-
int
af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
u32 * sw_if_index)
@@ -298,6 +274,9 @@
sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
apif->sw_if_index = sw->sw_if_index;
+ vnet_set_device_input_node (apif->hw_if_index, af_packet_input_node.index);
+ vnet_device_input_assign_thread (apif->hw_if_index, 0, /* queue */
+ ~0 /* any cpu */ );
vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
@@ -307,9 +286,6 @@
if (sw_if_index)
*sw_if_index = apif->sw_if_index;
- if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 1)
- af_packet_worker_thread_enable ();
-
return 0;
error:
@@ -323,7 +299,6 @@
af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
{
vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
af_packet_main_t *apm = &af_packet_main;
af_packet_if_t *apif;
uword *p;
@@ -373,8 +348,6 @@
ethernet_delete_interface (vnm, apif->hw_if_index);
pool_put (apm->interfaces, apif);
- if (tm->n_vlib_mains > 1 && pool_elts (apm->interfaces) == 0)
- af_packet_worker_thread_disable ();
return 0;
}
@@ -384,24 +357,9 @@
{
af_packet_main_t *apm = &af_packet_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_thread_registration_t *tr;
- uword *p;
memset (apm, 0, sizeof (af_packet_main_t));
- apm->input_cpu_first_index = 0;
- apm->input_cpu_count = 1;
-
- /* find out which cpus will be used for input */
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-
- if (tr && tr->count > 0)
- {
- apm->input_cpu_first_index = tr->first_index;
- apm->input_cpu_count = tr->count;
- }
-
mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
index e00e5cb..50ec237 100644
--- a/src/vnet/devices/af_packet/af_packet.h
+++ b/src/vnet/devices/af_packet/af_packet.h
@@ -51,12 +51,6 @@
/* hash of host interface names */
mhash_t if_index_by_host_if_name;
-
- /* first cpu index */
- u32 input_cpu_first_index;
-
- /* total cpu count */
- u32 input_cpu_count;
} af_packet_main_t;
af_packet_main_t af_packet_main;
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
index ab7fd80..ba337f3 100644
--- a/src/vnet/devices/af_packet/node.c
+++ b/src/vnet/devices/af_packet/node.c
@@ -246,20 +246,18 @@
af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- int i;
u32 n_rx_packets = 0;
- u32 cpu_index = os_get_cpu_number ();
af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
- for (i = 0; i < vec_len (apm->interfaces); i++)
- {
- apif = vec_elt_at_index (apm->interfaces, i);
- if (apif->is_admin_up &&
- (i % apm->input_cpu_count) ==
- (cpu_index - apm->input_cpu_first_index))
- n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
- }
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ af_packet_if_t *apif;
+ apif = vec_elt_at_index (apm->interfaces, dq->dev_instance);
+ if (apif->is_admin_up)
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
+ }
return n_rx_packets;
}
@@ -271,9 +269,6 @@
.sibling_of = "device-input",
.format_trace = format_af_packet_input_trace,
.type = VLIB_NODE_TYPE_INPUT,
- /**
- * default state is INTERRUPT mode, switch to POLLING if worker threads are enabled
- */
.state = VLIB_NODE_STATE_INTERRUPT,
.n_errors = AF_PACKET_INPUT_N_ERROR,
.error_strings = af_packet_input_error_strings,
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 38f3002..4164522 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -32,6 +32,7 @@
VLIB_REGISTER_NODE (device_input_node) = {
.function = device_input_fn,
.name = "device-input",
+ .runtime_data_bytes = sizeof (vnet_device_input_runtime_t),
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_DISABLED,
.n_next_nodes = VNET_DEVICE_INPUT_N_NEXT_NODES,
@@ -83,18 +84,257 @@
};
/* *INDENT-ON* */
+static int
+vnet_device_queue_sort (void *a1, void *a2)	/* qsort-style comparator for vnet_device_and_queue_t */
+{
+  vnet_device_and_queue_t *dq1 = a1;
+  vnet_device_and_queue_t *dq2 = a2;
+
+  if (dq1->dev_instance > dq2->dev_instance)	/* primary key: dev_instance, ascending */
+    return 1;
+  else if (dq1->dev_instance < dq2->dev_instance)
+    return -1;
+  else if (dq1->queue_id > dq2->queue_id)	/* secondary key: queue_id, ascending */
+    return 1;
+  else if (dq1->queue_id < dq2->queue_id)
+    return -1;
+  else
+    return 0;
+}
+
+void
+vnet_device_input_assign_thread (u32 hw_if_index,
+				 u16 queue_id, uword cpu_index)	/* cpu_index ~0 = pick thread round-robin */
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_device_main_t *vdm = &vnet_device_main;
+  vlib_main_t *vm;
+  vnet_device_input_runtime_t *rt;
+  vnet_device_and_queue_t *dq;
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  ASSERT (hw->input_node_index > 0);	/* vnet_set_device_input_node must have run first */
+
+  if (vdm->first_worker_cpu_index == 0)	/* no worker threads: everything runs on main */
+    cpu_index = 0;
+
+  if (cpu_index != 0 &&
+      (cpu_index < vdm->first_worker_cpu_index ||
+       cpu_index > vdm->last_worker_cpu_index))	/* out of range (incl. ~0): round-robin a worker */
+    {
+      cpu_index = vdm->next_worker_cpu_index++;
+      if (vdm->next_worker_cpu_index > vdm->last_worker_cpu_index)
+	vdm->next_worker_cpu_index = vdm->first_worker_cpu_index;	/* wrap around */
+    }
+
+  vm = vlib_mains[cpu_index];
+  rt = vlib_node_get_runtime_data (vm, hw->input_node_index);
+
+  vec_add2 (rt->devices_and_queues, dq, 1);	/* append (device, queue) to that thread's poll list */
+  dq->hw_if_index = hw_if_index;
+  dq->dev_instance = hw->dev_instance;
+  dq->queue_id = queue_id;
+
+  vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
+  vec_validate (hw->input_node_cpu_index_by_queue, queue_id);
+  hw->input_node_cpu_index_by_queue[queue_id] = cpu_index;	/* record owning thread for this queue */
+}
+
+/* Remove (hw_if_index, queue_id) from the thread currently polling it.
+ * Deletes even when the queue already sits on cpu_index: the caller
+ * re-assigns right after, and an early-out would leave a duplicate entry. */
+static int
+vnet_device_input_unassign_thread (u32 hw_if_index, u16 queue_id,
+				   uword cpu_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  vnet_device_input_runtime_t *rt;
+  vnet_device_and_queue_t *dq;
+  uword old_cpu_index;
+
+  if (hw->input_node_cpu_index_by_queue == 0)
+    return VNET_API_ERROR_INVALID_INTERFACE;
+
+  if (vec_len (hw->input_node_cpu_index_by_queue) < queue_id + 1)
+    return VNET_API_ERROR_INVALID_INTERFACE;
+
+  old_cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
+
+  rt =
+    vlib_node_get_runtime_data (vlib_mains[old_cpu_index],
+				hw->input_node_index);
+
+  vec_foreach (dq, rt->devices_and_queues)
+    if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
+    {
+      vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues);
+      goto deleted;
+    }
+
+  return VNET_API_ERROR_INVALID_INTERFACE;
+
+deleted:
+  vec_sort_with_function (rt->devices_and_queues, vnet_device_queue_sort);
+
+  return 0;
+}
+
+static clib_error_t *
+show_device_placement_fn (vlib_main_t * vm, unformat_input_t * input,
+			  vlib_cli_command_t * cmd)	/* handler for "show interface placement" */
+{
+  u8 *s = 0;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_device_input_runtime_t *rt;
+  vnet_device_and_queue_t *dq;
+  vlib_node_t *pn = vlib_get_node_by_name (vm, (u8 *) "device-input");
+  uword si;
+  int index = 0;	/* thread index; main thread is 0 */
+
+  /* *INDENT-OFF* */
+  foreach_vlib_main (({
+    clib_bitmap_foreach (si, pn->sibling_bitmap,	/* every device input node */
+      ({
+        rt = vlib_node_get_runtime_data (this_vlib_main, si);
+
+        if (vec_len (rt->devices_and_queues))
+          s = format (s, " node %U:\n", format_vlib_node_name, vm, si);
+
+        vec_foreach (dq, rt->devices_and_queues)	/* queues polled by this thread */
+	  {
+	    s = format (s, " %U queue %u\n",
+			format_vnet_sw_if_index_name, vnm, dq->hw_if_index,
+			dq->queue_id);
+	  }
+      }));
+    if (vec_len (s) > 0)	/* only print threads that own at least one queue */
+      {
+        vlib_cli_output(vm, "Thread %u (%v):\n%v", index,
+			vlib_worker_threads[index].name, s);
+        vec_reset_length (s);	/* reuse buffer for the next thread */
+      }
+    index++;
+  }));
+  /* *INDENT-ON* */
+
+  vec_free (s);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_device_placement_command, static) = {
+  .path = "show interface placement",
+  .short_help = "show interface placement",
+  .function = show_device_placement_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+set_device_placement (vlib_main_t * vm, unformat_input_t * input,
+		      vlib_cli_command_t * cmd)	/* "set interface placement" handler */
+{
+  clib_error_t *error = 0;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_device_main_t *vdm = &vnet_device_main;
+  u32 hw_if_index = (u32) ~ 0;
+  u32 queue_id = (u32) 0;	/* defaults to queue 0 when not given */
+  u32 cpu_index = (u32) ~ 0;
+  int rv;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+	  (line_input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+	;
+      else if (unformat (line_input, "queue %d", &queue_id))
+	;
+      else if (unformat (line_input, "main"))	/* no conversion spec, so no arg */
+	cpu_index = 0;
+      else if (unformat (line_input, "worker %d", &cpu_index))
+	cpu_index += vdm->first_worker_cpu_index;	/* worker N -> absolute thread index */
+      else
+	{
+	  error = clib_error_return (0, "parse error: '%U'",
+				     format_unformat_error, line_input);
+	  unformat_free (line_input);
+	  return error;
+	}
+    }
+
+  unformat_free (line_input);
+
+  if (hw_if_index == (u32) ~ 0)
+    return clib_error_return (0, "please specify valid interface name");
+
+  if (cpu_index > vdm->last_worker_cpu_index)	/* also rejects unspecified (~0) */
+    return clib_error_return (0,
+			      "please specify valid worker thread or main");
+
+  rv = vnet_device_input_unassign_thread (hw_if_index, queue_id, cpu_index);
+
+  if (rv)
+    return clib_error_return (0, "not found");
+
+  vnet_device_input_assign_thread (hw_if_index, queue_id, cpu_index);
+
+  return 0;
+}
+
+/*?
+ * This command is used to assign a given interface, and optionally a
+ * given queue, to a different thread. If the '<em>queue</em>' is not provided,
+ * it defaults to 0.
+ *
+ * @cliexpar
+ * Example of how to display the interface placement:
+ * @cliexstart{show interface placement}
+ * Thread 1 (vpp_wk_0):
+ *   GigabitEthernet0/8/0 queue 0
+ *   GigabitEthernet0/9/0 queue 0
+ * Thread 2 (vpp_wk_1):
+ *   GigabitEthernet0/8/0 queue 1
+ *   GigabitEthernet0/9/0 queue 1
+ * @cliexend
+ * Example of how to assign an interface and queue to a worker thread:
+ * @cliexcmd{set interface placement GigabitEthernet0/8/0 queue 1 worker 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_set_if_placement,static) = {
+  .path = "set interface placement",
+  .short_help = "set interface placement <interface> [queue <n>] [worker <n> | main]",
+  .function = set_device_placement,
+};
+/* *INDENT-ON* */
+
static clib_error_t *
vnet_device_init (vlib_main_t * vm)
{
vnet_device_main_t *vdm = &vnet_device_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_thread_registration_t *tr;
+ uword *p;
vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
+
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+ if (tr && tr->count > 0)
+ {
+ vdm->first_worker_cpu_index = tr->first_index;
+ vdm->next_worker_cpu_index = tr->first_index;
+ vdm->last_worker_cpu_index = tr->first_index + tr->count - 1;
+ }
return 0;
}
VLIB_INIT_FUNCTION (vnet_device_init);
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index a5cbc35..bbb29fe 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -50,12 +50,38 @@
typedef struct
{
vnet_device_per_worker_data_t *workers;
+ uword first_worker_cpu_index;
+ uword last_worker_cpu_index;
+ uword next_worker_cpu_index;
} vnet_device_main_t;
+typedef struct
+{
+ u32 hw_if_index;
+ u32 dev_instance;
+ u16 queue_id;
+} vnet_device_and_queue_t;
+
+typedef struct
+{
+ vnet_device_and_queue_t *devices_and_queues;
+} vnet_device_input_runtime_t;
+
extern vnet_device_main_t vnet_device_main;
extern vlib_node_registration_t device_input_node;
extern const u32 device_input_next_node_advance[];
+static inline void
+vnet_set_device_input_node (u32 hw_if_index, u32 node_index)	/* register the input node of a hw interface */
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+  hw->input_node_index = node_index;	/* must be set before vnet_device_input_assign_thread */
+}
+
+void vnet_device_input_assign_thread (u32 hw_if_index, u16 queue_id,
+ uword cpu_index);
+
static inline u64
vnet_get_aggregate_rx_packets (void)
{
@@ -78,6 +104,25 @@
pwd->aggregate_rx_packets += count;
}
+static_always_inline vnet_device_and_queue_t *
+vnet_get_device_and_queue (vlib_main_t * vm, vlib_node_runtime_t * node)	/* vm unused; kept for API symmetry */
+{
+  vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+  return rt->devices_and_queues;	/* vector of (device, queue) pairs this node polls */
+}
+
+static_always_inline void
+vnet_device_input_set_interrupt_pending (vnet_main_t * vnm, u32 hw_if_index,
+					 u16 queue_id)
+{
+  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+
+  ASSERT (queue_id < vec_len (hw->input_node_cpu_index_by_queue));	/* queue must be assigned to a thread first */
+  u32 cpu_index = hw->input_node_cpu_index_by_queue[queue_id];
+  vlib_node_set_interrupt_pending (vlib_mains[cpu_index],	/* wake the node on the owning thread */
+				   hw->input_node_index);
+}
+
#endif /* included_vnet_vnet_device_h */
/*
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index ef8f911..a1ea2d6 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -464,6 +464,12 @@
#define VNET_HW_INTERFACE_BOND_INFO_NONE ((uword *) 0)
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE ((uword *) ~0)
+ /* Input node */
+ u32 input_node_index;
+
+ /* input node cpu index by queue */
+ u32 *input_node_cpu_index_by_queue;
+
} vnet_hw_interface_t;
extern vnet_device_class_t vnet_local_interface_device_class;