virtio: integrate with new tx infra
Type: improvement
Change-Id: I337ec63d0868f665329d68eadf1744e080b73a0d
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 4c0b4e0..a6c9cef 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -87,6 +87,8 @@
clib_error_t *err = 0;
int i;
+ virtio_pre_input_node_disable (vm, vif);
+
/* *INDENT-OFF* */
vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
close (vif->vhost_fds[i]);
@@ -694,7 +696,9 @@
vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, hw->max_packet_bytes);
}
+ virtio_pre_input_node_enable (vm, vif);
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
vif->per_interface_next_index = ~0;
vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index ac9be6b..8eeeb0d 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -800,6 +800,7 @@
vring->avail_wrap_counter ^= 1;
}
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && used < sz)
@@ -940,6 +941,7 @@
n_buffers_left--;
free_desc_count -= n_added;
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && free_desc_count)
@@ -1031,7 +1033,8 @@
virtio_main_t *nm = &virtio_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
- u16 qid = vm->thread_index % vif->num_txqs;
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ u16 qid = tf->queue_id;
virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
u16 n_left = frame->n_vectors;
u32 *buffers = vlib_frame_vector_args (frame);
@@ -1039,13 +1042,15 @@
int packed = vif->is_packed;
u16 n_vectors = frame->n_vectors;
- clib_spinlock_lock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&vring->lockp);
if (vif->packet_coalesce)
{
n_vectors = n_left =
vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
buffers = to;
+ virtio_txq_clear_scheduled (vring);
}
u16 retry_count = 2;
@@ -1089,7 +1094,8 @@
&buffers[n_vectors - n_left], n_left,
VIRTIO_TX_ERROR_NO_FREE_SLOTS);
- clib_spinlock_unlock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&vring->lockp);
return frame->n_vectors - n_left;
}
@@ -1142,7 +1148,6 @@
virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
vnet_hw_if_rx_mode mode)
{
- vlib_main_t *vm = vnm->vlib_main;
virtio_main_t *mm = &virtio_main;
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
@@ -1155,30 +1160,9 @@
}
if (mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- if (mm->interrupt_queues_count > 0)
- mm->interrupt_queues_count--;
- if (mm->interrupt_queues_count == 0)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_STOP_TIMER, 0);
- }
virtio_set_rx_polling (vif, rx_vring);
- }
else
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- mm->interrupt_queues_count++;
- if (mm->interrupt_queues_count == 1)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_START_TIMER, 0);
- }
virtio_set_rx_interrupt (vif, rx_vring);
- }
rx_vring->mode = mode;
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index 9c4ea90..bc9afd9 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -27,7 +27,6 @@
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/feature/feature.h>
-#include <vnet/gso/gro_func.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
@@ -410,21 +409,8 @@
{
virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
const int hdr_sz = vif->virtio_net_hdr_sz;
- u16 txq_id = vm->thread_index % vif->num_txqs;
- virtio_vring_t *txq_vring = vec_elt_at_index (vif->txq_vrings, txq_id);
uword rv;
- if (clib_spinlock_trylock_if_init (&txq_vring->lockp))
- {
- if (vif->packet_coalesce)
- vnet_gro_flow_table_schedule_node_on_dispatcher
- (vm, txq_vring->flow_table);
- else if (vif->packet_buffering)
- virtio_vring_buffering_schedule_node_on_dispatcher
- (vm, txq_vring->buffering);
- clib_spinlock_unlock_if_init (&txq_vring->lockp);
- }
-
if (vif->is_packed)
{
if (vif->gso_enabled)
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index bebba7b..b053266 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -727,7 +727,6 @@
virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_num)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
virtio_vring_t *vring;
@@ -753,8 +752,7 @@
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -807,7 +805,6 @@
virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_num)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
virtio_vring_t *vring;
@@ -827,8 +824,7 @@
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -1498,7 +1494,9 @@
}
}
+ virtio_pre_input_node_enable (vm, vif);
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
@@ -1570,6 +1568,8 @@
vlib_physmem_free (vm, vring->desc);
}
+ virtio_pre_input_node_disable (vm, vif);
+
vec_foreach_index (i, vif->txq_vrings)
{
virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index fe808f1..2d1edfe 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -33,6 +33,7 @@
#include <vnet/devices/virtio/virtio_inline.h>
#include <vnet/devices/virtio/pci.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
virtio_main_t virtio_main;
@@ -73,12 +74,10 @@
if (idx % 2)
{
- vlib_thread_main_t *thm = vlib_get_thread_main ();
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (idx),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
- if (thm->n_vlib_mains > vif->num_txqs)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -284,6 +283,27 @@
vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
}
+void
+virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_vring_t *vring;
+
+ vec_foreach (vring, vif->txq_vrings) /* register every tx vring with the new tx-queue infra */
+ {
+ vring->queue_index = vnet_hw_if_register_tx_queue (
+ vnm, vif->hw_if_index, TX_QUEUE_ACCESS (vring->queue_id));
+ }
+
+ for (u32 j = 0; j < vlib_get_n_threads (); j++) /* round-robin: thread j -> txq (j % num_txqs) */
+ {
+ u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
+}
+
inline void
virtio_set_net_hdr_size (virtio_if_t * vif)
{
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 87ecfcb..267d8f9 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -103,6 +103,7 @@
};
};
#define VRING_TX_OUT_OF_ORDER 1
+#define VRING_TX_SCHEDULED 2
u16 flags;
u8 buffer_pool_index;
vnet_hw_if_rx_mode mode;
@@ -213,7 +214,7 @@
typedef struct
{
- u32 interrupt_queues_count;
+ u32 gro_or_buffering_if_count;
/* logging */
vlib_log_class_t log_default;
@@ -224,7 +225,6 @@
extern virtio_main_t virtio_main;
extern vnet_device_class_t virtio_device_class;
extern vlib_node_registration_t virtio_input_node;
-extern vlib_node_registration_t virtio_send_interrupt_node;
clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
u16 sz);
@@ -233,6 +233,7 @@
clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif);
+void virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif);
extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring);
extern void virtio_set_net_hdr_size (virtio_if_t * vif);
extern void virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
@@ -245,6 +246,9 @@
extern void virtio_pci_modern_notify_queue (vlib_main_t * vm,
virtio_if_t * vif, u16 queue_id,
u16 queue_notify_offset);
+extern void virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif);
+
format_function_t format_virtio_device_name;
format_function_t format_virtio_log_name;
@@ -270,6 +274,28 @@
}
}
+static_always_inline u8
+virtio_txq_is_scheduled (virtio_vring_t *vring) /* has a flush frame already been dispatched for this txq? */
+{
+ if (vring)
+ return (vring->flags & VRING_TX_SCHEDULED);
+ return 1; /* NULL vring: report "scheduled" so callers do nothing */
+}
+
+static_always_inline void
+virtio_txq_set_scheduled (virtio_vring_t *vring) /* mark txq: flush frame enqueued, skip until cleared */
+{
+ if (vring)
+ vring->flags |= VRING_TX_SCHEDULED;
+}
+
+static_always_inline void
+virtio_txq_clear_scheduled (virtio_vring_t *vring) /* cleared by the tx path once packets are consumed */
+{
+ if (vring)
+ vring->flags &= ~VRING_TX_SCHEDULED;
+}
+
#define virtio_log_debug(vif, f, ...) \
{ \
vlib_log(VLIB_LOG_LEVEL_DEBUG, virtio_main.log_default, \
diff --git a/src/vnet/devices/virtio/virtio_buffering.h b/src/vnet/devices/virtio/virtio_buffering.h
index ef3d9d2..6f13a1f 100644
--- a/src/vnet/devices/virtio/virtio_buffering.h
+++ b/src/vnet/devices/virtio/virtio_buffering.h
@@ -18,6 +18,8 @@
#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
#define _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
+#include <vnet/interface.h>
+
#define VIRTIO_BUFFERING_DEFAULT_SIZE 1024
#define VIRTIO_BUFFERING_TIMEOUT 1e-5
@@ -205,15 +207,18 @@
}
static_always_inline void
-virtio_vring_buffering_schedule_node_on_dispatcher (vlib_main_t * vm,
- virtio_vring_buffering_t *
- buffering)
+virtio_vring_buffering_schedule_node_on_dispatcher (
+ vlib_main_t *vm, vnet_hw_if_tx_queue_t *txq,
+ virtio_vring_buffering_t *buffering)
{
if (buffering && virtio_vring_buffering_is_timeout (vm, buffering)
&& virtio_vring_n_buffers (buffering))
{
vlib_frame_t *f = vlib_get_frame_to_node (vm, buffering->node_index);
+ vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
u32 *f_to = vlib_frame_vector_args (f);
+ ft->shared_queue = txq->shared_queue;
+ ft->queue_id = txq->queue_id;
f_to[f->n_vectors] = virtio_vring_buffering_read_from_back (buffering);
f->n_vectors++;
vlib_put_frame_to_node (vm, buffering->node_index, f);
diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c
new file mode 100644
index 0000000..ee6e848
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio_pre_input.c
@@ -0,0 +1,159 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/gso/gro_func.h>
+#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/virtio_inline.h>
+
+static_always_inline uword
+virtio_pre_input_inline (vlib_main_t *vm, virtio_vring_t *txq_vring,
+ vnet_hw_if_tx_queue_t *txq, u8 packet_coalesce,
+ u8 packet_buffering)
+{
+ if (txq->shared_queue)
+ {
+ if (clib_spinlock_trylock (&txq_vring->lockp))
+ {
+ if (!virtio_txq_is_scheduled (txq_vring)) /* fix: never return with the lock held */
+ {
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ virtio_txq_set_scheduled (txq_vring);
+ }
+ clib_spinlock_unlock (&txq_vring->lockp);
+ }
+ }
+ else
+ {
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ }
+ return 0;
+}
+
+static uword
+virtio_pre_input (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ virtio_main_t *vim = &virtio_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_if_t *vif;
+
+ pool_foreach (vif, vim->interfaces) /* walk all virtio interfaces on each dispatch loop */
+ {
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ virtio_vring_t *txq_vring;
+ vec_foreach (txq_vring, vif->txq_vrings)
+ {
+ vnet_hw_if_tx_queue_t *txq =
+ vnet_hw_if_get_tx_queue (vnm, txq_vring->queue_index);
+ if (clib_bitmap_get (txq->threads, vm->thread_index) == 1) /* only service txqs assigned to this thread */
+ virtio_pre_input_inline (vm, txq_vring, txq,
+ vif->packet_coalesce,
+ vif->packet_buffering);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * virtio interfaces support packet coalescing and buffering which
+ * depends on timer expiry to flush the stored packets periodically.
+ * Previously, virtio input node checked timer expiry and scheduled
+ * tx queue accordingly.
+ *
+ * In poll mode, timer expiry was handled naturally, as the input node
+ * runs periodically. In interrupt mode, the virtio input node depended
+ * on interrupts sent from the backend. Stored packets could starve if
+ * no interrupts were received by the input node.
+ *
+ * This problem had been solved through a dedicated process node which
+ * periodically sent an interrupt to the virtio input node when the
+ * coalescing or buffering feature was enabled on an interface.
+ *
+ * But that approach worked with the following limitations:
+ * 1) Each VPP thread should have (at least) 1 rx queue of an interface
+ * (with buffering enabled), and rxqs and txqs should be placed on the
+ * same thread.
+ *
+ * The new design solves the above problem(s) without any limitation,
+ * through a (dedicated) pre-input node running on each VPP thread when
+ * at least 1 virtio interface is enabled with coalescing or buffering.
+ */
+VLIB_REGISTER_NODE (virtio_pre_input_node) = {
+ .function = virtio_pre_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "virtio-pre-input",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+void
+virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ vim->gro_or_buffering_if_count++; /* refcount of interfaces needing the periodic flush */
+ if (vim->gro_or_buffering_if_count == 1) /* first such interface: start node on every thread */
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+ }
+ }
+}
+
+void
+virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ if (vim->gro_or_buffering_if_count > 0) /* guard against underflow */
+ vim->gro_or_buffering_if_count--;
+ if (vim->gro_or_buffering_if_count == 0) /* last such interface gone: stop node on every thread */
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_DISABLED);
+ }
+ }
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */