virtio: integrate with new tx infra

Type: improvement

Change-Id: I337ec63d0868f665329d68eadf1744e080b73a0d
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 4c0b4e0..a6c9cef 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -87,6 +87,8 @@
   clib_error_t *err = 0;
   int i;
 
+  virtio_pre_input_node_disable (vm, vif);
+
   /* *INDENT-OFF* */
   vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
     close (vif->vhost_fds[i]);
@@ -694,7 +696,9 @@
       vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, hw->max_packet_bytes);
     }
 
+  virtio_pre_input_node_enable (vm, vif);
   virtio_vring_set_rx_queues (vm, vif);
+  virtio_vring_set_tx_queues (vm, vif);
 
   vif->per_interface_next_index = ~0;
   vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index ac9be6b..8eeeb0d 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -800,6 +800,7 @@
 	      vring->avail_wrap_counter ^= 1;
 	    }
 	}
+      virtio_txq_clear_scheduled (vring);
     }
 
   while (n_left && used < sz)
@@ -940,6 +941,7 @@
 	  n_buffers_left--;
 	  free_desc_count -= n_added;
 	}
+      virtio_txq_clear_scheduled (vring);
     }
 
   while (n_left && free_desc_count)
@@ -1031,7 +1033,8 @@
   virtio_main_t *nm = &virtio_main;
   vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
   virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
-  u16 qid = vm->thread_index % vif->num_txqs;
+  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+  u16 qid = tf->queue_id;
   virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
   u16 n_left = frame->n_vectors;
   u32 *buffers = vlib_frame_vector_args (frame);
@@ -1039,13 +1042,15 @@
   int packed = vif->is_packed;
   u16 n_vectors = frame->n_vectors;
 
-  clib_spinlock_lock_if_init (&vring->lockp);
+  if (tf->shared_queue)
+    clib_spinlock_lock (&vring->lockp);
 
   if (vif->packet_coalesce)
     {
       n_vectors = n_left =
 	vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
       buffers = to;
+      virtio_txq_clear_scheduled (vring);
     }
 
   u16 retry_count = 2;
@@ -1089,7 +1094,8 @@
 				  &buffers[n_vectors - n_left], n_left,
 				  VIRTIO_TX_ERROR_NO_FREE_SLOTS);
 
-  clib_spinlock_unlock_if_init (&vring->lockp);
+  if (tf->shared_queue)
+    clib_spinlock_unlock (&vring->lockp);
 
   return frame->n_vectors - n_left;
 }
@@ -1142,7 +1148,6 @@
 virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
 				 vnet_hw_if_rx_mode mode)
 {
-  vlib_main_t *vm = vnm->vlib_main;
   virtio_main_t *mm = &virtio_main;
   vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
   virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
@@ -1155,30 +1160,9 @@
     }
 
   if (mode == VNET_HW_IF_RX_MODE_POLLING)
-    {
-      if (vif->packet_coalesce || vif->packet_buffering)
-	{
-	  if (mm->interrupt_queues_count > 0)
-	    mm->interrupt_queues_count--;
-	  if (mm->interrupt_queues_count == 0)
-	    vlib_process_signal_event (vm,
-				       virtio_send_interrupt_node.index,
-				       VIRTIO_EVENT_STOP_TIMER, 0);
-	}
       virtio_set_rx_polling (vif, rx_vring);
-    }
   else
-    {
-      if (vif->packet_coalesce || vif->packet_buffering)
-	{
-	  mm->interrupt_queues_count++;
-	  if (mm->interrupt_queues_count == 1)
-	    vlib_process_signal_event (vm,
-				       virtio_send_interrupt_node.index,
-				       VIRTIO_EVENT_START_TIMER, 0);
-	}
       virtio_set_rx_interrupt (vif, rx_vring);
-    }
 
   rx_vring->mode = mode;
 
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index 9c4ea90..bc9afd9 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -27,7 +27,6 @@
 #include <vlib/unix/unix.h>
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/feature/feature.h>
-#include <vnet/gso/gro_func.h>
 #include <vnet/interface/rx_queue_funcs.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/ip6_packet.h>
@@ -410,21 +409,8 @@
 {
   virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
   const int hdr_sz = vif->virtio_net_hdr_sz;
-  u16 txq_id = vm->thread_index % vif->num_txqs;
-  virtio_vring_t *txq_vring = vec_elt_at_index (vif->txq_vrings, txq_id);
   uword rv;
 
-  if (clib_spinlock_trylock_if_init (&txq_vring->lockp))
-    {
-      if (vif->packet_coalesce)
-	vnet_gro_flow_table_schedule_node_on_dispatcher
-	  (vm, txq_vring->flow_table);
-      else if (vif->packet_buffering)
-	virtio_vring_buffering_schedule_node_on_dispatcher
-	  (vm, txq_vring->buffering);
-      clib_spinlock_unlock_if_init (&txq_vring->lockp);
-    }
-
   if (vif->is_packed)
     {
       if (vif->gso_enabled)
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index bebba7b..b053266 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -727,7 +727,6 @@
 virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
 			     u16 queue_num)
 {
-  vlib_thread_main_t *vtm = vlib_get_thread_main ();
   clib_error_t *error = 0;
   u16 queue_size = 0;
   virtio_vring_t *vring;
@@ -753,8 +752,7 @@
       vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
 			    CLIB_CACHE_LINE_BYTES);
       vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
-      if (vif->max_queue_pairs < vtm->n_vlib_mains)
-	clib_spinlock_init (&vring->lockp);
+      clib_spinlock_init (&vring->lockp);
     }
   else
     {
@@ -807,7 +805,6 @@
 virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
 			      u16 queue_num)
 {
-  vlib_thread_main_t *vtm = vlib_get_thread_main ();
   clib_error_t *error = 0;
   u16 queue_size = 0;
   virtio_vring_t *vring;
@@ -827,8 +824,7 @@
       vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
 			    CLIB_CACHE_LINE_BYTES);
       vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
-      if (vif->max_queue_pairs < vtm->n_vlib_mains)
-	clib_spinlock_init (&vring->lockp);
+      clib_spinlock_init (&vring->lockp);
     }
   else
     {
@@ -1498,7 +1494,9 @@
 	}
     }
 
+  virtio_pre_input_node_enable (vm, vif);
   virtio_vring_set_rx_queues (vm, vif);
+  virtio_vring_set_tx_queues (vm, vif);
 
   if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
     {
@@ -1570,6 +1568,8 @@
     vlib_physmem_free (vm, vring->desc);
   }
 
+  virtio_pre_input_node_disable (vm, vif);
+
   vec_foreach_index (i, vif->txq_vrings)
   {
     virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index fe808f1..2d1edfe 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -33,6 +33,7 @@
 #include <vnet/devices/virtio/virtio_inline.h>
 #include <vnet/devices/virtio/pci.h>
 #include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
 
 virtio_main_t virtio_main;
 
@@ -73,12 +74,10 @@
 
   if (idx % 2)
     {
-      vlib_thread_main_t *thm = vlib_get_thread_main ();
       vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (idx),
 			    CLIB_CACHE_LINE_BYTES);
       vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
-      if (thm->n_vlib_mains > vif->num_txqs)
-	clib_spinlock_init (&vring->lockp);
+      clib_spinlock_init (&vring->lockp);
     }
   else
     {
@@ -284,6 +283,27 @@
   vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
 }
 
+void
+virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_vring_t *vring;
+
+  vec_foreach (vring, vif->txq_vrings) /* register each tx vring with the new tx-queue infra */
+    {
+      vring->queue_index = vnet_hw_if_register_tx_queue (
+	vnm, vif->hw_if_index, TX_QUEUE_ACCESS (vring->queue_id));
+    }
+
+  for (u32 j = 0; j < vlib_get_n_threads (); j++) /* round-robin tx queues across all threads */
+    {
+      u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index;
+      vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); /* a queue on >1 thread becomes shared */
+    }
+
+  vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
+}
+
+
 inline void
 virtio_set_net_hdr_size (virtio_if_t * vif)
 {
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 87ecfcb..267d8f9 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -103,6 +103,7 @@
     };
   };
 #define VRING_TX_OUT_OF_ORDER 1
+#define VRING_TX_SCHEDULED    2
   u16 flags;
   u8 buffer_pool_index;
   vnet_hw_if_rx_mode mode;
@@ -213,7 +214,7 @@
 
 typedef struct
 {
-  u32 interrupt_queues_count;
+  u32 gro_or_buffering_if_count;
   /* logging */
   vlib_log_class_t log_default;
 
@@ -224,7 +225,6 @@
 extern virtio_main_t virtio_main;
 extern vnet_device_class_t virtio_device_class;
 extern vlib_node_registration_t virtio_input_node;
-extern vlib_node_registration_t virtio_send_interrupt_node;
 
 clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
 				 u16 sz);
@@ -233,6 +233,7 @@
 clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
 				    u32 idx);
 void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif);
+void virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif);
 extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring);
 extern void virtio_set_net_hdr_size (virtio_if_t * vif);
 extern void virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
@@ -245,6 +246,9 @@
 extern void virtio_pci_modern_notify_queue (vlib_main_t * vm,
 					    virtio_if_t * vif, u16 queue_id,
 					    u16 queue_notify_offset);
+extern void virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif);
+
 format_function_t format_virtio_device_name;
 format_function_t format_virtio_log_name;
 
@@ -270,6 +274,28 @@
     }
 }
 
+static_always_inline u8
+virtio_txq_is_scheduled (virtio_vring_t *vring)
+{
+  if (vring)
+    return (vring->flags & VRING_TX_SCHEDULED);
+  return 1; /* no vring: report "scheduled" so callers skip it */
+}
+
+static_always_inline void
+virtio_txq_set_scheduled (virtio_vring_t *vring)
+{
+  if (vring)
+    vring->flags |= VRING_TX_SCHEDULED; /* a flush is pending for this vring */
+}
+
+static_always_inline void
+virtio_txq_clear_scheduled (virtio_vring_t *vring)
+{
+  if (vring)
+    vring->flags &= ~VRING_TX_SCHEDULED; /* allow the pre-input node to schedule again */
+}
+
 #define virtio_log_debug(vif, f, ...)				\
 {								\
   vlib_log(VLIB_LOG_LEVEL_DEBUG, virtio_main.log_default,	\
diff --git a/src/vnet/devices/virtio/virtio_buffering.h b/src/vnet/devices/virtio/virtio_buffering.h
index ef3d9d2..6f13a1f 100644
--- a/src/vnet/devices/virtio/virtio_buffering.h
+++ b/src/vnet/devices/virtio/virtio_buffering.h
@@ -18,6 +18,8 @@
 #ifndef _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
 #define _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
 
+#include <vnet/interface.h>
+
 #define VIRTIO_BUFFERING_DEFAULT_SIZE 1024
 #define VIRTIO_BUFFERING_TIMEOUT 1e-5
 
@@ -205,15 +207,18 @@
 }
 
 static_always_inline void
-virtio_vring_buffering_schedule_node_on_dispatcher (vlib_main_t * vm,
-						    virtio_vring_buffering_t *
-						    buffering)
+virtio_vring_buffering_schedule_node_on_dispatcher (
+  vlib_main_t *vm, vnet_hw_if_tx_queue_t *txq,
+  virtio_vring_buffering_t *buffering)
 {
   if (buffering && virtio_vring_buffering_is_timeout (vm, buffering)
       && virtio_vring_n_buffers (buffering))
     {
       vlib_frame_t *f = vlib_get_frame_to_node (vm, buffering->node_index);
+      vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
       u32 *f_to = vlib_frame_vector_args (f);
+      ft->shared_queue = txq->shared_queue;
+      ft->queue_id = txq->queue_id;
       f_to[f->n_vectors] = virtio_vring_buffering_read_from_back (buffering);
       f->n_vectors++;
       vlib_put_frame_to_node (vm, buffering->node_index, f);
diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c
new file mode 100644
index 0000000..ee6e848
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio_pre_input.c
@@ -0,0 +1,166 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/gso/gro_func.h>
+#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/virtio_inline.h>
+
+/*
+ * Schedule a flush of packets held by GRO coalescing or tx buffering
+ * for one tx queue. For a shared queue, take the vring spinlock and
+ * schedule at most once per round (VRING_TX_SCHEDULED guard).
+ */
+static_always_inline uword
+virtio_pre_input_inline (vlib_main_t *vm, virtio_vring_t *txq_vring,
+			 vnet_hw_if_tx_queue_t *txq, u8 packet_coalesce,
+			 u8 packet_buffering)
+{
+  if (txq->shared_queue)
+    {
+      if (clib_spinlock_trylock (&txq_vring->lockp))
+	{
+	  /* never return while holding the spinlock; skip if scheduled */
+	  if (!virtio_txq_is_scheduled (txq_vring))
+	    {
+	      if (packet_coalesce)
+		vnet_gro_flow_table_schedule_node_on_dispatcher (
+		  vm, txq, txq_vring->flow_table);
+	      else if (packet_buffering)
+		virtio_vring_buffering_schedule_node_on_dispatcher (
+		  vm, txq, txq_vring->buffering);
+	      virtio_txq_set_scheduled (txq_vring);
+	    }
+	  clib_spinlock_unlock (&txq_vring->lockp);
+	}
+    }
+  else
+    {
+      if (packet_coalesce)
+	vnet_gro_flow_table_schedule_node_on_dispatcher (
+	  vm, txq, txq_vring->flow_table);
+      else if (packet_buffering)
+	virtio_vring_buffering_schedule_node_on_dispatcher (
+	  vm, txq, txq_vring->buffering);
+    }
+  return 0;
+}
+
+static uword
+virtio_pre_input (vlib_main_t *vm, vlib_node_runtime_t *node,
+		  vlib_frame_t *frame)
+{
+  virtio_main_t *vim = &virtio_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  virtio_if_t *vif;
+
+  pool_foreach (vif, vim->interfaces) /* walk every virtio interface */
+    {
+      if (vif->packet_coalesce || vif->packet_buffering) /* only GRO/buffering users */
+	{
+	  virtio_vring_t *txq_vring;
+	  vec_foreach (txq_vring, vif->txq_vrings)
+	    {
+	      vnet_hw_if_tx_queue_t *txq =
+		vnet_hw_if_get_tx_queue (vnm, txq_vring->queue_index);
+	      if (clib_bitmap_get (txq->threads, vm->thread_index) == 1) /* txq owned by this thread */
+		virtio_pre_input_inline (vm, txq_vring, txq,
+					 vif->packet_coalesce,
+					 vif->packet_buffering);
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/**
+ * virtio interfaces support packet coalescing and buffering which
+ * depends on timer expiry to flush the stored packets periodically.
+ * Previously, virtio input node checked timer expiry and scheduled
+ * tx queue accordingly.
+ *
+ * In poll mode, timer expiry was handled naturally, as the input node
+ * runs periodically. In interrupt mode, the virtio input node depended
+ * on interrupts sent from the backend. Stored packets could starve
+ * if no interrupts arrived at the input node.
+ *
+ * This problem had been solved through a dedicated process node which
+ * periodically sends interrupt to virtio input node given coalescing
+ * or buffering feature were enabled on an interface.
+ *
+ * But that approach worked with the following limitations:
+ * 1) Each VPP thread should have at least 1 rx queue of an interface
+ * (with buffering enabled), and rxqs and txqs should be placed on the
+ * same thread.
+ *
+ * The new design solves the above problem(s) without any limitation,
+ * through a (dedicated) pre-input node running on each VPP thread when
+ * at least 1 virtio interface is enabled with coalescing or buffering.
+ */
+VLIB_REGISTER_NODE (virtio_pre_input_node) = {
+  .function = virtio_pre_input,
+  .type = VLIB_NODE_TYPE_PRE_INPUT, /* runs before input nodes on every thread */
+  .name = "virtio-pre-input",
+  .state = VLIB_NODE_STATE_DISABLED, /* enabled only while coalesce/buffering is in use */
+};
+
+void
+virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif)
+{
+  virtio_main_t *vim = &virtio_main;
+  if (vif->packet_coalesce || vif->packet_buffering)
+    {
+      vim->gro_or_buffering_if_count++; /* refcount of interfaces needing the node */
+      if (vim->gro_or_buffering_if_count == 1) /* first such interface: start polling */
+	{
+	  foreach_vlib_main ()
+	    {
+	      vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+				   VLIB_NODE_STATE_POLLING);
+	    }
+	}
+    }
+}
+
+void
+virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif)
+{
+  virtio_main_t *vim = &virtio_main;
+  if (vif->packet_coalesce || vif->packet_buffering)
+    {
+      if (vim->gro_or_buffering_if_count > 0) /* guard against underflow */
+	vim->gro_or_buffering_if_count--;
+      if (vim->gro_or_buffering_if_count == 0) /* last such interface: stop the node */
+	{
+	  foreach_vlib_main ()
+	    {
+	      vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+				   VLIB_NODE_STATE_DISABLED);
+	    }
+	}
+    }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */