vhost: migrate to use device infra for worker thread assignment and rx-mode,
and add adaptive mode support to receive queue

- Migrate vhost to use device infra which does the interface/queue to worker
thread assignment.

- Retire vhost thread CLI and corresponding code which assigns interface/queue
to worker thread. The "set interface placement" command should be used instead
to customize the interface/queue to worker thread assignment.

- Retire vhost interrupt/polling option when creating vhost-user interface.
Instead, the "set interface rx-mode" command should be used.

- Add code in vnet_device_input_unassign_thread to change the node state
to interrupt if the last polling interface has left the worker thread for the
device of the corresponding interface/queue.

- Add adaptive mode support. The node state is set to interrupt initially.
When the scheduler detects a burst of traffic, it switches the input node to
polling. Then we inform the driver that we don't need interrupt notification.
When the traffic subsides, the scheduler switches the input node back to
interrupt. Then we immediately tell the driver that we want interrupt
notification again.

- Remove some duplicate code in vlib/main.c

Change-Id: Id19bb1b9e50e6521c6464f470f5825c26924d3a8
Signed-off-by: Steven <sluong@cisco.com>
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index e71be60..58c7207 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -150,6 +150,7 @@
   dq->dev_instance = hw->dev_instance;
   dq->queue_id = queue_id;
   dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+  rt->enabled_node_state = VLIB_NODE_STATE_POLLING;
 
   vnet_device_queue_update (vnm, rt);
   vec_validate (hw->input_node_thread_index_by_queue, queue_id);
@@ -168,6 +169,7 @@
   vnet_device_input_runtime_t *rt;
   vnet_device_and_queue_t *dq;
   uword old_thread_index;
+  vnet_hw_interface_rx_mode mode;
 
   if (hw->input_node_thread_index_by_queue == 0)
     return VNET_API_ERROR_INVALID_INTERFACE;
@@ -184,6 +186,7 @@
   vec_foreach (dq, rt->devices_and_queues)
     if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
     {
+      mode = dq->mode;
       vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues);
       goto deleted;
     }
@@ -197,6 +200,23 @@
 
   if (vec_len (rt->devices_and_queues) == 0)
     vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED);
+  else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+    {
+      /*
+       * if the deleted interface is polling, we may need to set the node state
+       * to interrupt if there is no more polling interface for this device's
+       * corresponding thread. This is because mixed interfaces
+       * (polling and interrupt), assigned to the same thread, set the
+       * thread to polling prior to the deletion.
+       */
+      vec_foreach (dq, rt->devices_and_queues)
+      {
+	if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+	  return 0;
+      }
+      rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
+      vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state);
+    }
 
   return 0;
 }
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 451ae43..2318893 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -362,140 +362,71 @@
     }
 }
 
+/**
+ * @brief Unassign existing interface/queue to thread mappings and re-assign
+ * new interface/queue to thread mappings
+ */
 static void
 vhost_user_rx_thread_placement ()
 {
   vhost_user_main_t *vum = &vhost_user_main;
   vhost_user_intf_t *vui;
-  vhost_cpu_t *vhc;
-  u32 *workers = 0;
-  u32 thread_index;
-  vlib_main_t *vm;
+  vhost_user_vring_t *txvq;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 qid;
+  int rv;
+  u16 *queue;
 
-  //Let's list all workers cpu indexes
-  u32 i;
-  for (i = vum->input_cpu_first_index;
-       i < vum->input_cpu_first_index + vum->input_cpu_count; i++)
-    {
-      vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index,
-			   VLIB_NODE_STATE_DISABLED);
-      vec_add1 (workers, i);
-    }
-
-  vec_foreach (vhc, vum->cpus)
-  {
-    vec_reset_length (vhc->rx_queues);
-  }
-
-  i = 0;
-  vhost_iface_and_queue_t iaq;
+  // Scrap all existing mappings for all interfaces/queues
   /* *INDENT-OFF* */
   pool_foreach (vui, vum->vhost_user_interfaces, {
-      u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers;
-      u32 qid;
+      vec_foreach (queue, vui->rx_queues)
+	{
+	  rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
+						     *queue);
+	  if (rv)
+	    clib_warning ("Warning: unable to unassign interface %d, "
+			  "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
+	}
+      vec_reset_length (vui->rx_queues);
+  });
+  /* *INDENT-ON* */
+
+  // Create the rx_queues for all interfaces
+  /* *INDENT-OFF* */
+  pool_foreach (vui, vum->vhost_user_interfaces, {
       for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
 	{
-	  vhost_user_vring_t *txvq =
-	      &vui->vrings[VHOST_VRING_IDX_TX (qid)];
-	  if (!txvq->started)
-	    continue;
-
-	  i %= vec_len (vui_workers);
-	  thread_index = vui_workers[i];
-	  i++;
-	  vhc = &vum->cpus[thread_index];
-	  txvq->interrupt_thread_index = thread_index;
-
-	  iaq.qid = qid;
-	  iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
-	  vec_add1 (vhc->rx_queues, iaq);
+	  txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+	  if (txvq->started)
+	    {
+	      if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
+		/* Set polling as the default */
+		txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+	      vec_add1 (vui->rx_queues, qid);
+	    }
 	}
   });
   /* *INDENT-ON* */
 
-  vec_foreach (vhc, vum->cpus)
-  {
-    vhost_iface_and_queue_t *vhiq;
-    u8 mode = VHOST_USER_INTERRUPT_MODE;
-
-    vec_foreach (vhiq, vhc->rx_queues)
-    {
-      vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
-      if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+  // Assign new mappings for all interfaces/queues
+  /* *INDENT-OFF* */
+  pool_foreach (vui, vum->vhost_user_interfaces, {
+      vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
+					vhost_user_input_node.index);
+      vec_foreach (queue, vui->rx_queues)
 	{
-	  /* At least one interface is polling, cpu is set to polling */
-	  mode = VHOST_USER_POLLING_MODE;
-	  break;
+	  vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
+					      ~0);
+	  txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+	  rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
+					      txvq->mode);
+	  if (rv)
+	    clib_warning ("Warning: unable to set rx mode for interface %d, "
+			  "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
 	}
-    }
-    vhc->operation_mode = mode;
-  }
-
-  for (thread_index = vum->input_cpu_first_index;
-       thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
-       thread_index++)
-    {
-      vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
-
-      vhc = &vum->cpus[thread_index];
-      vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
-      switch (vhc->operation_mode)
-	{
-	case VHOST_USER_INTERRUPT_MODE:
-	  state = VLIB_NODE_STATE_INTERRUPT;
-	  break;
-	case VHOST_USER_POLLING_MODE:
-	  state = VLIB_NODE_STATE_POLLING;
-	  break;
-	default:
-	  clib_warning ("BUG: bad operation mode %d", vhc->operation_mode);
-	  break;
-	}
-      vlib_node_set_state (vm, vhost_user_input_node.index, state);
-    }
-
-  vec_free (workers);
-}
-
-static int
-vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del)
-{
-  vhost_user_main_t *vum = &vhost_user_main;
-  vhost_user_intf_t *vui;
-  vnet_hw_interface_t *hw;
-
-  if (worker_thread_index < vum->input_cpu_first_index ||
-      worker_thread_index >=
-      vum->input_cpu_first_index + vum->input_cpu_count)
-    return -1;
-
-  if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index)))
-    return -2;
-
-  vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance);
-  u32 found = ~0, *w;
-  vec_foreach (w, vui->workers)
-  {
-    if (*w == worker_thread_index)
-      {
-	found = w - vui->workers;
-	break;
-      }
-  }
-
-  if (del)
-    {
-      if (found == ~0)
-	return -3;
-      vec_del1 (vui->workers, found);
-    }
-  else if (found == ~0)
-    {
-      vec_add1 (vui->workers, worker_thread_index);
-    }
-
-  vhost_user_rx_thread_placement ();
-  return 0;
+  });
+  /* *INDENT-ON* */
 }
 
 /** @brief Returns whether at least one TX and one RX vring are enabled */
@@ -532,37 +463,17 @@
 static void
 vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
 {
-  vhost_user_main_t *vum = &vhost_user_main;
-  vhost_cpu_t *vhc;
-  u32 thread_index;
-  vlib_main_t *vm;
-  u32 ifq2, qid;
-  vhost_user_vring_t *txvq;
+  u32 qid;
+  vnet_main_t *vnm = vnet_get_main ();
 
   qid = ifq & 0xff;
-  if ((qid % 2) == 0)
-    /* Only care about the odd number virtqueue which is TX */
+  if ((qid & 1) == 0)
+    /* Only care about the odd number, or TX, virtqueue */
     return;
 
   if (vhost_user_intf_ready (vui))
-    {
-      txvq = &vui->vrings[qid];
-      thread_index = txvq->interrupt_thread_index;
-      vhc = &vum->cpus[thread_index];
-      if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE)
-	{
-	  vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
-	  /*
-	   * Convert virtqueue number in the lower byte to vring
-	   * queue index for the input node process. Top bytes contain
-	   * the interface, lower byte contains the queue index.
-	   */
-	  ifq2 = ((ifq >> 8) << 8) | qid / 2;
-	  vhc->pending_input_bitmap =
-	    clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
-	  vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index);
-	}
-    }
+    // qid >> 1 is to convert virtqueue number to vring queue index
+    vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
 }
 
 static clib_error_t *
@@ -570,14 +481,10 @@
 {
   __attribute__ ((unused)) int n;
   u8 buff[8];
-  vhost_user_intf_t *vui =
-    pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
-		       uf->private_data >> 8);
 
   n = read (uf->file_descriptor, ((char *) &buff), 8);
   DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8,
 	    uf->private_data & 0xff);
-  vhost_user_set_interrupt_pending (vui, uf->private_data);
 
   return 0;
 }
@@ -1001,12 +908,8 @@
 	vui->vrings[msg.state.index].last_avail_idx =
 	vui->vrings[msg.state.index].used->idx;
 
-      if (vui->operation_mode == VHOST_USER_POLLING_MODE)
-	/* tell driver that we don't want interrupts */
-	vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
-      else
-	/* tell driver that we want interrupts */
-	vui->vrings[msg.state.index].used->flags = 0;
+      /* tell driver that we don't want interrupts */
+      vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
       break;
 
     case VHOST_USER_SET_OWNER:
@@ -1315,8 +1218,6 @@
   clib_error_t *error;
   vhost_user_main_t *vum = &vhost_user_main;
   vlib_thread_main_t *tm = vlib_get_thread_main ();
-  vlib_thread_registration_t *tr;
-  uword *p;
 
   error = vlib_call_init_function (vm, ip4_init);
   if (error)
@@ -1335,18 +1236,6 @@
     cpu->rx_buffers_len = 0;
   }
 
-  /* find out which cpus will be used for input */
-  vum->input_cpu_first_index = 0;
-  vum->input_cpu_count = 1;
-  p = hash_get_mem (tm->thread_registrations_by_name, "workers");
-  tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-
-  if (tr && tr->count > 0)
-    {
-      vum->input_cpu_first_index = tr->first_index;
-      vum->input_cpu_count = tr->count;
-    }
-
   vum->random = random_default_seed ();
 
   mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
@@ -1447,9 +1336,16 @@
   vhost_user_main_t *vum = &vhost_user_main;
   u64 x = 1;
   int fd = UNIX_GET_FD (vq->callfd_idx);
-  int rv __attribute__ ((unused));
-  /* TODO: pay attention to rv */
+  int rv;
+
   rv = write (fd, &x, sizeof (x));
+  if (rv <= 0)
+    {
+      clib_unix_warning
+	("Error: Could not write to unix socket for callfd %d", fd);
+      return;
+    }
+
   vq->n_since_last_int = 0;
   vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
 }
@@ -1564,7 +1460,8 @@
 vhost_user_if_input (vlib_main_t * vm,
 		     vhost_user_main_t * vum,
 		     vhost_user_intf_t * vui,
-		     u16 qid, vlib_node_runtime_t * node)
+		     u16 qid, vlib_node_runtime_t * node,
+		     vnet_hw_interface_rx_mode mode)
 {
   vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
   u16 n_rx_packets = 0;
@@ -1590,6 +1487,26 @@
       vhost_user_send_call (vm, rxvq);
   }
 
+  /*
+   * For adaptive mode, it is optimized to reduce interrupts.
+   * If the scheduler switches the input node to polling due
+   * to burst of traffic, we tell the driver no interrupt.
+   * When the traffic subsides, the scheduler switches the node back to
+   * interrupt mode. We must tell the driver we want interrupt.
+   */
+  if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+    {
+      if ((node->flags &
+	   VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+	  !(node->flags &
+	    VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+	/* Tell driver we want notification */
+	txvq->used->flags = 0;
+      else
+	/* Tell driver we don't want notification */
+	txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+    }
+
   if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
     return 0;
 
@@ -1926,34 +1843,23 @@
 {
   vhost_user_main_t *vum = &vhost_user_main;
   uword n_rx_packets = 0;
-  u32 thread_index = vlib_get_thread_index ();
-  vhost_iface_and_queue_t *vhiq;
   vhost_user_intf_t *vui;
-  vhost_cpu_t *vhc;
+  vnet_device_input_runtime_t *rt =
+    (vnet_device_input_runtime_t *) node->runtime_data;
+  vnet_device_and_queue_t *dq;
 
-  vhc = &vum->cpus[thread_index];
-  if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
-    {
-      vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
+  vec_foreach (dq, rt->devices_and_queues)
+  {
+    if (clib_smp_swap (&dq->interrupt_pending, 0) ||
+	(node->state == VLIB_NODE_STATE_POLLING))
       {
-	vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
-	n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
+	vui =
+	  pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
+	n_rx_packets += vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
+					     dq->mode);
       }
-    }
-  else
-    {
-      int i;
+  }
 
-      /* *INDENT-OFF* */
-      clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({
-	int qid = i & 0xff;
-
-	clib_bitmap_set (vhc->pending_input_bitmap, i, 0);
-	vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8);
-	n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node);
-      }));
-      /* *INDENT-ON* */
-    }
   return n_rx_packets;
 }
 
@@ -2371,6 +2277,161 @@
   return frame->n_vectors;
 }
 
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+				   vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  vhost_user_intf_t *vui;
+  f64 timeout = 3153600000.0 /* 100 years */ ;
+  uword event_type, *event_data = 0;
+  vhost_user_main_t *vum = &vhost_user_main;
+  u16 *queue;
+  f64 now, poll_time_remaining;
+  f64 next_timeout;
+  u8 stop_timer = 0;
+
+  while (1)
+    {
+      poll_time_remaining =
+	vlib_process_wait_for_event_or_clock (vm, timeout);
+      event_type = vlib_process_get_events (vm, &event_data);
+      vec_reset_length (event_data);
+
+      /*
+       * Use the remaining timeout if it is less than coalesce time to avoid
+       * resetting the existing timer in the middle of expiration
+       */
+      timeout = poll_time_remaining;
+      if (vlib_process_suspend_time_is_zero (timeout) ||
+	  (timeout > vum->coalesce_time))
+	timeout = vum->coalesce_time;
+
+      now = vlib_time_now (vm);
+      switch (event_type)
+	{
+	case VHOST_USER_EVENT_STOP_TIMER:
+	  stop_timer = 1;
+	  break;
+
+	case VHOST_USER_EVENT_START_TIMER:
+	  stop_timer = 0;
+	  if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+	    break;
+	  /* fall through */
+
+	case ~0:
+	  /* *INDENT-OFF* */
+	  pool_foreach (vui, vum->vhost_user_interfaces, {
+	      next_timeout = timeout;
+	      vec_foreach (queue, vui->rx_queues)
+		{
+		  vhost_user_vring_t *rxvq =
+		    &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
+		  vhost_user_vring_t *txvq =
+		    &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+
+		  if (txvq->n_since_last_int)
+		    {
+		      if (now >= txvq->int_deadline)
+			vhost_user_send_call (vm, txvq);
+		      else
+			next_timeout = txvq->int_deadline - now;
+		    }
+
+		  if (rxvq->n_since_last_int)
+		    {
+		      if (now >= rxvq->int_deadline)
+			vhost_user_send_call (vm, rxvq);
+		      else
+			next_timeout = rxvq->int_deadline - now;
+		    }
+
+		  if ((next_timeout < timeout) && (next_timeout > 0.0))
+		    timeout = next_timeout;
+		}
+	  });
+          /* *INDENT-ON* */
+	  break;
+
+	default:
+	  clib_warning ("BUG: unhandled event type %d", event_type);
+	  break;
+	}
+      /* No less than 1 millisecond */
+      if (timeout < 1e-3)
+	timeout = 1e-3;
+      if (stop_timer)
+	timeout = 3153600000.0;
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+    .function = vhost_user_send_interrupt_process,
+    .type = VLIB_NODE_TYPE_PROCESS,
+    .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
+				     u32 qid, vnet_hw_interface_rx_mode mode)
+{
+  vlib_main_t *vm = vnm->vlib_main;
+  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+  vhost_user_main_t *vum = &vhost_user_main;
+  vhost_user_intf_t *vui =
+    pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+  vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+
+  if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+      (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+    {
+      if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+	{
+	  vum->ifq_count++;
+	  // Start the timer if this is the first encounter on interrupt
+	  // interface/queue
+	  if ((vum->ifq_count == 1) &&
+	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+	    vlib_process_signal_event (vm,
+				       vhost_user_send_interrupt_node.index,
+				       VHOST_USER_EVENT_START_TIMER, 0);
+	}
+    }
+  else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+    {
+      if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+	   (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
+	  vum->ifq_count)
+	{
+	  vum->ifq_count--;
+	  // Stop the timer if there is no more interrupt interface/queue
+	  if ((vum->ifq_count == 0) &&
+	      (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+	    vlib_process_signal_event (vm,
+				       vhost_user_send_interrupt_node.index,
+				       VHOST_USER_EVENT_STOP_TIMER, 0);
+	}
+    }
+
+  txvq->mode = mode;
+  if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+    txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+  else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
+	   (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
+    txvq->used->flags = 0;
+  else
+    {
+      clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
+		    hw_if_index, qid);
+      return clib_error_return (0, "unsupported");
+    }
+
+  return 0;
+}
+
 static clib_error_t *
 vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
 				    u32 flags)
@@ -2399,6 +2460,7 @@
   .format_device_name = format_vhost_user_interface_name,
   .name_renumber = vhost_user_name_renumber,
   .admin_up_down_function = vhost_user_interface_admin_up_down,
+  .rx_mode_change_function = vhost_user_interface_rx_mode_change,
   .format_tx_trace = format_vhost_trace,
 };
 
@@ -2523,8 +2585,6 @@
   int q;
   vhost_user_main_t *vum = &vhost_user_main;
 
-  // Delete configured thread pinning
-  vec_reset_length (vui->workers);
   // disconnect interface sockets
   vhost_user_if_disconnect (vui);
   vhost_user_update_iface_state (vui);
@@ -2555,6 +2615,7 @@
   vhost_user_intf_t *vui;
   int rv = 0;
   vnet_hw_interface_t *hwif;
+  u16 *queue;
 
   if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
       hwif->dev_class_index != vhost_user_dev_class.index)
@@ -2565,6 +2626,28 @@
 
   vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
 
+  vec_foreach (queue, vui->rx_queues)
+  {
+    vhost_user_vring_t *txvq;
+
+    txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+    if ((vum->ifq_count > 0) &&
+	((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+	 (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
+      {
+	vum->ifq_count--;
+	// Stop the timer if there is no more interrupt interface/queue
+	if ((vum->ifq_count == 0) &&
+	    (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+	  {
+	    vlib_process_signal_event (vm,
+				       vhost_user_send_interrupt_node.index,
+				       VHOST_USER_EVENT_STOP_TIMER, 0);
+	    break;
+	  }
+      }
+  }
+
   // Disable and reset interface
   vhost_user_term_if (vui);
 
@@ -2687,13 +2770,15 @@
 		     vhost_user_intf_t * vui,
 		     int server_sock_fd,
 		     const char *sock_filename,
-		     u64 feature_mask, u32 * sw_if_index, u8 operation_mode)
+		     u64 feature_mask, u32 * sw_if_index)
 {
   vnet_sw_interface_t *sw;
-  sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
   int q;
   vhost_user_main_t *vum = &vhost_user_main;
+  vnet_hw_interface_t *hw;
 
+  hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
+  sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
   if (server_sock_fd != -1)
     {
       unix_file_t template = { 0 };
@@ -2715,7 +2800,6 @@
   vui->feature_mask = feature_mask;
   vui->unix_file_index = ~0;
   vui->log_base_addr = 0;
-  vui->operation_mode = operation_mode;
   vui->if_index = vui - vum->vhost_user_interfaces;
   mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
 		 &vui->if_index, 0);
@@ -2723,6 +2807,7 @@
   for (q = 0; q < VHOST_VRING_MAX_N; q++)
     vhost_user_vring_init (vui, q);
 
+  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
   vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
 
   if (sw_if_index)
@@ -2740,106 +2825,13 @@
   vhost_user_tx_thread_placement (vui);
 }
 
-static uword
-vhost_user_send_interrupt_process (vlib_main_t * vm,
-				   vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
-  vhost_user_intf_t *vui;
-  f64 timeout = 3153600000.0 /* 100 years */ ;
-  uword event_type, *event_data = 0;
-  vhost_user_main_t *vum = &vhost_user_main;
-  vhost_iface_and_queue_t *vhiq;
-  vhost_cpu_t *vhc;
-  f64 now, poll_time_remaining;
-
-  while (1)
-    {
-      poll_time_remaining =
-	vlib_process_wait_for_event_or_clock (vm, timeout);
-      event_type = vlib_process_get_events (vm, &event_data);
-      vec_reset_length (event_data);
-
-      /*
-       * Use the remaining timeout if it is less than coalesce time to avoid
-       * resetting the existing timer in the middle of expiration
-       */
-      timeout = poll_time_remaining;
-      if (vlib_process_suspend_time_is_zero (timeout) ||
-	  (timeout > vum->coalesce_time))
-	timeout = vum->coalesce_time;
-
-      now = vlib_time_now (vm);
-      switch (event_type)
-	{
-	case VHOST_USER_EVENT_START_TIMER:
-	  if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
-	    break;
-	  /* fall through */
-
-	case ~0:
-	  vec_foreach (vhc, vum->cpus)
-	  {
-	    u32 thread_index = vhc - vum->cpus;
-	    f64 next_timeout;
-
-	    next_timeout = timeout;
-	    vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
-	    {
-	      vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
-	      vhost_user_vring_t *rxvq =
-		&vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)];
-	      vhost_user_vring_t *txvq =
-		&vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)];
-
-	      if (txvq->n_since_last_int)
-		{
-		  if (now >= txvq->int_deadline)
-		    vhost_user_send_call (vm, txvq);
-		  else
-		    next_timeout = txvq->int_deadline - now;
-		}
-
-	      if (rxvq->n_since_last_int)
-		{
-		  if (now >= rxvq->int_deadline)
-		    vhost_user_send_call (vm, rxvq);
-		  else
-		    next_timeout = rxvq->int_deadline - now;
-		}
-
-	      if ((next_timeout < timeout) && (next_timeout > 0.0))
-		timeout = next_timeout;
-	    }
-	  }
-	  break;
-
-	default:
-	  clib_warning ("BUG: unhandled event type %d", event_type);
-	  break;
-	}
-      /* No less than 1 millisecond */
-      if (timeout < 1e-3)
-	timeout = 1e-3;
-    }
-  return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
-    .function = vhost_user_send_interrupt_process,
-    .type = VLIB_NODE_TYPE_PROCESS,
-    .name = "vhost-user-send-interrupt-process",
-};
-/* *INDENT-ON* */
-
 int
 vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
 		      const char *sock_filename,
 		      u8 is_server,
 		      u32 * sw_if_index,
 		      u64 feature_mask,
-		      u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
-		      u8 operation_mode)
+		      u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
 {
   vhost_user_intf_t *vui = NULL;
   u32 sw_if_idx = ~0;
@@ -2848,10 +2840,6 @@
   vhost_user_main_t *vum = &vhost_user_main;
   uword *if_index;
 
-  if ((operation_mode != VHOST_USER_POLLING_MODE) &&
-      (operation_mode != VHOST_USER_INTERRUPT_MODE))
-    return VNET_API_ERROR_UNIMPLEMENTED;
-
   if (sock_filename == NULL || !(strlen (sock_filename) > 0))
     {
       return VNET_API_ERROR_INVALID_ARGUMENT;
@@ -2881,7 +2869,7 @@
 
   vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
   vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
-		       feature_mask, &sw_if_idx, operation_mode);
+		       feature_mask, &sw_if_idx);
 
   if (renumber)
     vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2892,14 +2880,6 @@
   // Process node must connect
   vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
 
-  if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
-      !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
-      (vum->coalesce_frames > 0))
-    {
-      vum->interrupt_mode = 1;
-      vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
-				 VHOST_USER_EVENT_START_TIMER, 0);
-    }
   return rv;
 }
 
@@ -2908,8 +2888,7 @@
 		      const char *sock_filename,
 		      u8 is_server,
 		      u32 sw_if_index,
-		      u64 feature_mask, u8 renumber, u32 custom_dev_instance,
-		      u8 operation_mode)
+		      u64 feature_mask, u8 renumber, u32 custom_dev_instance)
 {
   vhost_user_main_t *vum = &vhost_user_main;
   vhost_user_intf_t *vui = NULL;
@@ -2919,9 +2898,6 @@
   vnet_hw_interface_t *hwif;
   uword *if_index;
 
-  if ((operation_mode != VHOST_USER_POLLING_MODE) &&
-      (operation_mode != VHOST_USER_INTERRUPT_MODE))
-    return VNET_API_ERROR_UNIMPLEMENTED;
   if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
       hwif->dev_class_index != vhost_user_dev_class.index)
     return VNET_API_ERROR_INVALID_SW_IF_INDEX;
@@ -2947,8 +2923,7 @@
 
   vhost_user_term_if (vui);
   vhost_user_vui_init (vnm, vui, server_sock_fd,
-		       sock_filename, feature_mask, &sw_if_idx,
-		       operation_mode);
+		       sock_filename, feature_mask, &sw_if_idx);
 
   if (renumber)
     vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2956,33 +2931,9 @@
   // Process node must connect
   vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
 
-  if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
-      !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
-      (vum->coalesce_frames > 0))
-    {
-      vum->interrupt_mode = 1;
-      vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
-				 VHOST_USER_EVENT_START_TIMER, 0);
-    }
   return rv;
 }
 
-static uword
-unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args)
-{
-  u8 *operation_mode = va_arg (*args, u8 *);
-  uword rc = 1;
-
-  if (unformat (input, "interrupt"))
-    *operation_mode = VHOST_USER_INTERRUPT_MODE;
-  else if (unformat (input, "polling"))
-    *operation_mode = VHOST_USER_POLLING_MODE;
-  else
-    rc = 0;
-
-  return rc;
-}
-
 clib_error_t *
 vhost_user_connect_command_fn (vlib_main_t * vm,
 			       unformat_input_t * input,
@@ -2998,7 +2949,6 @@
   u8 hwaddr[6];
   u8 *hw = NULL;
   clib_error_t *error = NULL;
-  u8 operation_mode = VHOST_USER_POLLING_MODE;
 
   /* Get a line of input. */
   if (!unformat_user (input, unformat_line_input, line_input))
@@ -3020,9 +2970,6 @@
 	{
 	  renumber = 1;
 	}
-      else if (unformat (line_input, "mode %U",
-			 unformat_vhost_user_operation_mode, &operation_mode))
-	;
       else
 	{
 	  error = clib_error_return (0, "unknown input `%U'",
@@ -3036,8 +2983,7 @@
   int rv;
   if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
 				  is_server, &sw_if_index, feature_mask,
-				  renumber, custom_dev_instance, hw,
-				  operation_mode)))
+				  renumber, custom_dev_instance, hw)))
     {
       error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
       goto done;
@@ -3127,7 +3073,6 @@
       vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
 
       vec_add2 (r_vuids, vuid, 1);
-      vuid->operation_mode = vui->operation_mode;
       vuid->sw_if_index = vui->sw_if_index;
       vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
       vuid->features = vui->features;
@@ -3152,25 +3097,6 @@
   return rv;
 }
 
-static u8 *
-format_vhost_user_operation_mode (u8 * s, va_list * va)
-{
-  int operation_mode = va_arg (*va, int);
-
-  switch (operation_mode)
-    {
-    case VHOST_USER_POLLING_MODE:
-      s = format (s, "%s", "polling");
-      break;
-    case VHOST_USER_INTERRUPT_MODE:
-      s = format (s, "%s", "interrupt");
-      break;
-    default:
-      s = format (s, "%s", "invalid");
-    }
-  return s;
-}
-
 clib_error_t *
 show_vhost_user_command_fn (vlib_main_t * vm,
 			    unformat_input_t * input,
@@ -3182,10 +3108,8 @@
   vhost_user_intf_t *vui;
   u32 hw_if_index, *hw_if_indices = 0;
   vnet_hw_interface_t *hi;
-  vhost_cpu_t *vhc;
-  vhost_iface_and_queue_t *vhiq;
+  u16 *queue;
   u32 ci;
-
   int i, j, q;
   int show_descr = 0;
   struct feat_struct
@@ -3238,6 +3162,8 @@
   vlib_cli_output (vm, "Virtio vhost-user interfaces");
   vlib_cli_output (vm, "Global:\n  coalesce frames %d time %e",
 		   vum->coalesce_frames, vum->coalesce_time);
+  vlib_cli_output (vm, "  number of rx virtqueues in interrupt mode: %d",
+		   vum->ifq_count);
 
   for (i = 0; i < vec_len (hw_if_indices); i++)
     {
@@ -3279,23 +3205,21 @@
 		       (vui->unix_server_index != ~0) ? "server" : "client",
 		       strerror (vui->sock_errno));
 
-      vlib_cli_output (vm, " configured mode: %U\n",
-		       format_vhost_user_operation_mode, vui->operation_mode);
       vlib_cli_output (vm, " rx placement: ");
-      vec_foreach (vhc, vum->cpus)
+
+      vec_foreach (queue, vui->rx_queues)
       {
-	vec_foreach (vhiq, vhc->rx_queues)
-	{
-	  if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
-	    {
-	      vlib_cli_output (vm, "   thread %d on vring %d\n",
-			       vhc - vum->cpus,
-			       VHOST_VRING_IDX_TX (vhiq->qid));
-	      vlib_cli_output (vm, "   mode: %U\n",
-			       format_vhost_user_operation_mode,
-			       vhc->operation_mode);
-	    }
-	}
+	vnet_main_t *vnm = vnet_get_main ();
+	uword thread_index;
+	vnet_hw_interface_rx_mode mode;
+
+	thread_index = vnet_get_device_input_thread_index (vnm,
+							   vui->hw_if_index,
+							   *queue);
+	vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
+	vlib_cli_output (vm, "   thread %d on vring %d, %U\n",
+			 thread_index, VHOST_VRING_IDX_TX (*queue),
+			 format_vnet_hw_interface_rx_mode, mode);
       }
 
       vlib_cli_output (vm, " tx placement: %s\n",
@@ -3444,8 +3368,7 @@
 VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
     .path = "create vhost-user",
     .short_help = "create vhost-user socket <socket-filename> [server] "
-    "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] "
-    "[mode {interrupt | polling}]",
+    "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
     .function = vhost_user_connect_command_fn,
 };
 /* *INDENT-ON* */
@@ -3648,69 +3571,6 @@
     }
 }
 
-static clib_error_t *
-vhost_thread_command_fn (vlib_main_t * vm,
-			 unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-  unformat_input_t _line_input, *line_input = &_line_input;
-  u32 worker_thread_index;
-  u32 sw_if_index;
-  u8 del = 0;
-  int rv;
-  clib_error_t *error = NULL;
-
-  /* Get a line of input. */
-  if (!unformat_user (input, unformat_line_input, line_input))
-    return 0;
-
-  if (!unformat
-      (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (),
-       &sw_if_index, &worker_thread_index))
-    {
-      error = clib_error_return (0, "unknown input `%U'",
-				 format_unformat_error, line_input);
-      goto done;
-    }
-
-  if (unformat (line_input, "del"))
-    del = 1;
-
-  if ((rv =
-       vhost_user_thread_placement (sw_if_index, worker_thread_index, del)))
-    {
-      error = clib_error_return (0, "vhost_user_thread_placement returned %d",
-				 rv);
-      goto done;
-    }
-
-done:
-  unformat_free (line_input);
-
-  return error;
-}
-
-
-/*?
- * This command is used to move the RX processing for the given
- * interfaces to the provided thread. If the '<em>del</em>' option is used,
- * the forced thread assignment is removed and the thread assigment is
- * reassigned automatically. Use '<em>show vhost-user <interface></em>'
- * to see the thread assignment.
- *
- * @cliexpar
- * Example of how to move the RX processing for a given interface to a given thread:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1}
- * Example of how to remove the forced thread assignment for a given interface:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_thread_command, static) = {
-    .path = "vhost thread",
-    .short_help = "vhost thread <iface> <worker-index> [del]",
-    .function = vhost_thread_command_fn,
-};
-/* *INDENT-ON* */
-
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h
index 56b6547..ceced34 100644
--- a/src/vnet/devices/virtio/vhost-user.h
+++ b/src/vnet/devices/virtio/vhost-user.h
@@ -66,13 +66,11 @@
 int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
 			  const char *sock_filename, u8 is_server,
 			  u32 * sw_if_index, u64 feature_mask,
-			  u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
-			  u8 operation_mode);
+			  u8 renumber, u32 custom_dev_instance, u8 * hwaddr);
 int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
 			  const char *sock_filename, u8 is_server,
 			  u32 sw_if_index, u64 feature_mask,
-			  u8 renumber, u32 custom_dev_instance,
-			  u8 operation_mode);
+			  u8 renumber, u32 custom_dev_instance);
 int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
 			  u32 sw_if_index);
 
@@ -210,14 +208,13 @@
   u32 callfd_idx;
   u32 kickfd_idx;
   u64 log_guest_addr;
-  u32 interrupt_thread_index;
+
+  /* The rx queue policy (interrupt/adaptive/polling) for this queue */
+  u32 mode;
 } vhost_user_vring_t;
 
-#define VHOST_USER_POLLING_MODE   0
-#define VHOST_USER_INTERRUPT_MODE 1
-#define VHOST_USER_ADAPTIVE_MODE  2
-
 #define VHOST_USER_EVENT_START_TIMER 1
+#define VHOST_USER_EVENT_STOP_TIMER  2
 
 typedef struct
 {
@@ -258,20 +255,12 @@
   u8 use_tx_spinlock;
   u16 *per_cpu_tx_qid;
 
-  /* Vector of workers for this interface */
-  u32 *workers;
-
-  u8 operation_mode;
+  /* Vector of active rx queues for this interface */
+  u16 *rx_queues;
 } vhost_user_intf_t;
 
 typedef struct
 {
-  u16 vhost_iface_index;
-  u16 qid;
-} vhost_iface_and_queue_t;
-
-typedef struct
-{
   uword dst;
   uword src;
   u32 len;
@@ -292,7 +281,6 @@
 
 typedef struct
 {
-  vhost_iface_and_queue_t *rx_queues;
   u32 rx_buffers_len;
   u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
 
@@ -302,12 +290,6 @@
   /* This is here so it doesn't end-up
    * using stack or registers. */
   vhost_trace_t *current_trace;
-
-  /* bitmap of pending rx interfaces */
-  uword *pending_input_bitmap;
-
-  /* The operation mode computed per cpu based on interface setting */
-  u8 operation_mode;
 } vhost_cpu_t;
 
 typedef struct
@@ -320,20 +302,14 @@
   f64 coalesce_time;
   int dont_dump_vhost_user_memory;
 
-  /** first cpu index */
-  u32 input_cpu_first_index;
-
-  /** total cpu count */
-  u32 input_cpu_count;
-
   /** Per-CPU data for vhost-user */
   vhost_cpu_t *cpus;
 
   /** Pseudo random iterator */
   u32 random;
 
-  /* Node is in interrupt mode */
-  u8 interrupt_mode;
+  /** The number of rx interface/queue pairs in interrupt mode */
+  u32 ifq_count;
 } vhost_user_main_t;
 
 typedef struct
@@ -346,7 +322,6 @@
   u8 sock_filename[256];
   u32 num_regions;
   int sock_errno;
-  u8 operation_mode;
 } vhost_user_intf_details_t;
 
 int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
index ac7afa6..8dbd032 100644
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ b/src/vnet/devices/virtio/vhost_user_api.c
@@ -81,8 +81,7 @@
   rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename,
 			     mp->is_server, &sw_if_index, (u64) ~ 0,
 			     mp->renumber, ntohl (mp->custom_dev_instance),
-			     (mp->use_custom_mac) ? mp->mac_address : NULL,
-			     mp->operation_mode);
+			     (mp->use_custom_mac) ? mp->mac_address : NULL);
 
   /* Remember an interface tag for the new interface */
   if (rv == 0)
@@ -117,8 +116,7 @@
 
   rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename,
 			     mp->is_server, sw_if_index, (u64) ~ 0,
-			     mp->renumber, ntohl (mp->custom_dev_instance),
-			     mp->operation_mode);
+			     mp->renumber, ntohl (mp->custom_dev_instance));
 
   REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
 }
@@ -164,7 +162,6 @@
   mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
   mp->features = clib_net_to_host_u64 (vui->features);
   mp->is_server = vui->is_server;
-  mp->operation_mode = vui->operation_mode;
   mp->num_regions = ntohl (vui->num_regions);
   mp->sock_errno = ntohl (vui->sock_errno);
   mp->context = context;