virtio: enable msix interrupt mode

Change-Id: Idd560f3afde1dd03bc3d6fbb2070096146865f50
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index 78d6907..aa6a342 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -324,6 +324,12 @@
   virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
   virtio_vring_t *vring = vec_elt_at_index (vif->vrings, qid);
 
+  if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode))
+    {
+      vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT;
+      return clib_error_return (0, "interrupt mode is not supported");
+    }
+
   if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
     vring->avail->flags |= VIRTIO_RING_FLAG_MASK_INT;
   else
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index ffc18f4..020b088 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -36,9 +36,7 @@
 
 #define PCI_MSIX_ENABLE 0x8000
 
-static u32 msix_enabled = 0;
-
-#define PCI_CONFIG_SIZE ((msix_enabled == VIRTIO_MSIX_ENABLED) ? \
+#define PCI_CONFIG_SIZE(vif) ((vif->msix_enabled == VIRTIO_MSIX_ENABLED) ? \
   24 : 20)
 
 static pci_device_id_t virtio_pci_device_ids[] = {
@@ -63,17 +61,17 @@
       if (len >= 4)
 	{
 	  size = 4;
-	  vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+	  vlib_pci_read_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst);
 	}
       else if (len >= 2)
 	{
 	  size = 2;
-	  vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+	  vlib_pci_read_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst);
 	}
       else
 	{
 	  size = 1;
-	  vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, dst);
+	  vlib_pci_read_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, dst);
 	}
       dst = (u8 *) dst + size;
       addr += size;
@@ -93,17 +91,17 @@
       if (len >= 4)
 	{
 	  size = 4;
-	  vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE + addr, src);
+	  vlib_pci_write_io_u32 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src);
 	}
       else if (len >= 2)
 	{
 	  size = 2;
-	  vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE + addr, src);
+	  vlib_pci_write_io_u16 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src);
 	}
       else
 	{
 	  size = 1;
-	  vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE + addr, src);
+	  vlib_pci_write_io_u8 (vm, h, PCI_CONFIG_SIZE (vif) + addr, src);
 	}
       src = (u8 *) src + size;
       addr += size;
@@ -244,6 +242,7 @@
 static clib_error_t *
 virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif)
 {
+  virtio_main_t *vim = &virtio_main;
   virtio_net_config_t config;
   clib_error_t *error = 0;
   u16 max_queue_pairs = 1;
@@ -255,6 +254,7 @@
       max_queue_pairs = config.max_virtqueue_pairs;
     }
 
+  virtio_log_debug (vim, vif, "max queue pair is %x", max_queue_pairs);
   if (max_queue_pairs < 1 || max_queue_pairs > 0x8000)
     clib_error_return (error, "max queue pair is %x", max_queue_pairs);
 
@@ -407,6 +407,19 @@
   vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8);
   vlib_cli_output (vm, "isr 0x%x", data_u8);
 
+  if (vif->msix_enabled == VIRTIO_MSIX_ENABLED)
+    {
+      vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR,
+			    &data_u16);
+      vlib_cli_output (vm, "config vector 0x%x", data_u16);
+      u16 queue_id = 0;
+      vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL,
+			     &queue_id);
+      vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR,
+			    &data_u16);
+      vlib_cli_output (vm, "queue vector for queue (0) 0x%x", data_u16);
+    }
+
   u8 mac[6];
   virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0);
   vlib_cli_output (vm, "mac %U", format_ethernet_address, mac);
@@ -420,7 +433,7 @@
 				 10);
   vlib_cli_output (vm, "mtu 0x%x", data_u16);
 
-  u32 i = PCI_CONFIG_SIZE + 12, a = 4;
+  u32 i = PCI_CONFIG_SIZE (vif) + 12, a = 4;
   i += a;
   i &= ~a;
   for (; i < 64; i += 4)
@@ -518,6 +531,7 @@
     | VIRTIO_FEATURE (VIRTIO_NET_F_MAC)
     | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)
     | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)
+    | VIRTIO_FEATURE (VIRTIO_F_NOTIFY_ON_EMPTY)
     | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)
     | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
 
@@ -590,19 +604,26 @@
   vlib_pci_dev_handle_t h = vif->pci_dev_handle;
 
   if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos)))
-    clib_error_return (error, "error in reading capabilty list position");
-
+    {
+      virtio_log_error (vim, vif, "error in reading capabilty list position");
+      clib_error_return (error, "error in reading capabilty list position");
+    }
   while (pos)
     {
       if ((error =
 	   vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap,
 				       sizeof (cap))))
-	clib_error_return (error, "error in reading the capability at [%2x]",
-			   pos);
+	{
+	  virtio_log_error (vim, vif, "%s [%2x]",
+			    "error in reading the capability at", pos);
+	  clib_error_return (error,
+			     "error in reading the capability at [%2x]", pos);
+	}
 
       if (cap.cap_vndr == PCI_CAP_ID_MSIX)
 	{
-	  u16 flags;
+	  u16 flags, table_size, table_size_mask = 0x07FF;
+
 	  if ((error =
 	       vlib_pci_read_write_config (vm, h, VLIB_READ, pos + 2, &flags,
 					   sizeof (flags))))
@@ -610,10 +631,20 @@
 			       "error in reading the capability at [%2x]",
 			       pos + 2);
 
+	  table_size = flags & table_size_mask;
+	  virtio_log_debug (vim, vif, "flags:0x%x %s 0x%x", flags,
+			    "msix interrupt vector table-size", table_size);
+
 	  if (flags & PCI_MSIX_ENABLE)
-	    msix_enabled = VIRTIO_MSIX_ENABLED;
+	    {
+	      virtio_log_debug (vim, vif, "msix interrupt enabled");
+	      vif->msix_enabled = VIRTIO_MSIX_ENABLED;
+	    }
 	  else
-	    msix_enabled = VIRTIO_MSIX_DISABLED;
+	    {
+	      virtio_log_debug (vim, vif, "msix interrupt disabled");
+	      vif->msix_enabled = VIRTIO_MSIX_DISABLED;
+	    }
 	}
 
       if (cap.cap_vndr != PCI_CAP_ID_VNDR)
@@ -623,6 +654,9 @@
 	  goto next;
 	}
 
+      virtio_log_debug (vim, vif,
+			"[%4x] cfg type: %u, bar: %u, offset: %04x, len: %u",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
       switch (cap.cfg_type)
 	{
 	case VIRTIO_PCI_CAP_COMMON_CFG:
@@ -664,14 +698,17 @@
 			virtio_pci_create_if_args_t * args)
 {
   clib_error_t *error = 0;
+  virtio_main_t *vim = &virtio_main;
   u8 status = 0;
 
   if ((error = virtio_pci_read_caps (vm, vif)))
     clib_error_return (error, "Device not supported");
 
   if (virtio_pci_reset_device (vm, vif) < 0)
-    clib_error_return (error, "Failed to reset the device");
-
+    {
+      virtio_log_error (vim, vif, "Failed to reset the device");
+      clib_error_return (error, "Failed to reset the device");
+    }
   /*
    * read device features and negotiate (user) requested features
    */
@@ -684,8 +721,11 @@
   virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK);
   status = virtio_pci_legacy_get_status (vm, vif);
   if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK))
-    clib_error_return (error, "Device doesn't support requested features");
-
+    {
+      virtio_log_error (vim, vif,
+			"error encountered: Device doesn't support requested features");
+      clib_error_return (error, "Device doesn't support requested features");
+    }
   vif->status = status;
 
   if (virtio_pci_get_mac (vm, vif))
@@ -712,10 +752,14 @@
   if ((error = virtio_pci_vring_init (vm, vif, 1)))
     goto error;
 
-  if (msix_enabled == VIRTIO_MSIX_ENABLED)
+  if (vif->msix_enabled == VIRTIO_MSIX_ENABLED)
     {
-      virtio_pci_legacy_set_config_irq (vm, vif, VIRTIO_MSI_NO_VECTOR);
-      virtio_pci_legacy_set_queue_irq (vm, vif, VIRTIO_MSI_NO_VECTOR, 0);
+      if (virtio_pci_legacy_set_config_irq (vm, vif, 1) ==
+	  VIRTIO_MSI_NO_VECTOR)
+	virtio_log_warning (vim, vif, "config vector 1 is not set");
+      if (virtio_pci_legacy_set_queue_irq (vm, vif, 0, 0) ==
+	  VIRTIO_MSI_NO_VECTOR)
+	virtio_log_warning (vim, vif, "queue vector 0 is not set");
     }
   virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK);
   vif->status = virtio_pci_legacy_get_status (vm, vif);
@@ -771,13 +815,6 @@
   vif->per_interface_next_index = ~0;
   vif->pci_addr.as_u32 = args->addr;
 
-  if ((vif->fd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0)
-    {
-      args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
-      args->error = clib_error_return_unix (0, "open '/dev/vhost-net'");
-      goto error;
-    }
-
   if ((error =
        vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr,
 			     virtio_pci_device_ids, &h)))
@@ -808,13 +845,7 @@
       goto error;
     }
 
-  if ((error = virtio_pci_device_init (vm, vif, args)))
-    {
-      virtio_log_error (vim, vif, "error encountered on device init");
-      goto error;
-    }
-
-  if (msix_enabled == VIRTIO_MSIX_ENABLED)
+  if (vlib_pci_get_num_msix_interrupts (vm, h) > 1)
     {
       if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1,
 						   &virtio_pci_irq_0_handler)))
@@ -837,10 +868,32 @@
 			    "error encountered on pci enable msix irq");
 	  goto error;
 	}
+      vif->support_int_mode = 1;
+      virtio_log_debug (vim, vif, "device supports msix interrupts");
+    }
+  else if (vlib_pci_get_num_msix_interrupts (vm, h) == 1)
+    {
+      /*
+       * if msix table-size is 1, fall back to intX.
+       */
+      if ((error =
+	   vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler)))
+	{
+	  virtio_log_error (vim, vif,
+			    "error encountered on pci register interrupt handler");
+	  goto error;
+	}
+      vif->support_int_mode = 1;
+      virtio_log_debug (vim, vif, "pci register interrupt handler");
     }
   else
     {
-      vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler);
+      /*
+       * WARN: intX is showing some weird behaviour.
+       * Please don't use interrupt mode with UIO driver.
+       */
+      vif->support_int_mode = 0;
+      virtio_log_debug (vim, vif, "driver is configured in poll mode only");
     }
 
   if ((error = vlib_pci_intr_enable (vm, h)))
@@ -850,6 +903,12 @@
       goto error;
     }
 
+  if ((error = virtio_pci_device_init (vm, vif, args)))
+    {
+      virtio_log_error (vim, vif, "error encountered on device init");
+      goto error;
+    }
+
   vif->type = VIRTIO_IF_TYPE_PCI;
   /* create interface */
   error = ethernet_register_interface (vnm, virtio_device_class.index,
@@ -875,6 +934,8 @@
   vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, 0, ~0);
   virtio_vring_set_numa_node (vm, vif, 0);
 
+  vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, 0,
+				 VNET_HW_INTERFACE_RX_MODE_POLLING);
   if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
     {
       vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
@@ -941,7 +1002,7 @@
   vec_free (vif->vrings);
 
   if (vif->fd != -1)
-    close (vif->fd);
+    vif->fd = -1;
   if (vif->tap_fd != -1)
     vif->tap_fd = -1;
   clib_error_free (vif->error);
diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h
index b08e40e..40648c8 100644
--- a/src/vnet/devices/virtio/pci.h
+++ b/src/vnet/devices/virtio/pci.h
@@ -37,6 +37,7 @@
 					   (16, RW) */
 
 /*
+ * Vector value used to disable MSI for queue.
  * define in include/linux/virtio_pci.h
  * #define VIRTIO_MSI_NO_VECTOR 0xFFFF
  */
@@ -45,7 +46,6 @@
 #define VIRTIO_PCI_ISR_INTR   0x1
 /* The bit of the ISR which indicates a device configuration change. */
 #define VIRTIO_PCI_ISR_CONFIG 0x2
-/* Vector value used to disable MSI for queue. */
 
 /* VirtIO device IDs. */
 #define VIRTIO_ID_NETWORK  0x01
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index b2e74be..af61ca5 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -143,7 +143,11 @@
     pci_addr_t pci_addr;
   };
   u32 per_interface_next_index;
-  int fd;
+  union
+  {
+    int fd;
+    u32 msix_enabled;
+  };
   union
   {
     int tap_fd;
@@ -155,12 +159,12 @@
 
   /* error */
   clib_error_t *error;
+  u8 support_int_mode;		/* support interrupt mode */
   u16 max_queue_pairs;
   u16 tx_ring_sz;
   u16 rx_ring_sz;
   u8 status;
   u8 mac_addr[6];
-  u64 bar[2];
   u8 *host_if_name;
   u8 *net_ns;
   u8 *host_bridge;
@@ -169,7 +173,6 @@
   u8 host_ip4_prefix_len;
   ip6_address_t host_ip6_addr;
   u8 host_ip6_prefix_len;
-
   int ifindex;
 } virtio_if_t;