pci: add option to force uio binding

Type: improvement

Change-Id: Ifea4badd58f7e2b5e792d7506f6747851a08587f
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/docs/configuration/reference.rst b/docs/configuration/reference.rst
index 84b2fd7..84a951c 100644
--- a/docs/configuration/reference.rst
+++ b/docs/configuration/reference.rst
@@ -666,6 +666,18 @@
 
    uio-driver vfio-pci
 
+uio-bind-force
+^^^^^^^^^^^^^^
+
+Force VPP to rebind the interface(s) to the selected UIO driver, even if the
+interface is up in Linux.
+By default, VPP will refuse to bind an interface if it is up in Linux,
+in case it is in active use.
+
+.. code-block:: console
+
+   uio-bind-force
+
 no-multi-seg
 ^^^^^^^^^^^^
 
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 02cf681..7569fc6 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -265,6 +265,7 @@
   u8 **eal_init_args;
   u8 *eal_init_args_str;
   u8 *uio_driver_name;
+  u8 uio_bind_force;
   u8 enable_telemetry;
   u16 max_simd_bitwidth;
 
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 3f27a07..83ce2dc 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -681,7 +681,8 @@
         continue;
       }
 
-    error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
+    error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name,
+				  conf->uio_bind_force);
 
     if (error)
       {
@@ -1089,6 +1090,8 @@
 	}
       else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
 	;
+      else if (unformat (input, "uio-bind-force"))
+	conf->uio_bind_force = 1;
       else if (unformat (input, "socket-mem %s", &socket_mem))
 	;
       else if (unformat (input, "no-pci"))
diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c
index 039e9f3..f751358 100644
--- a/src/plugins/vmxnet3/cli.c
+++ b/src/plugins/vmxnet3/cli.c
@@ -47,8 +47,10 @@
 	args.enable_gso = 1;
       else if (unformat (line_input, "elog"))
 	args.enable_elog = 1;
+      else if (unformat (line_input, "bind force"))
+	args.bind = VMXNET3_BIND_FORCE;
       else if (unformat (line_input, "bind"))
-	args.bind = 1;
+	args.bind = VMXNET3_BIND_DEFAULT;
       else if (unformat (line_input, "rx-queue-size %u", &size))
 	args.rxq_size = size;
       else if (unformat (line_input, "tx-queue-size %u", &size))
@@ -77,10 +79,11 @@
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (vmxnet3_create_command, static) = {
   .path = "create interface vmxnet3",
-  .short_help = "create interface vmxnet3 <pci-address>"
-                " [rx-queue-size <size>] [tx-queue-size <size>]"
-                " [num-tx-queues <number>] [num-rx-queues <number>] [bind]"
-                " [gso]",
+  .short_help =
+    "create interface vmxnet3 <pci-address>"
+    " [rx-queue-size <size>] [tx-queue-size <size>]"
+    " [num-tx-queues <number>] [num-rx-queues <number>] [bind [force]]"
+    " [gso]",
   .function = vmxnet3_create_command_fn,
 };
 /* *INDENT-ON* */
diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c
index 770cb2d..be862d7 100644
--- a/src/plugins/vmxnet3/vmxnet3.c
+++ b/src/plugins/vmxnet3/vmxnet3.c
@@ -692,7 +692,8 @@
 
   if (args->bind)
     {
-      error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto");
+      error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto",
+				    VMXNET3_BIND_FORCE == args->bind);
       if (error)
 	{
 	  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h
index 81aeec6..89602f8 100644
--- a/src/plugins/vmxnet3/vmxnet3.h
+++ b/src/plugins/vmxnet3/vmxnet3.h
@@ -606,6 +606,13 @@
 
 extern vmxnet3_main_t vmxnet3_main;
 
+typedef enum
+{
+  VMXNET3_BIND_NONE = 0,    /* do not call vlib_pci_bind_to_uio */
+  VMXNET3_BIND_DEFAULT = 1, /* bind, unless the host interface is up */
+  VMXNET3_BIND_FORCE = 2,   /* bind, skipping the host interface up check */
+} __clib_packed vmxnet3_bind_t;
+
 typedef struct
 {
   vlib_pci_addr_t addr;
@@ -614,7 +621,7 @@
   u16 rxq_num;
   u16 txq_size;
   u16 txq_num;
-  u8 bind;
+  vmxnet3_bind_t bind;
   u8 enable_gso;
   /* return */
   i32 rv;
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 9f0629f..83e9dde 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -453,8 +453,8 @@
 }
 
 clib_error_t *
-vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
-		      char *uio_drv_name)
+vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr,
+		      char *uio_drv_name, int force)
 {
   clib_error_t *error = 0;
   u8 *s = 0, *driver_name = 0;
@@ -523,76 +523,80 @@
        (strcmp ("igb_uio", (char *) driver_name) == 0)))
     goto done;
 
-  /* walk trough all linux interfaces and if interface belonging to
-     this device is founf check if interface is admin up  */
-  dir = opendir ("/sys/class/net");
-  s = format (s, "%U%c", format_vlib_pci_addr, addr, 0);
-
-  if (!dir)
+  if (!force)
     {
-      error = clib_error_return (0, "Skipping PCI device %U: failed to "
-				 "read /sys/class/net",
-				 format_vlib_pci_addr, addr);
-      goto done;
-    }
+      /* walk through all linux interfaces and if interface belonging to
+	 this device is found check if interface is admin up  */
+      dir = opendir ("/sys/class/net");
+      s = format (s, "%U%c", format_vlib_pci_addr, addr, 0);
 
-  fd = socket (PF_INET, SOCK_DGRAM, 0);
-  if (fd < 0)
-    {
-      error = clib_error_return_unix (0, "socket");
-      goto done;
-    }
-
-  while ((e = readdir (dir)))
-    {
-      struct ifreq ifr;
-      struct ethtool_drvinfo drvinfo;
-
-      if (e->d_name[0] == '.')	/* skip . and .. */
-	continue;
-
-      clib_memset (&ifr, 0, sizeof ifr);
-      clib_memset (&drvinfo, 0, sizeof drvinfo);
-      ifr.ifr_data = (char *) &drvinfo;
-      clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
-
-      drvinfo.cmd = ETHTOOL_GDRVINFO;
-      if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
+      if (!dir)
 	{
-	  /* Some interfaces (eg "lo") don't support this ioctl */
-	  if ((errno != ENOTSUP) && (errno != ENODEV))
-	    clib_unix_warning ("ioctl fetch intf %s bus info error",
-			       e->d_name);
-	  continue;
-	}
-
-      if (strcmp ((char *) s, drvinfo.bus_info))
-	continue;
-
-      clib_memset (&ifr, 0, sizeof (ifr));
-      clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
-
-      if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
-	{
-	  error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
-					  e->d_name);
-	  close (fd);
+	  error = clib_error_return (0,
+				     "Skipping PCI device %U: failed to "
+				     "read /sys/class/net",
+				     format_vlib_pci_addr, addr);
 	  goto done;
 	}
 
-      if (ifr.ifr_flags & IFF_UP)
+      fd = socket (PF_INET, SOCK_DGRAM, 0);
+      if (fd < 0)
 	{
-	  vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default,
-		    "Skipping PCI device %U as host "
-		    "interface %s is up", format_vlib_pci_addr, addr,
-		    e->d_name);
-	  close (fd);
+	  error = clib_error_return_unix (0, "socket");
 	  goto done;
 	}
-    }
 
-  close (fd);
-  vec_reset_length (s);
+      while ((e = readdir (dir)))
+	{
+	  struct ifreq ifr;
+	  struct ethtool_drvinfo drvinfo;
+
+	  if (e->d_name[0] == '.') /* skip . and .. */
+	    continue;
+
+	  clib_memset (&ifr, 0, sizeof ifr);
+	  clib_memset (&drvinfo, 0, sizeof drvinfo);
+	  ifr.ifr_data = (char *) &drvinfo;
+	  clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
+
+	  drvinfo.cmd = ETHTOOL_GDRVINFO;
+	  if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
+	    {
+	      /* Some interfaces (eg "lo") don't support this ioctl */
+	      if ((errno != ENOTSUP) && (errno != ENODEV))
+		clib_unix_warning ("ioctl fetch intf %s bus info error",
+				   e->d_name);
+	      continue;
+	    }
+
+	  if (strcmp ((char *) s, drvinfo.bus_info))
+	    continue;
+
+	  clib_memset (&ifr, 0, sizeof (ifr));
+	  clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
+
+	  if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+	    {
+	      error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+					      e->d_name);
+	      close (fd);
+	      goto done;
+	    }
+
+	  if (ifr.ifr_flags & IFF_UP)
+	    {
+	      vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default,
+			"Skipping PCI device %U as host "
+			"interface %s is up",
+			format_vlib_pci_addr, addr, e->d_name);
+	      close (fd);
+	      goto done;
+	    }
+	}
+
+      close (fd);
+      vec_reset_length (s);
+    }
 
   s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
   clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, addr);
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
index 1dc4ce6..4e9cf4a 100644
--- a/src/vlib/pci/pci.h
+++ b/src/vlib/pci/pci.h
@@ -182,8 +182,8 @@
 }                                                               \
 __VA_ARGS__ pci_device_registration_t x
 
-clib_error_t *vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
-				    char *uio_driver_name);
+clib_error_t *vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr,
+				    char *uio_driver_name, int force);
 
 /* Configuration space read/write. */
 clib_error_t *vlib_pci_read_write_config (vlib_main_t * vm,