physmem: register region with IOMMU

The VFIO file descriptor is extracted in a hackish way, as DPDK doesn't
provide a method to retrieve it.

This fixes an issue with DPDK drivers not working correctly when the
IOMMU is enabled and external buffer memory is used.

Change-Id: I5eaa8e78741c50504d87d44e706b5997c8189554
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
index dad4ef0..b5f84ec 100644
--- a/src/vlib/linux/physmem.c
+++ b/src/vlib/linux/physmem.c
@@ -43,6 +43,8 @@
 #include <sys/mman.h>
 #include <sys/fcntl.h>
 #include <sys/stat.h>
+#include <linux/vfio.h>
+#include <unistd.h>
 
 #include <vppinfra/linux/syscall.h>
 #include <vppinfra/linux/sysfs.h>
@@ -50,6 +52,8 @@
 #include <vlib/physmem.h>
 #include <vlib/unix/unix.h>
 
+static int vfio_container_fd = -1;
+
 static void *
 unix_physmem_alloc_aligned (vlib_main_t * vm, vlib_physmem_region_index_t idx,
 			    uword n_bytes, uword alignment)
@@ -110,6 +114,56 @@
 }
 
 static clib_error_t *
+scan_vfio_fd (void *arg, u8 * path_name, u8 * file_name)
+{
+  /* foreach_directory_file () callback: when path_name is a symlink to
+     the VFIO container device, store file_name (atoi) as container fd. */
+  const char vfio_dev[] = "/dev/vfio/vfio";
+  char target[sizeof (vfio_dev)] = { 0 };
+  ssize_t len = readlink ((char *) path_name, target, sizeof (vfio_dev));
+
+  /* Accept only an exact-length, exact-content link target. */
+  if (len + 1 == sizeof (vfio_dev)
+      && strncmp (vfio_dev, target, sizeof (vfio_dev)) == 0)
+    vfio_container_fd = atoi ((char *) file_name);
+  return 0;
+}
+
+static clib_error_t *
+unix_physmem_region_iommu_register (vlib_physmem_region_t * pr)
+{
+  /* DMA-map every page of the region via the VFIO container. Returns 0
+     on success or when no usable container is found, else an error. */
+  struct vfio_iommu_type1_dma_map dma_map = { 0 };
+  int i, fd;
+
+  /* Container fd not known yet: scan this process' open fds for it. */
+  if (vfio_container_fd == -1)
+    foreach_directory_file ("/proc/self/fd", scan_vfio_fd, 0, 0);
+
+  fd = vfio_container_fd;
+  if (fd < 0)
+    return 0;
+  if (ioctl (fd, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
+    return 0;
+  if (ioctl (fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) == 0)
+    return 0;
+
+  dma_map.argsz = sizeof (struct vfio_iommu_type1_dma_map);
+  dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+  vec_foreach_index (i, pr->page_table)
+  {
+    /* Widen before shifting: int math overflows at 2 GiB offsets. */
+    dma_map.size = (uword) 1 << pr->log2_page_size;
+    dma_map.vaddr = pointer_to_uword (pr->mem) + i * dma_map.size;
+    dma_map.iova = pr->page_table[i];
+    if (ioctl (fd, VFIO_IOMMU_MAP_DMA, &dma_map) != 0)
+      return clib_error_return_unix (0, "ioctl (VFIO_IOMMU_MAP_DMA)");
+  }
+  return 0;
+}
+
+static clib_error_t *
 unix_physmem_region_alloc (vlib_main_t * vm, char *name, u32 size,
 			   u8 numa_node, u32 flags,
 			   vlib_physmem_region_index_t * idx)
@@ -180,6 +234,9 @@
 	}
       pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
 					      pr->n_pages);
+      error = unix_physmem_region_iommu_register (pr);
+      if (error)
+	clib_error_report (error);
     }
 
   if (flags & VLIB_PHYSMEM_F_INIT_MHEAP)