dpdk: move DPDK vfio hack to dpdk plugin
Change-Id: I806cbf8c6c49643fe6c317bcceab93c1b9d441ab
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index f6379a1..47d6648 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -43,6 +43,10 @@
* Allocate/free network buffers.
*/
+#include <unistd.h>
+#include <linux/vfio.h>
+#include <sys/ioctl.h>
+
#include <rte_config.h>
#include <rte_common.h>
@@ -71,11 +75,13 @@
#include <rte_version.h>
#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vlib/linux/vfio.h>
#include <vnet/vnet.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
-
STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM,
"VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM");
@@ -91,6 +97,7 @@
typedef struct
{
+ int vfio_container_fd;	/* VFIO container fd recorded by scan_vfio_fd(); initialised to -1 and stays -1 until one is found */
dpdk_buffer_per_thread_data *ptd;
} dpdk_buffer_main_t;
@@ -459,11 +466,38 @@
vlib_worker_thread_barrier_release (vm);
}
+/*
+ * Per-entry callback handed to foreach_directory_file ("/proc/self/fd", ...).
+ *
+ * path_name is the entry's path (a vector, NUL-terminated here before use)
+ * and file_name is the entry's name, i.e. the decimal fd number. When the
+ * entry is a symlink whose target is exactly "/dev/vfio/vfio" and the fd is
+ * not the container already tracked in vfio_main, the fd is stored in
+ * dpdk_buffer_main.vfio_container_fd. Presumably that fd was opened by
+ * DPDK -- confirm against the DPDK EAL in use.
+ *
+ * arg is unused. Always returns 0, so the directory walk is never aborted;
+ * if several entries match, the last one visited wins.
+ */
+static clib_error_t *
+scan_vfio_fd (void *arg, u8 * path_name, u8 * file_name)
+{
+  dpdk_buffer_main_t *dbm = &dpdk_buffer_main;
+  linux_vfio_main_t *lvm = &vfio_main;
+  const char vfio_dev[] = "/dev/vfio/vfio";
+  char target[sizeof (vfio_dev)] = { 0 };
+  u8 *link_path = format (0, "%v%c", path_name, 0);
+  int is_vfio_container;
+
+  /* readlink() does not NUL-terminate; a link of the right length returns
+     sizeof (vfio_dev) - 1, anything longer or an error (-1) is rejected. */
+  is_vfio_container =
+    (readlink ((char *) link_path, target, sizeof (vfio_dev)) + 1 ==
+     sizeof (vfio_dev))
+    && strncmp (vfio_dev, target, sizeof (vfio_dev)) == 0;
+
+  if (is_vfio_container)
+    {
+      int fd = atoi ((char *) file_name);
+
+      /* skip the container owned by vlib itself */
+      if (fd != lvm->container_fd)
+        dbm->vfio_container_fd = fd;
+    }
+
+  vec_free (link_path);
+  return 0;
+}
+
clib_error_t *
dpdk_pool_create (vlib_main_t * vm, u8 * pool_name, u32 elt_size,
u32 num_elts, u32 pool_priv_size, u16 cache_size, u8 numa,
- struct rte_mempool **_mp, vlib_physmem_region_index_t * pri)
+ struct rte_mempool ** _mp,
+ vlib_physmem_region_index_t * pri)
{
+ dpdk_buffer_main_t *dbm = &dpdk_buffer_main;
struct rte_mempool *mp;
vlib_physmem_region_t *pr;
clib_error_t *error = 0;
@@ -501,6 +535,33 @@
_mp[0] = mp;
+  /* DPDK currently provides no API to DMA-map memory for an empty mempool,
+     so as a workaround we look through /proc/self/fd for a second fd that
+     points at /dev/vfio/vfio (see scan_vfio_fd) and map the pool's pages
+     into that container ourselves. It would be nice to at least have an
+     API to get the VFIO container fd. */
+  if (dbm->vfio_container_fd == -1)
+    foreach_directory_file ("/proc/self/fd", scan_vfio_fd, 0, 0);
+
+  if (dbm->vfio_container_fd != -1)
+    {
+      struct vfio_iommu_type1_dma_map dm = { 0 };
+      int i, rv = 0;
+      dm.argsz = sizeof (struct vfio_iommu_type1_dma_map);
+      dm.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+      /* *INDENT-OFF* */
+      vec_foreach_index (i, pr->page_table)
+        {
+          /* Widen before shifting: with a plain int the page offset
+             overflows once it reaches 2GB, and the page size overflows
+             for log2_page_size >= 31. */
+          dm.vaddr = pointer_to_uword (pr->mem) + ((u64) i << pr->log2_page_size);
+          dm.size = 1ULL << pr->log2_page_size;
+          dm.iova = pr->page_table[i];
+          if ((rv = ioctl (dbm->vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dm)))
+            break;
+        }
+      /* *INDENT-ON* */
+      /* ioctl() signals failure with -1 and puts the cause in errno, so the
+         EINVAL exemption has to be tested against errno; comparing rv with
+         EINVAL was always true and warned on every failure. EINVAL itself
+         is deliberately not reported, as the original code intended. */
+      if (rv != 0 && errno != EINVAL)
+        clib_unix_warning ("ioctl(VFIO_IOMMU_MAP_DMA) pool '%s'", pool_name);
+    }
+
return 0;
}
@@ -665,8 +726,12 @@
{
dpdk_buffer_main_t *dbm = &dpdk_buffer_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+
vec_validate_aligned (dbm->ptd, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
+
+ dbm->vfio_container_fd = -1;
+
return 0;
}
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
index cf2961b..1a44992 100644
--- a/src/vlib/linux/physmem.c
+++ b/src/vlib/linux/physmem.c
@@ -174,16 +174,8 @@
}
}
-#if 0
- if ((vpm->flags & VLIB_PHYSMEM_MAIN_F_HAVE_IOMMU) ||
- (vpm->flags & VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP) == 0)
- for (i = 0; i < pr->n_pages; i++)
- vec_add1 (pr->page_table, pointer_to_uword (pr->mem) +
- i * (1 << pr->log2_page_size));
- else
-#endif
- pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
- pr->n_pages);
+ pr->page_table = clib_mem_vm_get_paddr (pr->mem, pr->log2_page_size,
+ pr->n_pages);
linux_vfio_dma_map_regions (vm);
@@ -224,7 +216,6 @@
unix_physmem_init (vlib_main_t * vm)
{
vlib_physmem_main_t *vpm = &physmem_main;
- linux_vfio_main_t *lvm = &vfio_main;
clib_error_t *error = 0;
u64 *pt = 0;
@@ -241,9 +232,6 @@
if ((error = linux_vfio_init (vm)))
return error;
- if (lvm->flags & LINUX_VFIO_F_HAVE_IOMMU)
- vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_IOMMU;
-
vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
vm->os_physmem_free = unix_physmem_free;
vm->os_physmem_region_alloc = unix_physmem_region_alloc;
diff --git a/src/vlib/linux/vfio.c b/src/vlib/linux/vfio.c
index e39bf01..621dfb2 100644
--- a/src/vlib/linux/vfio.c
+++ b/src/vlib/linux/vfio.c
@@ -73,7 +73,7 @@
int rv;
dm.vaddr = pointer_to_uword (pr->mem) + (i << pr->log2_page_size);
dm.size = 1 << pr->log2_page_size;
- dm.iova = pr->page_table[i];
+ dm.iova = dm.vaddr;
if ((rv = ioctl (fd, VFIO_IOMMU_MAP_DMA, &dm)))
return rv;
}
@@ -82,30 +82,6 @@
return 0;
}
-static clib_error_t *
-scan_vfio_fd (void *arg, u8 * path_name, u8 * file_name)
-{
- linux_vfio_main_t *lvm = &vfio_main;
- const char fn[] = "/dev/vfio/vfio";
- char buff[sizeof (fn)] = { 0 };
- int fd;
- u8 *path = format (0, "%v%c", path_name, 0);
-
- if (readlink ((char *) path, buff, sizeof (fn)) + 1 != sizeof (fn))
- goto done;
-
- if (strncmp (fn, buff, sizeof (fn)))
- goto done;
-
- fd = atoi ((char *) file_name);
- if (fd != lvm->container_fd)
- lvm->ext_container_fd = atoi ((char *) file_name);
-
-done:
- vec_free (path);
- return 0;
-}
-
void
linux_vfio_dma_map_regions (vlib_main_t * vm)
{
@@ -113,12 +89,6 @@
if (lvm->container_fd != -1)
map_regions (vm, lvm->container_fd);
-
- if (lvm->ext_container_fd == -1)
- foreach_directory_file ("/proc/self/fd", scan_vfio_fd, 0, 0);
-
- if (lvm->ext_container_fd != -1)
- map_regions (vm, lvm->ext_container_fd);
}
static linux_pci_vfio_iommu_group_t *
@@ -252,8 +222,6 @@
linux_vfio_main_t *lvm = &vfio_main;
int fd;
- lvm->ext_container_fd = -1;
-
fd = open ("/dev/vfio/vfio", O_RDWR);
/* check if iommu is available */
diff --git a/src/vlib/linux/vfio.h b/src/vlib/linux/vfio.h
index 8e0758c..e23cee1 100644
--- a/src/vlib/linux/vfio.h
+++ b/src/vlib/linux/vfio.h
@@ -28,7 +28,6 @@
u32 flags;
#define LINUX_VFIO_F_HAVE_IOMMU (1 << 0)
int container_fd;
- int ext_container_fd; /* container fd used by external library, i.e DPDK */
/* VFIO */
int iommu_mode;