vlib: add support for multiple buffer pools

Change-Id: Icaf7d7ad47284aea7a56e8006b69f45874d64202
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index a44428a..b3dca95 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -193,6 +193,8 @@
   if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0)
     return 0;
 
+  dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp);
+
   _vec_len (vm->mbuf_alloc_list) = n;
 
   i = 0;
@@ -233,6 +235,11 @@
       vlib_buffer_init_for_free_list (b2, fl);
       vlib_buffer_init_for_free_list (b3, fl);
 
+      b0->buffer_pool_index = privp->buffer_pool_index;
+      b1->buffer_pool_index = privp->buffer_pool_index;
+      b2->buffer_pool_index = privp->buffer_pool_index;
+      b3->buffer_pool_index = privp->buffer_pool_index;
+
       if (fl->buffer_init_function)
 	{
 	  fl->buffer_init_function (vm, fl, &bi0, 1);
@@ -253,6 +260,7 @@
       vec_add1_aligned (fl->buffers, bi0, CLIB_CACHE_LINE_BYTES);
 
       vlib_buffer_init_for_free_list (b0, fl);
+      b0->buffer_pool_index = privp->buffer_pool_index;
 
       if (fl->buffer_init_function)
 	fl->buffer_init_function (vm, fl, &bi0, 1);
@@ -409,13 +417,6 @@
   vlib_worker_thread_barrier_release (vm);
 }
 
-typedef struct
-{
-  /* must be first */
-  struct rte_pktmbuf_pool_private mbp_priv;
-  vlib_physmem_region_index_t region_index;
-} dpdk_mempool_private_t;
-
 clib_error_t *
 dpdk_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
 			 unsigned socket_id)
@@ -446,9 +447,8 @@
   size = rte_mempool_xmem_size (num_mbufs, obj_size, 21);
 
   clib_error_t *error = 0;
-  error =
-    vlib_physmem_region_alloc (vm, (char *) pool_name, size, socket_id,
-			       VLIB_PHYSMEM_F_HAVE_BUFFERS, &pri);
+  error = vlib_physmem_region_alloc (vm, (char *) pool_name, size, socket_id,
+				     0, &pri);
   if (error)
     clib_error_report (error);
 
@@ -487,7 +487,7 @@
 	  rte_mempool_obj_iter (rmp, rte_pktmbuf_init, 0);
 
 	  dpdk_mempool_private_t *privp = rte_mempool_get_priv (rmp);
-	  privp->region_index = pri;
+	  privp->buffer_pool_index = vlib_buffer_add_physmem_region (vm, pri);
 
 	  dm->pktmbuf_pools[socket_id] = rmp;
 
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index aedc3f5..c65cec6 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -78,8 +78,11 @@
 	dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
     }
 
+  vec_validate_aligned (xd->buffer_pool_for_queue, xd->rx_q_used - 1,
+			CLIB_CACHE_LINE_BYTES);
   for (j = 0; j < xd->rx_q_used; j++)
     {
+      dpdk_mempool_private_t *privp;
       uword tidx = vnet_get_device_input_thread_index (dm->vnet_main,
 						       xd->hw_if_index, j);
       unsigned lcore = vlib_worker_threads[tidx].lcore_id;
@@ -95,6 +98,9 @@
 				     SOCKET_ID_ANY, 0,
 				     dm->pktmbuf_pools[socket_id]);
 
+      privp = rte_mempool_get_priv (dm->pktmbuf_pools[socket_id]);
+      xd->buffer_pool_for_queue[j] = privp->buffer_pool_index;
+
       if (rv < 0)
 	dpdk_device_error (xd, "rte_eth_rx_queue_setup", rv);
     }
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index a0c57ea..1ed3b28 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -194,6 +194,7 @@
   u16 rx_q_used;
   u16 nb_rx_desc;
   u16 *cpu_socket_id_by_queue;
+  u8 *buffer_pool_for_queue;
   struct rte_eth_conf port_conf;
   struct rte_eth_txconf tx_conf;
 
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index 52b4ca4..a95d03c 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -51,6 +51,14 @@
 _(file-prefix)                                  \
 _(vdev)
 
+typedef struct
+{
+  /* must be first */
+  struct rte_pktmbuf_pool_private mbp_priv;
+  u8 buffer_pool_index; /* vlib buffer pool index of this mempool */
+} dpdk_mempool_private_t;
+
+
 static inline void
 dpdk_get_xstats (dpdk_device_t * xd)
 {
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index cf8b969..8297821 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -347,6 +347,9 @@
   /* Update buffer template */
   vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
   bt->error = node->errors[DPDK_ERROR_NONE];
+  /* DPDK fills each queue with empty buffers from the mempool provided before
+     interface start, so it is safe to store the pool index in the template */
+  bt->buffer_pool_index = xd->buffer_pool_for_queue[queue_id];
 
   mb_index = 0;
 
diff --git a/src/plugins/ixge/ixge.c b/src/plugins/ixge/ixge.c
index 5fd6a90..f9d01d5 100644
--- a/src/plugins/ixge/ixge.c
+++ b/src/plugins/ixge/ixge.c
@@ -2528,8 +2528,9 @@
       u32 i;
 
       dq->tx.head_index_write_back =
-	vlib_physmem_alloc (vm, vm->buffer_main->physmem_region, &error,
-			    CLIB_CACHE_LINE_BYTES);
+	vlib_physmem_alloc (vm,
+			    vm->buffer_main->buffer_pools[0].physmem_region,
+			    &error, CLIB_CACHE_LINE_BYTES);
 
       for (i = 0; i < dq->n_descriptors; i++)
 	dq->descriptors[i].tx = xm->tx_descriptor_template;
@@ -2542,7 +2543,9 @@
     u64 a;
 
     a =
-      vlib_physmem_virtual_to_physical (vm, vm->buffer_main->physmem_region,
+      vlib_physmem_virtual_to_physical (vm,
+					vm->buffer_main->
+					buffer_pools[0].physmem_region,
 					dq->descriptors);
     dr->descriptor_address[0] = a & 0xFFFFFFFF;
     dr->descriptor_address[1] = a >> (u64) 32;
@@ -2570,7 +2573,8 @@
 
 	a =
 	  vlib_physmem_virtual_to_physical (vm,
-					    vm->buffer_main->physmem_region,
+					    vm->buffer_main->
+					    buffer_pools[0].physmem_region,
 					    dq->tx.head_index_write_back);
 	dr->tx.head_index_write_back_address[0] = /* enable bit */ 1 | a;
 	dr->tx.head_index_write_back_address[1] = (u64) a >> (u64) 32;
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index d0d8f60..f00e885 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -462,7 +462,7 @@
   u32 i;
 
   for (i = 0; i < vec_len (f->buffer_memory_allocated); i++)
-    vm->os_physmem_free (vm, vm->buffer_main->physmem_region,
+    vm->os_physmem_free (vm, vm->buffer_main->buffer_pools[0].physmem_region,
 			 f->buffer_memory_allocated[i]);
   vec_free (f->name);
   vec_free (f->buffer_memory_allocated);
@@ -555,8 +555,10 @@
 
       /* drb: removed power-of-2 ASSERT */
       buffers =
-	vm->os_physmem_alloc_aligned (vm, vm->buffer_main->physmem_region,
-				      n_bytes, sizeof (vlib_buffer_t));
+	vm->os_physmem_alloc_aligned (vm,
+				      vm->buffer_main->
+				      buffer_pools[0].physmem_region, n_bytes,
+				      sizeof (vlib_buffer_t));
       if (!buffers)
 	return n_alloc;
 
@@ -960,10 +962,15 @@
   return copied;
 }
 
-void
-vlib_buffer_add_mem_range (vlib_main_t * vm, uword start, uword size)
+u8
+vlib_buffer_add_physmem_region (vlib_main_t * vm,
+				vlib_physmem_region_index_t pri)
 {
   vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_physmem_region_t *pr = vlib_physmem_get_region (vm, pri);
+  vlib_buffer_pool_t *p;
+  uword start = pointer_to_uword (pr->mem);
+  uword size = pr->size;
 
   if (bm->buffer_mem_size == 0)
     {
@@ -989,6 +996,12 @@
     {
       clib_panic ("buffer memory size out of range!");
     }
+
+  vec_add2 (bm->buffer_pools, p, 1);
+  p->start = start;
+  p->size = size;
+  p->physmem_region = pri;
+  return p - bm->buffer_pools;
 }
 
 static u8 *
@@ -1057,6 +1070,7 @@
 vlib_buffer_main_init (struct vlib_main_t * vm)
 {
   vlib_buffer_main_t *bm;
+  vlib_physmem_region_index_t pri;
   clib_error_t *error;
 
   vec_validate (vm->buffer_main, 0);
@@ -1085,12 +1099,10 @@
   /* allocate default region */
   error = vlib_physmem_region_alloc (vm, "buffers",
 				     vlib_buffer_physmem_sz, 0,
-				     VLIB_PHYSMEM_F_INIT_MHEAP |
-				     VLIB_PHYSMEM_F_HAVE_BUFFERS,
-				     &bm->physmem_region);
+				     VLIB_PHYSMEM_F_INIT_MHEAP, &pri);
 
   if (error == 0)
-    return 0;
+    goto done;
 
   clib_error_free (error);
 
@@ -1098,9 +1110,9 @@
   error = vlib_physmem_region_alloc (vm, "buffers (fake)",
 				     vlib_buffer_physmem_sz, 0,
 				     VLIB_PHYSMEM_F_FAKE |
-				     VLIB_PHYSMEM_F_INIT_MHEAP |
-				     VLIB_PHYSMEM_F_HAVE_BUFFERS,
-				     &bm->physmem_region);
+				     VLIB_PHYSMEM_F_INIT_MHEAP, &pri);
+done:
+  vlib_buffer_add_physmem_region (vm, pri);
   return error;
 }
 
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index e5c1d21..6170323 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -123,7 +123,8 @@
 
   u8 n_add_refs; /**< Number of additional references to this buffer. */
 
-  u8 dont_waste_me[2]; /**< Available space in the (precious)
+  u8 buffer_pool_index;	/**< index of buffer pool this buffer belongs to. */
+  u8 dont_waste_me[1]; /**< Available space in the (precious)
                           first 32 octets of buffer metadata
                           Before allocating any of it, discussion required!
                        */
@@ -405,11 +406,19 @@
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  uword start;			/* start address of pool memory */
+  uword size;			/* size of pool memory */
+  vlib_physmem_region_index_t physmem_region; /* backing physmem region */
+} vlib_buffer_pool_t;
+
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
   /* Virtual memory address and size of buffer memory, used for calculating
      buffer index */
   uword buffer_mem_start;
   uword buffer_mem_size;
-  vlib_physmem_region_index_t physmem_region;
+  vlib_buffer_pool_t *buffer_pools;
 
   /* Buffer free callback, for subversive activities */
     u32 (*buffer_free_callback) (struct vlib_main_t * vm,
@@ -442,8 +451,9 @@
   int callbacks_registered;
 } vlib_buffer_main_t;
 
-void vlib_buffer_add_mem_range (struct vlib_main_t *vm, uword start,
-				uword size);
+u8 vlib_buffer_add_physmem_region (struct vlib_main_t *vm,
+				   vlib_physmem_region_index_t region);
+
 clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm);
 
 typedef struct
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index d51de6b..7224e08 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -162,7 +162,10 @@
 always_inline u64
 vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index)
 {
-  return vlib_physmem_offset_to_physical (vm, vm->buffer_main->physmem_region,
+  vlib_physmem_region_index_t pri;
+  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+  pri = vm->buffer_main->buffer_pools[b->buffer_pool_index].physmem_region;
+  return vlib_physmem_offset_to_physical (vm, pri,
 					  (((uword) buffer_index) <<
 					   CLIB_LOG2_CACHE_LINE_BYTES) +
 					  STRUCT_OFFSET_OF (vlib_buffer_t,
diff --git a/src/vlib/linux/physmem.c b/src/vlib/linux/physmem.c
index 6d3f7c5..dad4ef0 100644
--- a/src/vlib/linux/physmem.c
+++ b/src/vlib/linux/physmem.c
@@ -190,11 +190,6 @@
 					 MHEAP_FLAG_THREAD_SAFE);
     }
 
-  if (flags & VLIB_PHYSMEM_F_HAVE_BUFFERS)
-    {
-      vlib_buffer_add_mem_range (vm, pointer_to_uword (pr->mem), pr->size);
-    }
-
   *idx = pr->index;
 
   goto done;
diff --git a/src/vlib/physmem.h b/src/vlib/physmem.h
index a7fed12..1e053d6 100644
--- a/src/vlib/physmem.h
+++ b/src/vlib/physmem.h
@@ -55,7 +55,6 @@
   void *heap;
   u32 flags;
 #define VLIB_PHYSMEM_F_INIT_MHEAP (1<<0)
-#define VLIB_PHYSMEM_F_HAVE_BUFFERS (1<<1)
 #define VLIB_PHYSMEM_F_FAKE (1<<2)
 
   u8 numa_node;