libmemif: zero-copy-slave mode + header space
The slave is now able to dequeue buffers from an rx queue and enqueue them
directly to a tx queue (zero-copy operation). The slave can also produce buffers
with headroom, which allows encapsulation headers to be added without a copy.
Change-Id: Ia189f8de1a68be787545ed46cf78d36403e7e9bf
Signed-off-by: Jakub Grajciar <jgrajcia@cisco.com>
diff --git a/extras/libmemif/src/main.c b/extras/libmemif/src/main.c
index dc4a623..5645851 100644
--- a/extras/libmemif/src/main.c
+++ b/extras/libmemif/src/main.c
@@ -144,7 +144,7 @@
/* MEMIF_ERR_POLL_CANCEL */
"Memif event polling was canceled.",
/* MEMIF_ERR_MAX_RING */
- "Maximum log2 ring size is 15.",
+ "Maximum log2 ring size is 15",
/* MEMIF_ERR_PRIVHDR */
"Private headers not supported."
};
@@ -1324,7 +1324,6 @@
memif_init_regions_and_queues (memif_connection_t * conn)
{
memif_ring_t *ring = NULL;
- uint64_t buffer_offset;
memif_region_t *r;
int i, j;
libmemif_main_t *lm = &libmemif_main;
@@ -1335,18 +1334,19 @@
return MEMIF_ERR_NOMEM;
r = conn->regions;
- buffer_offset =
+ r->buffer_offset =
(conn->run_args.num_s2m_rings +
conn->run_args.num_m2s_rings) * (sizeof (memif_ring_t) +
sizeof (memif_desc_t) *
(1 << conn->run_args.log2_ring_size));
- r->region_size = buffer_offset +
+ r->region_size = r->buffer_offset +
conn->run_args.buffer_size * (1 << conn->run_args.log2_ring_size) *
(conn->run_args.num_s2m_rings + conn->run_args.num_m2s_rings);
if ((r->fd =
memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1)
+ if ((r->fd = memfd_create ("memif region 0", MFD_ALLOW_SEALING)) == -1)
return memif_syscall_error_handler (errno);
if ((fcntl (r->fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
@@ -1370,7 +1370,7 @@
{
uint16_t slot = i * (1 << conn->run_args.log2_ring_size) + j;
ring->desc[j].region = 0;
- ring->desc[j].offset = buffer_offset +
+ ring->desc[j].offset = r->buffer_offset +
(uint32_t) (slot * conn->run_args.buffer_size);
ring->desc[j].length = conn->run_args.buffer_size;
}
@@ -1389,7 +1389,7 @@
conn->run_args.num_s2m_rings) *
(1 << conn->run_args.log2_ring_size) + j;
ring->desc[j].region = 0;
- ring->desc[j].offset = buffer_offset +
+ ring->desc[j].offset = r->buffer_offset +
(uint32_t) (slot * conn->run_args.buffer_size);
ring->desc[j].length = conn->run_args.buffer_size;
}
@@ -1417,7 +1417,6 @@
mq[x].offset =
(void *) mq[x].ring - (void *) conn->regions[mq->region].shm;
mq[x].last_head = 0;
- mq[x].last_tail = 0;
mq[x].alloc_bufs = 0;
}
conn->tx_queues = mq;
@@ -1443,7 +1442,6 @@
mq[x].offset =
(void *) mq[x].ring - (void *) conn->regions[mq->region].shm;
mq[x].last_head = 0;
- mq[x].last_tail = 0;
mq[x].alloc_bufs = 0;
}
conn->rx_queues = mq;
@@ -1452,6 +1450,87 @@
}
+/**
+ * Enqueue previously received buffers onto a tx queue without copying data.
+ *
+ * For each entry of bufs flagged MEMIF_BUFFER_FLAG_RX, the descriptor the
+ * buffer came from (b0->ring / b0->desc_index) takes over the offset of the
+ * next free tx descriptor, and that tx descriptor is pointed at b0->data.
+ * Entries without MEMIF_BUFFER_FLAG_RX are skipped. Enqueued buffers are
+ * accounted in the queue's alloc_bufs; ring->head is not advanced here.
+ *
+ * @param conn       connection handle
+ * @param qid        tx queue id
+ * @param bufs       array of buffers to enqueue; flags and desc_index of
+ *                   enqueued entries are updated
+ * @param count      number of entries in bufs
+ * @param count_out  set to the number of buffers actually enqueued
+ *
+ * @return MEMIF_ERR_SUCCESS (0) when every entry was consumed,
+ *         MEMIF_ERR_NOCONN if conn is NULL,
+ *         MEMIF_ERR_DISCONNECTED if the connection fd is negative,
+ *         MEMIF_ERR_QID if qid is out of range,
+ *         MEMIF_ERR_INVAL_ARG if count_out is NULL or the caller is master,
+ *         MEMIF_ERR_NOBUF_RING if free slots ran out before count reached 0.
+ */
int
+memif_buffer_enq_tx (memif_conn_handle_t conn, uint16_t qid,
+                     memif_buffer_t * bufs, uint16_t count,
+                     uint16_t * count_out)
+{
+  memif_connection_t *c = (memif_connection_t *) conn;
+  if (EXPECT_FALSE (c == NULL))
+    return MEMIF_ERR_NOCONN;
+  if (EXPECT_FALSE (c->fd < 0))
+    return MEMIF_ERR_DISCONNECTED;
+  uint8_t num =
+    (c->args.is_master) ? c->run_args.num_m2s_rings : c->run_args.
+    num_s2m_rings;
+  if (EXPECT_FALSE (qid >= num))
+    return MEMIF_ERR_QID;
+  if (EXPECT_FALSE (!count_out))
+    return MEMIF_ERR_INVAL_ARG;
+  /* zero-copy enqueue is rejected for the master side */
+  if (EXPECT_FALSE (c->args.is_master))
+    return MEMIF_ERR_INVAL_ARG;
+
+  memif_queue_t *mq = &c->tx_queues[qid];
+  memif_ring_t *ring = mq->ring;
+  memif_buffer_t *b0;
+  uint16_t mask = (1 << mq->log2_ring_size) - 1;
+  uint16_t ring_size;
+  uint16_t slot, ns;
+  int err = MEMIF_ERR_SUCCESS;	/* 0 */
+  *count_out = 0;
+
+  ring_size = (1 << mq->log2_ring_size);
+  /* catch last_tail up with the ring's current tail */
+  ns = ring->tail - mq->last_tail;
+  mq->last_tail += ns;
+  /* first slot not yet handed out: head plus buffers already allocated */
+  slot = (c->args.is_master) ? ring->tail : ring->head;
+  slot += mq->alloc_bufs;
+
+  /* can only be called by slave */
+  /* NOTE(review): alloc_bufs is added here although slot above already
+   * skips alloc_bufs entries - confirm the intended sign. */
+  ns = ring_size - ring->head + mq->alloc_bufs + mq->last_tail;
+
+  b0 = bufs;
+
+  while (count && ns)
+    {
+      if (EXPECT_FALSE ((b0->flags & MEMIF_BUFFER_FLAG_RX) == 0))
+        {
+          /* not a valid buffer: skip it and move on to the next entry,
+           * otherwise the same entry would be re-tested until count is 0 */
+          b0++;
+          count--;
+          continue;
+        }
+      b0->flags &= ~MEMIF_BUFFER_FLAG_RX;
+
+      /* put free buffer on rx ring */
+      ((memif_ring_t *) b0->ring)->desc[b0->desc_index & mask].offset =
+        ring->desc[slot & mask].offset;
+
+      /* point the tx descriptor at the received data (offset within shm) */
+      ring->desc[slot & mask].offset =
+        (uint32_t) (b0->data - c->regions->shm);
+      ring->desc[slot & mask].flags |=
+        (b0->flags & MEMIF_BUFFER_FLAG_NEXT) ? MEMIF_DESC_FLAG_NEXT : 0;
+
+      /* the buffer now refers to its tx slot */
+      b0->desc_index = slot;
+
+      mq->alloc_bufs++;
+      slot++;
+
+      count--;
+      ns--;
+      b0++;
+      *count_out += 1;
+    }
+
+  DBG ("allocated: %u/%u bufs. Total %u allocated bufs", *count_out, count,
+       mq->alloc_bufs);
+
+  /* entries left over means the ring ran out of free slots */
+  if (count)
+    {
+      DBG ("ring buffer full! qid: %u", qid);
+      err = MEMIF_ERR_NOBUF_RING;
+    }
+
+  return err;
+}
+
+int
memif_buffer_alloc (memif_conn_handle_t conn, uint16_t qid,
memif_buffer_t * bufs, uint16_t count,
uint16_t * count_out, uint16_t size)
@@ -1503,12 +1582,13 @@
ring->desc[slot & mask].flags = 0;
/* slave can produce buffer with original length */
- dst_left = (c->args.is_master) ? ring->desc[slot & mask].length : c->run_args.buffer_size; /* - headroom */
+ dst_left = (c->args.is_master) ? ring->desc[slot & mask].length :
+ c->run_args.buffer_size;
src_left = size;
while (src_left)
{
- if (dst_left == 0)
+ if (EXPECT_FALSE (dst_left == 0))
{
if (count && ns)
{
@@ -1524,7 +1604,9 @@
b0 = (bufs + *count_out);
b0->desc_index = slot;
- dst_left = (c->args.is_master) ? ring->desc[slot & mask].length : c->run_args.buffer_size; /* - headroom */
+ dst_left =
+ (c->args.is_master) ? ring->desc[slot & mask].length : c->
+ run_args.buffer_size;
ring->desc[slot & mask].flags = 0;
}
else
@@ -1538,7 +1620,18 @@
}
}
b0->len = memif_min (dst_left, src_left);
- b0->data = memif_get_buffer (c, ring, slot & mask);
+
+ /* slave resets buffer offset */
+ if (c->args.is_master == 0)
+ {
+ uint16_t x =
+ (ring->desc[slot & mask].offset -
+ c->regions->buffer_offset) / c->run_args.buffer_size;
+ ring->desc[slot & mask].offset =
+ c->regions->buffer_offset + (x * c->run_args.buffer_size);
+ }
+
+ b0->data = c->regions->shm + ring->desc[slot & mask].offset;
src_left -= b0->len;
dst_left -= b0->len;
@@ -1562,11 +1655,13 @@
err = MEMIF_ERR_NOBUF_RING;
}
+error:
return err;
}
int
-memif_refill_queue (memif_conn_handle_t conn, uint16_t qid, uint16_t count)
+memif_refill_queue (memif_conn_handle_t conn, uint16_t qid, uint16_t count,
+ uint16_t headroom)
{
memif_connection_t *c = (memif_connection_t *) conn;
if (EXPECT_FALSE (c == NULL))
@@ -1581,6 +1676,8 @@
libmemif_main_t *lm = &libmemif_main;
memif_queue_t *mq = &c->rx_queues[qid];
memif_ring_t *ring = mq->ring;
+ uint16_t mask = (1 << mq->log2_ring_size) - 1;
+ uint16_t slot;
if (c->args.is_master)
{
@@ -1588,16 +1685,32 @@
ring->tail =
(ring->tail + count <=
mq->last_head) ? ring->tail + count : mq->last_head;
+ return MEMIF_ERR_SUCCESS;
}
- else
+
+ uint16_t head = ring->head;
+ uint16_t ns = (1 << mq->log2_ring_size) - head + mq->last_tail;
+ head += (count < ns) ? count : ns;
+
+ if (headroom)
{
- uint16_t head = ring->head;
- uint16_t ns = (1 << mq->log2_ring_size) - head + mq->last_tail;
- head += ns;
- MEMIF_MEMORY_BARRIER ();
- ring->head = (ring->head + count <= head) ? ring->head + count : head;
+ slot = (c->args.is_master) ? ring->head : ring->tail;
+ while (slot < head)
+ {
+ uint16_t x =
+ (ring->desc[slot & mask].offset -
+ c->regions->buffer_offset) / c->run_args.buffer_size;
+ ring->desc[slot & mask].offset =
+ c->regions->buffer_offset + (x * c->run_args.buffer_size) +
+ headroom;
+
+ slot++;
+ }
}
+ MEMIF_MEMORY_BARRIER ();
+ ring->head = head;
+
return MEMIF_ERR_SUCCESS; /* 0 */
}
@@ -1620,32 +1733,40 @@
memif_queue_t *mq = &c->tx_queues[qid];
memif_ring_t *ring = mq->ring;
- uint16_t slot;
-
- slot = (c->args.is_master) ? ring->tail : ring->head;
- *tx = (count <= mq->alloc_bufs) ? count : mq->alloc_bufs;
-
-#ifdef MEMIF_DBG_SHM
- uint16_t curr_buf = 0;
uint16_t mask = (1 << mq->log2_ring_size) - 1;
memif_buffer_t *b0;
- for (curr_buf = 0; curr_buf < count; curr_buf++)
+ *tx = 0;
+
+ if (count > mq->alloc_bufs)
+ count = mq->alloc_bufs;
+
+ if (EXPECT_FALSE (count == 0))
+ return MEMIF_ERR_SUCCESS;
+
+ while (count)
{
- b0 = (bufs + curr_buf);
+ b0 = (bufs + *tx);
+ ring->desc[b0->desc_index & mask].length = b0->len;
+
+#ifdef MEMIF_DBG_SHM
+ printf ("offset: %-6d\n", ring->desc[b0->desc_index & mask].offset);
+ printf ("data: %p\n",
+ memif_get_buffer (c, ring, b0->desc_index & mask));
+ printf ("index: %u\n", b0->desc_index);
print_bytes (memif_get_buffer (c, ring, b0->desc_index & mask),
ring->desc[b0->desc_index & mask].length, DBG_TX_BUF);
-
- }
#endif /* MEMIF_DBG_SHM */
+ *tx += 1;
+ count--;
+ }
+
+
MEMIF_MEMORY_BARRIER ();
if (c->args.is_master)
- ring->tail = slot + *tx;
+ ring->tail = b0->desc_index + 1;
else
- ring->head = slot + *tx;
-
- /* zero out buffer fields so the client cant modify transmitted data */
- memset (bufs, 0, sizeof (memif_buffer_t) * *tx);
+ ring->head = b0->desc_index + 1;
mq->alloc_bufs -= *tx;
@@ -1687,7 +1808,7 @@
uint64_t b;
ssize_t r = read (mq->int_fd, &b, sizeof (b));
- if ((r == -1) && (errno != EAGAIN))
+ if (EXPECT_FALSE ((r == -1) && (errno != EAGAIN)))
return memif_syscall_error_handler (errno);
cur_slot = (c->args.is_master) ? mq->last_head : mq->last_tail;
@@ -1709,13 +1830,19 @@
{
ring->desc[cur_slot & mask].length = c->run_args.buffer_size;
}
+
+ b0->flags = MEMIF_BUFFER_FLAG_RX;
if (ring->desc[cur_slot & mask].flags & MEMIF_DESC_FLAG_NEXT)
{
- b0->flags = MEMIF_BUFFER_FLAG_NEXT;
+ b0->flags |= MEMIF_BUFFER_FLAG_NEXT;
ring->desc[cur_slot & mask].flags &= ~MEMIF_DESC_FLAG_NEXT;
}
-
+/* b0->offset = ring->desc[cur_slot & mask].offset;*/
+ b0->ring = ring;
#ifdef MEMIF_DBG_SHM
+ printf ("data: %p\n", b0->data);
+ printf ("index: %u\n", b0->desc_index);
+ printf ("ring: %p\n", b0->ring);
print_bytes (b0->data, b0->len, DBG_RX_BUF);
#endif /* MEMIF_DBG_SHM */
ns--;