/* SPDX-License-Identifier: Apache-2.0
* Copyright (c) 2023 Cisco Systems, Inc.
*/
#include <vlib/vlib.h>
#include <vnet/dev/dev.h>
#include <vnet/ethernet/ethernet.h>
#include <dev_iavf/iavf.h>
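/* Refill of the rx ring is deferred until more than this many descriptors
   are free, so the tail register is not written for very small batches. */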
#define IAVF_RX_REFILL_THRESHOLD 32
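/* Commonly used qword1 bits of the rx descriptor write-back format:
   dd - descriptor done, eop - end of packet, flm - flow director filter
   match, ipe - IPv4 checksum error. dd and eop together identify a
   completed single-segment packet. */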
static const iavf_rx_desc_qw1_t mask_eop = { .eop = 1 };
static const iavf_rx_desc_qw1_t mask_flm = { .flm = 1 };
static const iavf_rx_desc_qw1_t mask_dd = { .dd = 1 };
static const iavf_rx_desc_qw1_t mask_ipe = { .ipe = 1 };
static const iavf_rx_desc_qw1_t mask_dd_eop = { .dd = 1, .eop = 1 };
static_always_inline int
iavf_rxd_is_not_eop (iavf_rx_desc_t *d)
{
return (d->qw1.as_u64 & mask_eop.as_u64) == 0;
}
static_always_inline int
iavf_rxd_is_not_dd (iavf_rx_desc_t *d)
{
return (d->qw1.as_u64 & mask_dd.as_u64) == 0;
}
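/* Initialize a rx descriptor with a fresh buffer address and clear qword1
   so no stale dd bit remains; with 256-bit vectors a single 32-byte store
   covers the whole descriptor. */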
static_always_inline void
iavf_rx_desc_write (iavf_rx_desc_t *d, u64 addr)
{
#ifdef CLIB_HAVE_VEC256
*(u64x4 *) d = (u64x4){ addr, 0, 0, 0 };
#else
d->qword[0] = addr;
d->qword[1] = 0;
#endif
}
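/* Refill the rx ring with fresh buffers. Buffers are allocated in multiples
   of 8, descriptors are rewritten in place and the new tail is published to
   the device with a release store. */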
static_always_inline void
iavf_rxq_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_dev_rx_queue_t *rxq, int use_va_dma)
{
u16 n_refill, mask, n_alloc, slot, size;
iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
vlib_buffer_t *b[8];
iavf_rx_desc_t *d, *first_d;
void *p[8];
size = rxq->size;
mask = size - 1;
n_refill = mask - arq->n_enqueued;
if (PREDICT_TRUE (n_refill <= IAVF_RX_REFILL_THRESHOLD))
return;
slot = (arq->next - n_refill - 1) & mask;
n_refill &= ~7; /* round down to a multiple of 8 */
n_alloc = vlib_buffer_alloc_to_ring_from_pool (
vm, arq->buffer_indices, slot, size, n_refill,
vnet_dev_get_rx_queue_buffer_pool_index (rxq));
if (PREDICT_FALSE (n_alloc != n_refill))
{
vlib_error_count (vm, node->node_index, IAVF_RX_NODE_CTR_BUFFER_ALLOC,
1);
if (n_alloc)
vlib_buffer_free_from_ring (vm, arq->buffer_indices, slot, size,
n_alloc);
return;
}
arq->n_enqueued += n_alloc;
first_d = arq->descs;
ASSERT (slot % 8 == 0);
while (n_alloc >= 8)
{
d = first_d + slot;
if (use_va_dma)
{
vlib_get_buffers_with_offset (vm, arq->buffer_indices + slot, p, 8,
sizeof (vlib_buffer_t));
iavf_rx_desc_write (d + 0, pointer_to_uword (p[0]));
iavf_rx_desc_write (d + 1, pointer_to_uword (p[1]));
iavf_rx_desc_write (d + 2, pointer_to_uword (p[2]));
iavf_rx_desc_write (d + 3, pointer_to_uword (p[3]));
iavf_rx_desc_write (d + 4, pointer_to_uword (p[4]));
iavf_rx_desc_write (d + 5, pointer_to_uword (p[5]));
iavf_rx_desc_write (d + 6, pointer_to_uword (p[6]));
iavf_rx_desc_write (d + 7, pointer_to_uword (p[7]));
}
else
{
vlib_get_buffers (vm, arq->buffer_indices + slot, b, 8);
iavf_rx_desc_write (d + 0, vlib_buffer_get_pa (vm, b[0]));
iavf_rx_desc_write (d + 1, vlib_buffer_get_pa (vm, b[1]));
iavf_rx_desc_write (d + 2, vlib_buffer_get_pa (vm, b[2]));
iavf_rx_desc_write (d + 3, vlib_buffer_get_pa (vm, b[3]));
iavf_rx_desc_write (d + 4, vlib_buffer_get_pa (vm, b[4]));
iavf_rx_desc_write (d + 5, vlib_buffer_get_pa (vm, b[5]));
iavf_rx_desc_write (d + 6, vlib_buffer_get_pa (vm, b[6]));
iavf_rx_desc_write (d + 7, vlib_buffer_get_pa (vm, b[7]));
}
/* next */
slot = (slot + 8) & mask;
n_alloc -= 8;
}
/* RXQ can be smaller than 256 packets, especially if jumbo. Clear qword1 of
   the descriptor at the new tail so a stale dd bit cannot be misread by the
   input node once it catches up with the tail. */
arq->descs[slot].qword[1] = 0;
__atomic_store_n (arq->qrx_tail, slot, __ATOMIC_RELEASE);
}
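/* Attach tail buffers to a multi-segment packet: follow the tail descriptors
   collected for this packet, chain the buffers via next_buffer and return
   the total length of all buffers except the first one. */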
static_always_inline uword
iavf_rx_attach_tail (vlib_main_t *vm, vlib_buffer_template_t *bt,
vlib_buffer_t *b, u64 qw1, iavf_rx_tail_t *t)
{
vlib_buffer_t *hb = b;
u32 tlnifb = 0, i = 0;
if (qw1 & mask_eop.as_u64)
return 0;
while ((qw1 & mask_eop.as_u64) == 0)
{
ASSERT (i < IAVF_RX_MAX_DESC_IN_CHAIN - 1);
ASSERT (qw1 & mask_dd.as_u64);
qw1 = t->qw1s[i];
b->next_buffer = t->buffers[i];
b->flags |= VLIB_BUFFER_NEXT_PRESENT;
b = vlib_get_buffer (vm, b->next_buffer);
b->template = *bt;
tlnifb += b->current_length = ((iavf_rx_desc_qw1_t) qw1).length;
i++;
}
hb->total_length_not_including_first_buffer = tlnifb;
hb->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
return tlnifb;
}
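/* For packets with the flm bit set, look up the flow entry selected by the
   descriptor-provided flow id and optionally override the next node, store
   the flow id in the buffer and advance the buffer. */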
static_always_inline void
iavf_process_flow_offload (vnet_dev_port_t *port, iavf_rt_data_t *rtd,
uword n_rx_packets)
{
uword n;
iavf_flow_lookup_entry_t fle;
iavf_port_t *ap = vnet_dev_get_port_data (port);
for (n = 0; n < n_rx_packets; n++)
{
if ((rtd->qw1s[n] & mask_flm.as_u64) == 0)
continue;
fle = *pool_elt_at_index (ap->flow_lookup_entries, rtd->flow_ids[n]);
if (fle.next_index != (u16) ~0)
rtd->next[n] = fle.next_index;
if (fle.flow_id != ~0)
rtd->bufs[n]->flow_id = fle.flow_id;
if (fle.buffer_advance != ~0)
vlib_buffer_advance (rtd->bufs[n], fle.buffer_advance);
}
}
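/* Post-process a burst of received packets: apply the buffer template, set
   current_length from the descriptor length field and, when tail descriptors
   were seen, attach the chained buffers. Done 4 packets at a time with
   prefetching of upcoming buffer headers. */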
static_always_inline uword
iavf_process_rx_burst (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_dev_rx_queue_t *rxq, iavf_rt_data_t *rtd,
vlib_buffer_template_t *bt, u32 n_left,
int maybe_multiseg)
{
vlib_buffer_t **b = rtd->bufs;
u64 *qw1 = rtd->qw1s;
iavf_rx_tail_t *tail = rtd->tails;
uword n_rx_bytes = 0;
while (n_left >= 4)
{
if (n_left >= 12)
{
vlib_prefetch_buffer_header (b[8], LOAD);
vlib_prefetch_buffer_header (b[9], LOAD);
vlib_prefetch_buffer_header (b[10], LOAD);
vlib_prefetch_buffer_header (b[11], LOAD);
}
b[0]->template = *bt;
b[1]->template = *bt;
b[2]->template = *bt;
b[3]->template = *bt;
n_rx_bytes += b[0]->current_length =
((iavf_rx_desc_qw1_t) qw1[0]).length;
n_rx_bytes += b[1]->current_length =
((iavf_rx_desc_qw1_t) qw1[1]).length;
n_rx_bytes += b[2]->current_length =
((iavf_rx_desc_qw1_t) qw1[2]).length;
n_rx_bytes += b[3]->current_length =
((iavf_rx_desc_qw1_t) qw1[3]).length;
if (maybe_multiseg)
{
n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1);
n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2);
n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3);
}
/* next */
qw1 += 4;
tail += 4;
b += 4;
n_left -= 4;
}
while (n_left)
{
b[0]->template = *bt;
n_rx_bytes += b[0]->current_length =
((iavf_rx_desc_qw1_t) qw1[0]).length;
if (maybe_multiseg)
n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
/* next */
qw1 += 1;
tail += 1;
b += 1;
n_left -= 1;
}
return n_rx_bytes;
}
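/* Main receive path for one rx queue: collect completed descriptors (4 at a
   time with vector gathers when possible, otherwise one by one), post-process
   the burst, optionally apply flow offload results, trace, enqueue to the
   next node(s) and update interface counters. */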
static_always_inline uword
iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, vnet_dev_port_t *port,
vnet_dev_rx_queue_t *rxq, int with_flows)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thr_idx = vlib_get_thread_index ();
iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm);
iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
vlib_buffer_template_t bt = rxq->buffer_template;
u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0;
u16 n_tail_desc = 0;
u64 or_qw1 = 0;
u32 *bi, *to_next, n_left_to_next;
u32 next_index = rxq->next_index;
u32 sw_if_index = port->intf.sw_if_index;
u32 hw_if_index = port->intf.hw_if_index;
u16 next = arq->next;
u16 size = rxq->size;
u16 mask = size - 1;
iavf_rx_desc_t *d, *descs = arq->descs;
#ifdef CLIB_HAVE_VEC256
u64x4 q1x4, or_q1x4 = { 0 };
u32x4 fdidx4;
u64x4 dd_eop_mask4 = u64x4_splat (mask_dd_eop.as_u64);
#elif defined(CLIB_HAVE_VEC128)
u32x4 q1x4_lo, q1x4_hi, or_q1x4 = { 0 };
u32x4 fdidx4;
u32x4 dd_eop_mask4 = u32x4_splat (mask_dd_eop.as_u64);
#endif
int single_next = 1;
/* is there anything on the ring */
d = descs + next;
if ((d->qword[1] & mask_dd.as_u64) == 0)
goto done;
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* fetch up to IAVF_RX_VECTOR_SZ completed descriptors from the rx ring and
   copy the needed data from each descriptor into the rx vector */
bi = to_next;
while (n_rx_packets < IAVF_RX_VECTOR_SZ)
{
if (next + 11 < size)
{
int stride = 8;
clib_prefetch_load ((void *) (descs + (next + stride)));
clib_prefetch_load ((void *) (descs + (next + stride + 1)));
clib_prefetch_load ((void *) (descs + (next + stride + 2)));
clib_prefetch_load ((void *) (descs + (next + stride + 3)));
}
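/* fast path - gather qword1 of 4 consecutive descriptors and accept all 4
   at once only if every one of them has both dd and eop set */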
#ifdef CLIB_HAVE_VEC256
if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
goto one_by_one;
q1x4 = u64x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
(void *) &d[2].qword[1], (void *) &d[3].qword[1]);
/* not all packets are ready or at least one of them is chained */
if (!u64x4_is_equal (q1x4 & dd_eop_mask4, dd_eop_mask4))
goto one_by_one;
or_q1x4 |= q1x4;
u64x4_store_unaligned (q1x4, rtd->qw1s + n_rx_packets);
#elif defined(CLIB_HAVE_VEC128)
if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
goto one_by_one;
q1x4_lo =
u32x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
(void *) &d[2].qword[1], (void *) &d[3].qword[1]);
/* not all packets are ready or at least one of them is chained */
if (!u32x4_is_equal (q1x4_lo & dd_eop_mask4, dd_eop_mask4))
goto one_by_one;
q1x4_hi = u32x4_gather (
(void *) &d[0].qword[1] + 4, (void *) &d[1].qword[1] + 4,
(void *) &d[2].qword[1] + 4, (void *) &d[3].qword[1] + 4);
or_q1x4 |= q1x4_lo;
rtd->qw1s[n_rx_packets + 0] = (u64) q1x4_hi[0] << 32 | (u64) q1x4_lo[0];
rtd->qw1s[n_rx_packets + 1] = (u64) q1x4_hi[1] << 32 | (u64) q1x4_lo[1];
rtd->qw1s[n_rx_packets + 2] = (u64) q1x4_hi[2] << 32 | (u64) q1x4_lo[2];
rtd->qw1s[n_rx_packets + 3] = (u64) q1x4_hi[3] << 32 | (u64) q1x4_lo[3];
#endif
#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
if (with_flows)
{
fdidx4 = u32x4_gather (
(void *) &d[0].fdid_flex_hi, (void *) &d[1].fdid_flex_hi,
(void *) &d[2].fdid_flex_hi, (void *) &d[3].fdid_flex_hi);
u32x4_store_unaligned (fdidx4, rtd->flow_ids + n_rx_packets);
}
vlib_buffer_copy_indices (bi, arq->buffer_indices + next, 4);
/* next */
next = (next + 4) & mask;
d = descs + next;
n_rx_packets += 4;
bi += 4;
continue;
one_by_one:
#endif
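/* scalar path - one descriptor at a time; this is also the only path that
   accepts chained (multi-descriptor) packets */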
clib_prefetch_load ((void *) (descs + ((next + 8) & mask)));
if (iavf_rxd_is_not_dd (d))
break;
bi[0] = arq->buffer_indices[next];
/* deal with chained buffers */
if (PREDICT_FALSE (iavf_rxd_is_not_eop (d)))
{
u16 tail_desc = 0;
u16 tail_next = next;
iavf_rx_tail_t *tail = rtd->tails + n_rx_packets;
iavf_rx_desc_t *td;
do
{
tail_next = (tail_next + 1) & mask;
td = descs + tail_next;
/* bail out in case of incomplete transaction */
if (iavf_rxd_is_not_dd (td))
goto no_more_desc;
or_qw1 |= tail->qw1s[tail_desc] = td[0].qword[1];
tail->buffers[tail_desc] = arq->buffer_indices[tail_next];
tail_desc++;
}
while (iavf_rxd_is_not_eop (td));
next = tail_next;
n_tail_desc += tail_desc;
}
or_qw1 |= rtd->qw1s[n_rx_packets] = d[0].qword[1];
if (PREDICT_FALSE (with_flows))
{
rtd->flow_ids[n_rx_packets] = d[0].fdid_flex_hi;
}
/* next */
next = (next + 1) & mask;
d = descs + next;
n_rx_packets++;
bi++;
}
no_more_desc:
if (n_rx_packets == 0)
goto done;
arq->next = next;
arq->n_enqueued -= n_rx_packets + n_tail_desc;
#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
#endif
vlib_get_buffers (vm, to_next, rtd->bufs, n_rx_packets);
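/* use the multiseg variant only when tail descriptors were collected, so the
   common single-segment case avoids the per-packet chaining checks */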
n_rx_bytes =
n_tail_desc ?
iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 1) :
iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 0);
/* the MARKed packets may have different next nodes */
if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
{
u32 n;
single_next = 0;
for (n = 0; n < n_rx_packets; n++)
rtd->next[n] = next_index;
iavf_process_flow_offload (port, rtd, n_rx_packets);
}
/* packet trace if enabled */
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
{
u32 n_left = n_rx_packets;
u32 i, j;
u16 *next_indices = rtd->next;
i = 0;
while (n_trace && n_left)
{
vlib_buffer_t *b = rtd->bufs[i];
if (PREDICT_FALSE (single_next == 0))
next_index = next_indices[0];
if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
/* follow_chain */ 0)))
{
iavf_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
tr->next_index = next_index;
tr->qid = rxq->queue_id;
tr->hw_if_index = hw_if_index;
tr->qw1s[0] = rtd->qw1s[i];
tr->flow_id =
(tr->qw1s[0] & mask_flm.as_u64) ? rtd->flow_ids[i] : 0;
for (j = 1; j < IAVF_RX_MAX_DESC_IN_CHAIN; j++)
tr->qw1s[j] = rtd->tails[i].qw1s[j - 1];
n_trace--;
}
/* next */
n_left--;
i++;
next_indices++;
}
vlib_set_trace_count (vm, node, n_trace);
}
/* enqueue the packets to the next nodes */
if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
{
/* release the next node's frame vector; in this case we use
   vlib_buffer_enqueue_to_next to place the packets
 */
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
/* enqueue buffers to the next node */
vlib_buffer_enqueue_to_next (vm, node, to_next, rtd->next, n_rx_packets);
}
else
{
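/* single next node - when it is ethernet-input, fill in the frame scalar
   arguments so ethernet-input can take its single-interface fast path,
   advertise valid IPv4 checksums if no descriptor reported an ipe error,
   and mark the frame so nothing else is appended to it */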
if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
{
vlib_next_frame_t *nf;
vlib_frame_t *f;
ethernet_input_frame_t *ef;
nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
f = vlib_get_frame (vm, nf->frame);
f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
ef = vlib_frame_scalar_args (f);
ef->sw_if_index = sw_if_index;
ef->hw_if_index = hw_if_index;
if ((or_qw1 & mask_ipe.as_u64) == 0)
f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
vlib_frame_no_append (f);
}
n_left_to_next -= n_rx_packets;
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vlib_increment_combined_counter (
vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
thr_idx, hw_if_index, n_rx_packets, n_rx_bytes);
done:
return n_rx_packets;
}
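/* Per-thread rx node: poll every rx queue assigned to this node runtime,
   use the flow offload variant of the input function only when flow offload
   is enabled on the port, then refill the ring. */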
VNET_DEV_NODE_FN (iavf_rx_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
u32 n_rx = 0;
foreach_vnet_dev_rx_queue_runtime (rxq, node)
{
vnet_dev_port_t *port = rxq->port;
iavf_port_t *ap = vnet_dev_get_port_data (port);
if (PREDICT_FALSE (ap->flow_offload))
n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 1);
else
n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 0);
/* refill rx ring */
if (rxq->port->dev->va_dma)
iavf_rxq_refill (vm, node, rxq, 1 /* use_va_dma */);
else
iavf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */);
}
return n_rx;
}