/* SPDX-License-Identifier: Apache-2.0
 * Copyright (c) 2023 Cisco Systems, Inc.
 */

#include <vlib/vlib.h>
#include <vnet/dev/dev.h>
#include <vnet/ethernet/ethernet.h>
#include <dev_octeon/octeon.h>
#include <dev_octeon/hw_defs.h>
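
/* Per-dispatch RX context shared by the helpers below: the next-node frame
   cursor (to_next / n_left_to_next), per-burst packet/byte/segment counters,
   and parse_w0_or, the bitwise OR of the NIX parse words used to derive
   frame-level flags. */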
typedef struct
{
  u32 next_index;
  u32 sw_if_index;
  u32 hw_if_index;
  u32 trace_count;
  u32 n_traced;
  oct_nix_rx_cqe_desc_t *next_desc;
  u64 parse_w0_or;
  u32 n_left_to_next;
  u32 *to_next;
  u32 n_rx_pkts;
  u32 n_rx_bytes;
  u32 n_segs;
} oct_rx_node_ctx_t;
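
/* NIX segment pointers reference the packet data; the vlib_buffer_t header
   sits immediately in front of that data, hence the "- 1". */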
static_always_inline vlib_buffer_t *
oct_seg_to_bp (void *p)
{
  return (vlib_buffer_t *) p - 1;
}
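
/* Attach the tail segments described by the CQE scatter-gather words (up to
   two extra segments from SG0 and three more from SG1) to head buffer h,
   chaining them via next_buffer and accumulating byte/segment counts. */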
static_always_inline void
oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h,
                    oct_nix_rx_cqe_desc_t *d)
{
  u32 tail_sz = 0, n_tail_segs = 0;
  vlib_buffer_t *p, *b;
  u8 segs0 = d->sg0.segs, segs1 = 0;

  if (segs0 < 2)
    return;

  b = oct_seg_to_bp (d->segs0[1]);
  h->next_buffer = vlib_get_buffer_index (vm, b);
  tail_sz += b->current_length = d->sg0.seg2_size;
  n_tail_segs++;

  if (segs0 == 2)
    goto done;

  p = b;
  p->flags = VLIB_BUFFER_NEXT_PRESENT;
  b = oct_seg_to_bp (d->segs0[2]);
  p->next_buffer = vlib_get_buffer_index (vm, b);
  tail_sz += b->current_length = d->sg0.seg3_size;
  n_tail_segs++;

  if (d->sg1.subdc != NIX_SUBDC_SG)
    goto done;

  segs1 = d->sg1.segs;
  if (segs1 == 0)
    goto done;

  p = b;
  p->flags = VLIB_BUFFER_NEXT_PRESENT;
  b = oct_seg_to_bp (d->segs1[0]);
  p->next_buffer = vlib_get_buffer_index (vm, b);
  tail_sz += b->current_length = d->sg1.seg1_size;
  n_tail_segs++;

  if (segs1 == 1)
    goto done;

  p = b;
  p->flags = VLIB_BUFFER_NEXT_PRESENT;
  b = oct_seg_to_bp (d->segs1[1]);
  p->next_buffer = vlib_get_buffer_index (vm, b);
  tail_sz += b->current_length = d->sg1.seg2_size;
  n_tail_segs++;

  if (segs1 == 2)
    goto done;

  p = b;
  p->flags = VLIB_BUFFER_NEXT_PRESENT;
  b = oct_seg_to_bp (d->segs1[2]);
  p->next_buffer = vlib_get_buffer_index (vm, b);
  tail_sz += b->current_length = d->sg1.seg3_size;
  n_tail_segs++;

done:
  b->flags = 0;
  h->total_length_not_including_first_buffer = tail_sz;
  h->flags |= VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
  ctx->n_rx_bytes += tail_sz;
  ctx->n_segs += n_tail_segs;
}
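
/* Translate up to n CQ descriptors into buffer indices on ctx->to_next,
   applying the rx queue buffer template and chaining multi-segment packets.
   The quad loop requires n_left >= 8 so that its d[4]..d[7] prefetches stay
   within the current batch. */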
static_always_inline u32
oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
              vnet_dev_rx_queue_t *rxq, u32 n)
{
  oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
  vlib_buffer_template_t bt = rxq->buffer_template;
  u32 n_left;
  oct_nix_rx_cqe_desc_t *d = ctx->next_desc;
  vlib_buffer_t *b[4];

  for (n_left = n; n_left >= 8; d += 4, n_left -= 4, ctx->to_next += 4)
    {
      u32 segs = 0;
      clib_prefetch_store (oct_seg_to_bp (d[4].segs0[0]));
      clib_prefetch_store (oct_seg_to_bp (d[5].segs0[0]));
      b[0] = oct_seg_to_bp (d[0].segs0[0]);
      clib_prefetch_store (oct_seg_to_bp (d[6].segs0[0]));
      b[1] = oct_seg_to_bp (d[1].segs0[0]);
      clib_prefetch_store (oct_seg_to_bp (d[7].segs0[0]));
      b[2] = oct_seg_to_bp (d[2].segs0[0]);
      b[3] = oct_seg_to_bp (d[3].segs0[0]);
      ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
      ctx->to_next[1] = vlib_get_buffer_index (vm, b[1]);
      ctx->to_next[2] = vlib_get_buffer_index (vm, b[2]);
      ctx->to_next[3] = vlib_get_buffer_index (vm, b[3]);
      b[0]->template = bt;
      b[1]->template = bt;
      b[2]->template = bt;
      b[3]->template = bt;
      ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
      ctx->n_rx_bytes += b[1]->current_length = d[1].sg0.seg1_size;
      ctx->n_rx_bytes += b[2]->current_length = d[2].sg0.seg1_size;
      ctx->n_rx_bytes += b[3]->current_length = d[3].sg0.seg1_size;
      ctx->n_segs += 4;
      /* OR the per-packet parse words (d[i].parse per hw_defs.h) so the
         errcode/errlev check in oct_rx_node_inline sees every packet of the
         burst; without this, parse_w0_or is read but never written */
      ctx->parse_w0_or |=
        d[0].parse.w[0] | d[1].parse.w[0] | d[2].parse.w[0] | d[3].parse.w[0];
      segs = d[0].sg0.segs + d[1].sg0.segs + d[2].sg0.segs + d[3].sg0.segs;
      if (PREDICT_FALSE (segs > 4))
        {
          oct_rx_attach_tail (vm, ctx, b[0], d + 0);
          oct_rx_attach_tail (vm, ctx, b[1], d + 1);
          oct_rx_attach_tail (vm, ctx, b[2], d + 2);
          oct_rx_attach_tail (vm, ctx, b[3], d + 3);
        }
    }

  for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1)
    {
      b[0] = oct_seg_to_bp (d->segs0[0]);
      ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
      b[0]->template = bt;
      ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
      ctx->n_segs += 1;
      ctx->parse_w0_or |= d[0].parse.w[0];
      if (d[0].sg0.segs > 1)
        oct_rx_attach_tail (vm, ctx, b[0], d + 0);
    }

  /* ring the CQ doorbell to release the n processed entries */
  plt_write64 ((crq->cq.wdata | n), crq->cq.door);
  ctx->n_rx_pkts += n;
  ctx->n_left_to_next -= n;
  return n;
}
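
/* Free a batch of buffers back to the NPA aura via LMTST: each LMT line
   carries the aura free word w0 plus up to 15 buffer pointers, and the
   STEORL data word encodes the LMT id, the number of lines minus one and
   the per-line size fields. */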
static_always_inline void
oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr,
                      oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi,
                      oct_npa_lf_aura_batch_free0_t w0, u64 n_lines)
{
  u64 data;

  for (u32 i = 0; i < n_lines; i++, bi += 15)
    {
      lines[i].w0 = w0;
      vlib_get_buffers (vm, bi, (vlib_buffer_t **) lines[i].data, 15);
    }

  data = lmt_id | ((n_lines - 1) << 12) | ((1ULL << (n_lines * 3)) - 1) << 19;
  roc_lmt_submit_steorl (data, addr);
  /* Data Store Memory Barrier - outer shareable domain */
  asm volatile("dmb oshst" ::: "memory");
}
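
/* Refill the aura in whole LMT lines of 15 buffers, at most 16 lines per
   LMTST submission; on a partial allocation, round down to whole lines and
   return the remainder to the buffer pool.  Returns the number of buffers
   actually enqueued. */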
static_always_inline u32
oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
{
  const u32 batch_max_lines = 16;
  const u32 bufs_per_line = 15;
  const u32 batch_max_bufs = 15 * 16;
  u32 batch_bufs, n_lines, n_alloc;
  u32 buffer_indices[batch_max_bufs];
  u64 lmt_addr, lmt_id, addr, n_enq = 0;
  u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
  oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
  oct_npa_lf_aura_batch_free_line_t *lines;

  if (n_refill < bufs_per_line)
    return 0;

  n_lines = n_refill / bufs_per_line;

  addr = crq->aura_batch_free_ioaddr;
  lmt_addr = crq->lmt_base_addr;
  lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
  lmt_addr += lmt_id << ROC_LMT_LINE_SIZE_LOG2;
  lines = (oct_npa_lf_aura_batch_free_line_t *) lmt_addr;

  oct_npa_lf_aura_batch_free0_t w0 = {
    .aura = roc_npa_aura_handle_to_aura (crq->aura_handle),
    .count_eot = 1,
  };

  while (n_lines >= batch_max_lines)
    {
      n_alloc = vlib_buffer_alloc (vm, buffer_indices, batch_max_bufs);
      if (PREDICT_FALSE (n_alloc < batch_max_bufs))
        goto alloc_fail;
      oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0,
                            batch_max_lines);
      n_lines -= batch_max_lines;
      n_enq += batch_max_bufs;
    }

  if (n_lines == 0)
    return n_enq;

  batch_bufs = n_lines * bufs_per_line;

  n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_bufs, bpi);
  if (PREDICT_FALSE (n_alloc < batch_bufs))
    {
    alloc_fail:
      if (n_alloc >= bufs_per_line)
        {
          u32 n_unalloc;
          n_lines = n_alloc / bufs_per_line;
          batch_bufs = n_lines * bufs_per_line;
          n_unalloc = n_alloc - batch_bufs;
          if (n_unalloc)
            vlib_buffer_unalloc_to_pool (vm, buffer_indices + batch_bufs,
                                         n_unalloc, bpi);
        }
      else
        {
          if (n_alloc)
            vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi);
          return n_enq;
        }
    }

  oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0, n_lines);
  n_enq += batch_bufs;
  return n_enq;
}
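
/* Add packet traces for up to trace_count descriptors of the current
   batch. */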
static_always_inline void
oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
              oct_rx_node_ctx_t *ctx, oct_nix_rx_cqe_desc_t *d, u32 n_desc)
{
  u32 i = 0;
  if (PREDICT_TRUE (ctx->trace_count == 0))
    return;

  while (ctx->n_traced < ctx->trace_count && i < n_desc)
    {
      vlib_buffer_t *b = oct_seg_to_bp (d[i].segs0[0]);

      if (PREDICT_TRUE (vlib_trace_buffer (vm, node, ctx->next_index, b,
                                           /* follow_chain */ 0)))
        {
          oct_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
          tr->next_index = ctx->next_index;
          tr->sw_if_index = ctx->sw_if_index;
          tr->desc = d[i];
          ctx->n_traced++;
        }
      i++;
    }
}
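
/* Per-queue RX path: drain the completion queue in batches sized by the
   available next-frame space, mark the ethernet-input frame as
   single-interface (and ip4-checksum-ok when no parse error was seen), then
   refill the aura.  with_flows is unused in this revision (presumably
   reserved for flow-offload variants). */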
static_always_inline uword
oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
                    vlib_frame_t *frame, vnet_dev_port_t *port,
                    vnet_dev_rx_queue_t *rxq, int with_flows)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 thr_idx = vlib_get_thread_index ();
  oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
  u32 n_desc, head, n, n_enq;
  u32 cq_size = crq->cq.nb_desc;
  u32 cq_mask = crq->cq.qmask;
  oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base;
  oct_nix_lf_cq_op_status_t status;
  oct_rx_node_ctx_t _ctx = {
    .next_index = rxq->next_index,
    .sw_if_index = port->intf.sw_if_index,
    .hw_if_index = port->intf.hw_if_index,
  }, *ctx = &_ctx;

  /* get head and tail from NIX_LF_CQ_OP_STATUS */
  status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
  if (status.cq_err || status.op_err)
    return 0;

  head = status.head;
  n_desc = (status.tail - head) & cq_mask;

  if (n_desc == 0)
    return 0;

  vlib_get_new_next_frame (vm, node, ctx->next_index, ctx->to_next,
                           ctx->n_left_to_next);
  ctx->trace_count = vlib_get_trace_count (vm, node);

  while (1)
    {
      ctx->next_desc = descs + head;
      n = clib_min (cq_size - head, clib_min (n_desc, ctx->n_left_to_next));
      n = oct_rx_batch (vm, ctx, rxq, n);
      oct_rx_trace (vm, node, ctx, descs + head, n);

      if (ctx->n_left_to_next == 0)
        break;

      status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
      if (status.cq_err || status.op_err)
        break;

      head = status.head;
      n_desc = (status.tail - head) & cq_mask;
      if (n_desc == 0)
        break;
    }

  if (ctx->n_traced)
    vlib_set_trace_count (vm, node, ctx->trace_count - ctx->n_traced);

  if (PREDICT_TRUE (ctx->next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
    {
      vlib_next_frame_t *nf;
      vlib_frame_t *f;
      ethernet_input_frame_t *ef;
      oct_nix_rx_parse_t p = { .w[0] = ctx->parse_w0_or };
      nf = vlib_node_runtime_get_next_frame (vm, node, ctx->next_index);
      f = vlib_get_frame (vm, nf->frame);
      f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;

      ef = vlib_frame_scalar_args (f);
      ef->sw_if_index = ctx->sw_if_index;
      ef->hw_if_index = ctx->hw_if_index;

      if (p.f.errcode == 0 && p.f.errlev == 0)
        f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;

      vlib_frame_no_append (f);
    }

  vlib_put_next_frame (vm, node, ctx->next_index, ctx->n_left_to_next);

  n_enq = crq->n_enq - ctx->n_segs;
  n_enq += oct_rxq_refill (vm, rxq, rxq->size - n_enq);
  crq->n_enq = n_enq;

  vlib_increment_combined_counter (
    vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
    thr_idx, ctx->hw_if_index, ctx->n_rx_pkts, ctx->n_rx_bytes);

  return ctx->n_rx_pkts;
}
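
/* RX node entry point: polls every RX queue assigned to this node
   runtime. */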
VNET_DEV_NODE_FN (oct_rx_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  u32 n_rx = 0;
  foreach_vnet_dev_rx_queue_runtime (rxq, node)
    {
      vnet_dev_port_t *port = rxq->port;
      n_rx += oct_rx_node_inline (vm, node, frame, port, rxq, 0);
    }
  return n_rx;
}