vmxnet3: error handling

outbound -- if we have to drop a packet because no descriptor space is available, drop the
whole packet, not just some of its fragments.
inbound -- check for and drop error packets

Change-Id: Ida1d32e61521bafd67f714d729ad53cd7c487dc6
Signed-off-by: Steven <sluong@cisco.com>
diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c
index 1e4ac4a..40d379b 100644
--- a/src/plugins/vmxnet3/cli.c
+++ b/src/plugins/vmxnet3/cli.c
@@ -226,6 +226,8 @@
 	vlib_cli_output (vm, "  Queue %u (RX)", qid);
 	vlib_cli_output (vm, "    RX completion next index %u",
 			 rxq->rx_comp_ring.next);
+	vlib_cli_output (vm, "    RX completion generation flag 0x%x",
+			 rxq->rx_comp_ring.gen);
 	for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++)
 	  {
 	    vmxnet3_rx_ring *ring = &rxq->rx_ring[rid];
@@ -266,6 +268,8 @@
 	vlib_cli_output (vm, "  Queue %u (TX)", qid);
 	vlib_cli_output (vm, "    TX completion next index %u",
 			 txq->tx_comp_ring.next);
+	vlib_cli_output (vm, "    TX completion generation flag 0x%x",
+			 txq->tx_comp_ring.gen);
 	vlib_cli_output (vm, "    size %u consume %u produce %u",
 			 txq->size, txq->tx_ring.consume,
 			 txq->tx_ring.produce);
diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c
index a0b9cbc..cdd3c7b 100644
--- a/src/plugins/vmxnet3/input.c
+++ b/src/plugins/vmxnet3/input.c
@@ -25,6 +25,7 @@
 
 #define foreach_vmxnet3_input_error \
   _(BUFFER_ALLOC, "buffer alloc error") \
+  _(RX_PACKET_NO_SOP, "Rx packet error - no SOP") \
   _(RX_PACKET, "Rx packet error") \
   _(NO_BUFFER, "Rx no buffer error")
 
@@ -90,6 +91,8 @@
   vlib_buffer_t *prev_b0 = 0, *hb = 0;
   u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
   u8 known_next = 0;
+  vmxnet3_rx_desc *rxd;
+  clib_error_t *error;
 
   rxq = vec_elt_at_index (vd->rxqs, qid);
   comp_ring = &rxq->rx_comp_ring;
@@ -120,6 +123,7 @@
       vmxnet3_rx_comp_ring_advance_next (rxq);
       desc_idx = rx_comp->index & VMXNET3_RXC_INDEX;
       ring->consume = desc_idx;
+      rxd = &rxq->rx_desc[rid][desc_idx];
 
       bi0 = ring->bufs[desc_idx];
       ring->bufs[desc_idx] = ~0;
@@ -137,15 +141,30 @@
       b0->current_config_index = 0;
       ASSERT (b0->current_length != 0);
 
+      if (PREDICT_FALSE ((rx_comp->index & VMXNET3_RXCI_EOP) &&
+			 (rx_comp->len & VMXNET3_RXCL_ERROR)))
+	{
+	  vlib_buffer_free_one (vm, bi0);
+	  vlib_error_count (vm, node->node_index,
+			    VMXNET3_INPUT_ERROR_RX_PACKET, 1);
+	  if (hb && vlib_get_buffer_index (vm, hb) != bi0)
+	    {
+	      vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb));
+	      hb = 0;
+	    }
+	  prev_b0 = 0;
+	  continue;
+	}
+
       if (rx_comp->index & VMXNET3_RXCI_SOP)
 	{
+	  ASSERT (!(rxd->flags & VMXNET3_RXF_BTYPE));
 	  /* start segment */
 	  hb = b0;
 	  bi[0] = bi0;
 	  if (!(rx_comp->index & VMXNET3_RXCI_EOP))
 	    {
 	      hb->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
-	      b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
 	      prev_b0 = b0;
 	    }
 	  else
@@ -161,8 +180,8 @@
 	  /* end of segment */
 	  if (prev_b0)
 	    {
-	      prev_b0->next_buffer = bi0;
 	      prev_b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+	      prev_b0->next_buffer = bi0;
 	      hb->total_length_not_including_first_buffer +=
 		b0->current_length;
 	      prev_b0 = 0;	// Get next packet
@@ -170,17 +189,22 @@
 	  else
 	    {
 	      /* EOP without SOP, error */
-	      hb = 0;
 	      vlib_error_count (vm, node->node_index,
-				VMXNET3_INPUT_ERROR_RX_PACKET, 1);
+				VMXNET3_INPUT_ERROR_RX_PACKET_NO_SOP, 1);
 	      vlib_buffer_free_one (vm, bi0);
+	      if (hb && vlib_get_buffer_index (vm, hb) != bi0)
+		{
+		  vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, hb));
+		  hb = 0;
+		}
 	      continue;
 	    }
 	}
       else if (prev_b0)		// !sop && !eop
 	{
 	  /* mid chain */
-	  b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+	  ASSERT (rxd->flags & VMXNET3_RXF_BTYPE);
+	  prev_b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
 	  prev_b0->next_buffer = bi0;
 	  prev_b0 = b0;
 	  hb->total_length_not_including_first_buffer += b0->current_length;
@@ -278,27 +302,25 @@
 
   if (PREDICT_TRUE (n_rx_packets))
     {
-      clib_error_t *error;
-
       vlib_buffer_enqueue_to_next (vm, node, buffer_indices, nexts,
 				   n_rx_packets);
       vlib_increment_combined_counter
 	(vnm->interface_main.combined_sw_if_counters +
 	 VNET_INTERFACE_COUNTER_RX, thread_index,
 	 vd->hw_if_index, n_rx_packets, n_rx_bytes);
+    }
 
-      error = vmxnet3_rxq_refill_ring0 (vm, vd, rxq);
-      if (PREDICT_FALSE (error != 0))
-	{
-	  vlib_error_count (vm, node->node_index,
-			    VMXNET3_INPUT_ERROR_BUFFER_ALLOC, 1);
-	}
-      error = vmxnet3_rxq_refill_ring1 (vm, vd, rxq);
-      if (PREDICT_FALSE (error != 0))
-	{
-	  vlib_error_count (vm, node->node_index,
-			    VMXNET3_INPUT_ERROR_BUFFER_ALLOC, 1);
-	}
+  error = vmxnet3_rxq_refill_ring0 (vm, vd, rxq);
+  if (PREDICT_FALSE (error != 0))
+    {
+      vlib_error_count (vm, node->node_index,
+			VMXNET3_INPUT_ERROR_BUFFER_ALLOC, 1);
+    }
+  error = vmxnet3_rxq_refill_ring1 (vm, vd, rxq);
+  if (PREDICT_FALSE (error != 0))
+    {
+      vlib_error_count (vm, node->node_index,
+			VMXNET3_INPUT_ERROR_BUFFER_ALLOC, 1);
     }
 
   return n_rx_packets;
diff --git a/src/plugins/vmxnet3/output.c b/src/plugins/vmxnet3/output.c
index 32b77de..d6b7aaa 100644
--- a/src/plugins/vmxnet3/output.c
+++ b/src/plugins/vmxnet3/output.c
@@ -106,14 +106,13 @@
   u32 *buffers = vlib_frame_args (frame);
   u32 bi0;
   vlib_buffer_t *b0;
-  vmxnet3_tx_desc *txd;
+  vmxnet3_tx_desc *txd = 0;
   u32 desc_idx, generation, first_idx;
   u16 space_left;
   u16 n_left = frame->n_vectors;
   vmxnet3_txq_t *txq;
   u32 thread_index = vm->thread_index;
   u16 qid = thread_index;
-  u16 n_retry = 5;
 
   if (PREDICT_FALSE (!(vd->flags & VMXNET3_DEVICE_F_LINK_UP)))
     {
@@ -126,13 +125,37 @@
   txq = vec_elt_at_index (vd->txqs, qid % vd->num_tx_queues);
   clib_spinlock_lock_if_init (&txq->lock);
 
-retry:
   vmxnet3_txq_release (vm, vd, txq);
 
   while (n_left)
     {
+      u16 space_needed = 1, i;
+      vlib_buffer_t *b;
+
       bi0 = buffers[0];
-      txd = 0;
+      b0 = vlib_get_buffer (vm, bi0);
+      b = b0;
+
+      space_left = vmxnet3_tx_ring_space_left (txq);
+      while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+	{
+	  u32 next_buffer = b->next_buffer;
+
+	  b = vlib_get_buffer (vm, next_buffer);
+	  space_needed++;
+	}
+      if (PREDICT_FALSE (space_left < space_needed))
+	{
+	  vlib_buffer_free_one (vm, bi0);
+	  vlib_error_count (vm, node->node_index,
+			    VMXNET3_TX_ERROR_NO_FREE_SLOTS, 1);
+	  buffers++;
+	  n_left--;
+	  /*
+	   * Drop this packet. But we may have enough room for the next packet
+	   */
+	  continue;
+	}
 
       /*
        * Toggle the generation bit for SOP fragment to avoid device starts
@@ -140,17 +163,11 @@
        */
       generation = txq->tx_ring.gen ^ VMXNET3_TXF_GEN;
       first_idx = txq->tx_ring.produce;
-      while (1)
+      for (i = 0; i < space_needed; i++)
 	{
 	  b0 = vlib_get_buffer (vm, bi0);
 	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
 
-	  space_left = vmxnet3_tx_ring_space_left (txq);
-	  if (PREDICT_FALSE (space_left == 0))
-	    {
-	      break;
-	    }
-
 	  desc_idx = txq->tx_ring.produce;
 
 	  vmxnet3_tx_ring_advance_produce (txq);
@@ -164,44 +181,24 @@
 	  txd->flags[0] = generation | b0->current_length;
 
 	  generation = txq->tx_ring.gen;
-	  if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
-	    {
-	      txd->flags[1] = 0;
-	      bi0 = b0->next_buffer;
-	    }
-	  else
-	    break;
+
+	  txd->flags[1] = 0;
+	  bi0 = b0->next_buffer;
 	}
 
-      if (PREDICT_TRUE (txd != 0))
-	{
-	  txd->flags[1] = VMXNET3_TXF_CQ | VMXNET3_TXF_EOP;
-	  asm volatile ("":::"memory");
-	  /*
-	   * Now toggle back the generation bit for the first segment.
-	   * Device can start reading the packet
-	   */
-	  txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN;
-	  vmxnet3_reg_write (vd, 0, VMXNET3_REG_TXPROD, txq->tx_ring.produce);
-	}
-
-      if (PREDICT_FALSE (space_left == 0))
-	{
-	  break;
-	}
+      txd->flags[1] = VMXNET3_TXF_CQ | VMXNET3_TXF_EOP;
+      asm volatile ("":::"memory");
+      /*
+       * Now toggle back the generation bit for the first segment.
+       * Device can start reading the packet
+       */
+      txq->tx_desc[first_idx].flags[0] ^= VMXNET3_TXF_GEN;
+      vmxnet3_reg_write (vd, 0, VMXNET3_REG_TXPROD, txq->tx_ring.produce);
 
       buffers++;
       n_left--;
     }
 
-  if (PREDICT_FALSE (n_left))
-    {
-      if (PREDICT_TRUE (n_retry--))
-	goto retry;
-      vlib_buffer_free (vm, buffers, n_left);
-      vlib_error_count (vm, node->node_index, VMXNET3_TX_ERROR_NO_FREE_SLOTS,
-			n_left);
-    }
   clib_spinlock_unlock_if_init (&txq->lock);
 
   return (frame->n_vectors - n_left);
diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c
index 9f2b8a8..ac99411 100644
--- a/src/plugins/vmxnet3/vmxnet3.c
+++ b/src/plugins/vmxnet3/vmxnet3.c
@@ -253,6 +253,7 @@
 
       ring = &rxq->rx_ring[rid];
       ring->gen = VMXNET3_RXF_GEN;
+      ring->rid = rid;
       vec_validate_aligned (ring->bufs, rxq->size, CLIB_CACHE_LINE_BYTES);
     }
   rxq->rx_comp_ring.gen = VMXNET3_RXCF_GEN;