devices: af_packet, fix tx stall by retrying failed sendto

Change-Id: I6bed66f740b34673a4883eda1c7f7310c57e131b
Type: fix
Signed-off-by: Mohammed Hawari <mohammed@hawari.fr>
Signed-off-by: Benoît Ganne <bganne@cisco.com>
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
index 83c1068..ec65bf6 100644
--- a/src/vnet/devices/af_packet/af_packet.c
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -437,6 +437,7 @@
 
       tx_queue->next_tx_frame = 0;
       tx_queue->queue_id = queue_id;
+      tx_queue->is_tx_pending = 0;
       clib_spinlock_init (&tx_queue->lockp);
     }
 
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
index 10a7aaf..e5dc615 100644
--- a/src/vnet/devices/af_packet/af_packet.h
+++ b/src/vnet/devices/af_packet/af_packet.h
@@ -82,6 +82,7 @@
   u32 rx_frame_offset;
   u16 num_rx_pkts;
   u8 is_rx_pending;
+  u8 is_tx_pending;
   vnet_hw_if_rx_mode mode;
 } af_packet_queue_t;
 
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
index 1e177f6..e1eb46a 100644
--- a/src/vnet/devices/af_packet/device.c
+++ b/src/vnet/devices/af_packet/device.c
@@ -418,9 +418,10 @@
 
   CLIB_MEMORY_BARRIER ();
 
-  if (PREDICT_TRUE (n_sent))
+  if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending))
     {
       tx_queue->next_tx_frame = tx_frame;
+      tx_queue->is_tx_pending = 0;
 
       if (PREDICT_FALSE (
 	    sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1))
@@ -429,11 +430,26 @@
 	   * Note that we have no reliable way to properly determine the
 	   * disposition of the packets we just enqueued for delivery.
 	   */
-	  vlib_error_count (vm, node->node_index,
-			    unix_error_is_fatal (errno) ?
-			      AF_PACKET_TX_ERROR_TXRING_FATAL :
-			      AF_PACKET_TX_ERROR_TXRING_EAGAIN,
-			    n_sent);
+	  uword counter;
+
+	  if (unix_error_is_fatal (errno))
+	    {
+	      counter = AF_PACKET_TX_ERROR_TXRING_FATAL;
+	    }
+	  else
+	    {
+	      counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN;
+	      /* non-fatal error: kick again next time.
+	       * Note that you could still end up in a deadlock: if you do not
+	       * try to send new packets (i.e. reschedule this tx node), e.g.
+	       * because your peer is waiting for the unsent packets to reply
+	       * to you but you're waiting for its reply etc., you are not going
+	       * to kick again, and everybody is waiting for the other to talk
+	       * first... */
+	      tx_queue->is_tx_pending = 1;
+	    }
+
+	  vlib_error_count (vm, node->node_index, counter, 1);
 	}
     }