Overall tcp performance improvements (VPP-846)

- limit minimum rto per connection
- clean up sack scoreboard
- switch svm fifo out-of-order data handling from absolute offsets to
  relative offsets
- improve cwnd handling when using sacks
- add cc event debug stats
- improve uri tcp test client/server: fix bugs and add half-duplex mode
- expand builtin client/server
- update uri socket client/server code to work in half-duplex
- ensure session node unsets fifo event for empty fifo
- fix session detach

Change-Id: Ia446972340e32a65e0694ee2844355167d0c170d
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/uri/uri_socket_server.c b/src/uri/uri_socket_server.c
index 2366f42..4f4c5f3 100644
--- a/src/uri/uri_socket_server.c
+++ b/src/uri/uri_socket_server.c
@@ -22,6 +22,7 @@
 #include <vppinfra/format.h>
 #include <signal.h>
 #include <sys/ucontext.h>
+#include <sys/time.h>
 
 volatile int signal_received;
 
@@ -78,7 +79,10 @@
   struct sockaddr_in serv_addr;
   struct sockaddr_in client;
   struct hostent *server;
-  u8 *rx_buffer = 0;
+  u8 *rx_buffer = 0, no_echo = 0;
+  struct timeval start, end;
+  long rcvd = 0;
+  double deltat;
 
   if (argc > 1 && argc < 3)
     {
@@ -86,8 +90,9 @@
       exit (0);
     }
 
-  if (argc >= 3)
+  if (argc >= 4)
     {
+      no_echo = atoi (argv[3]);
       portno = atoi (argv[2]);
       server = gethostbyname (argv[1]);
       if (server == NULL)
@@ -137,7 +142,7 @@
       exit (1);
     }
 
-  vec_validate (rx_buffer, 8999 /* jumbo mtu */ );
+  vec_validate (rx_buffer, 128 << 10);
 
   if (listen (sockfd, 5 /* backlog */ ) < 0)
     {
@@ -160,6 +165,8 @@
 	}
       fformat (stderr, "Accepted connection from: %s : %d\n",
 	       inet_ntoa (client.sin_addr), client.sin_port);
+      gettimeofday (&start, NULL);
+
       while (1)
 	{
 	  n = recv (accfd, rx_buffer, vec_len (rx_buffer), 0 /* flags */ );
@@ -167,6 +174,14 @@
 	    {
 	      /* Graceful exit */
 	      close (accfd);
+	      gettimeofday (&end, NULL);
+	      deltat = (end.tv_sec - start.tv_sec);
+	      deltat += (end.tv_usec - start.tv_usec) / 1000000.0;
+	      clib_warning ("Finished in %.6f", deltat);
+	      clib_warning ("%.4f Gbit/second %s",
+			    (((f64) rcvd * 8.0) / deltat / 1e9),
+			    no_echo ? "half" : "full");
+	      rcvd = 0;
 	      break;
 	    }
 	  if (n < 0)
@@ -179,6 +194,10 @@
 	  if (signal_received)
 	    break;
 
+	  rcvd += n;
+	  if (no_echo)
+	    continue;
+
 	  sent = send (accfd, rx_buffer, n, 0 /* flags */ );
 	  if (n < 0)
 	    {
diff --git a/src/uri/uri_socket_test.c b/src/uri/uri_socket_test.c
index 9f049bd..5f7084d 100644
--- a/src/uri/uri_socket_test.c
+++ b/src/uri/uri_socket_test.c
@@ -19,6 +19,7 @@
 #include <netinet/in.h>
 #include <netdb.h>
 #include <vppinfra/format.h>
+#include <sys/time.h>
 
 int
 main (int argc, char *argv[])
@@ -26,28 +27,44 @@
   int sockfd, portno, n;
   struct sockaddr_in serv_addr;
   struct hostent *server;
-  u8 *rx_buffer = 0, *tx_buffer = 0;
+  u8 *rx_buffer = 0, *tx_buffer = 0, no_echo = 0, test_bytes = 0;
   u32 offset;
-  int iter, i;
-  if (0 && argc < 3)
+  long bytes = 1 << 20, to_send;
+  int i;
+  struct timeval start, end;
+  double deltat;
+
+  if (argc >= 3)		/* host port [no_echo [mbytes]]; last two optional */
     {
-      fformat (stderr, "usage %s hostname port\n", argv[0]);
-      exit (0);
+      bytes = (argc > 4) ? ((long) atoi (argv[4])) << 20 : bytes;
+      no_echo = (argc > 3) ? atoi (argv[3]) : no_echo;
+      portno = atoi (argv[2]);
+      server = gethostbyname (argv[1]);
+      if (server == NULL)
+	{
+	  clib_unix_warning ("gethostbyname");
+	  exit (1);
+	}
+    }
+  else
+    {
+      portno = 1234;		// atoi(argv[2]);
+      server = gethostbyname ("6.0.1.1" /* argv[1] */ );
+      if (server == NULL)
+	{
+	  clib_unix_warning ("gethostbyname");
+	  exit (1);
+	}
     }
 
-  portno = 1234;		// atoi(argv[2]);
+  to_send = bytes;
   sockfd = socket (AF_INET, SOCK_STREAM, 0);
   if (sockfd < 0)
     {
       clib_unix_error ("socket");
       exit (1);
     }
-  server = gethostbyname ("6.0.1.1" /* argv[1] */ );
-  if (server == NULL)
-    {
-      clib_unix_warning ("gethostbyname");
-      exit (1);
-    }
+
   bzero ((char *) &serv_addr, sizeof (serv_addr));
   serv_addr.sin_family = AF_INET;
   bcopy ((char *) server->h_addr,
@@ -59,8 +76,8 @@
       exit (1);
     }
 
-  vec_validate (rx_buffer, 1400);
-  vec_validate (tx_buffer, 1400);
+  vec_validate (rx_buffer, 128 << 10);
+  vec_validate (tx_buffer, 128 << 10);
 
   for (i = 0; i < vec_len (tx_buffer); i++)
     tx_buffer[i] = (i + 1) % 0xff;
@@ -75,19 +92,28 @@
       exit (0);
     }
 
-  for (iter = 0; iter < 100000; iter++)
+  gettimeofday (&start, NULL);
+  while (bytes > 0)
     {
-      if (iter < 99999)
+      /*
+       * TX
+       */
+      n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
+      if (n != vec_len (tx_buffer))
 	{
-	  n = send (sockfd, tx_buffer, vec_len (tx_buffer), 0 /* flags */ );
-	  if (n != vec_len (tx_buffer))
-	    {
-	      clib_unix_warning ("write");
-	      exit (0);
-	    }
+	  clib_unix_warning ("write");
+	  exit (0);
 	}
-      offset = 0;
+      bytes -= n;
 
+      if (no_echo)
+	continue;
+
+      /*
+       * RX
+       */
+
+      offset = 0;
       do
 	{
 	  n = recv (sockfd, rx_buffer + offset,
@@ -101,18 +127,27 @@
 	}
       while (offset < vec_len (rx_buffer));
 
-      for (i = 0; i < vec_len (rx_buffer); i++)
+      if (test_bytes)
 	{
-	  if (rx_buffer[i] != tx_buffer[i])
+	  for (i = 0; i < vec_len (rx_buffer); i++)
 	    {
-	      clib_warning ("[%d] read 0x%x not 0x%x",
-			    rx_buffer[i], tx_buffer[i]);
-	      exit (1);
+	      if (rx_buffer[i] != tx_buffer[i])
+		{
+		  clib_warning ("[%d] read 0x%x not 0x%x", rx_buffer[i],
+				tx_buffer[i]);
+		  exit (1);
+		}
 	    }
 	}
-
     }
   close (sockfd);
+  gettimeofday (&end, NULL);
+
+  deltat = (end.tv_sec - start.tv_sec);
+  deltat += (end.tv_usec - start.tv_usec) / 1000000.0;	// us to s
+  clib_warning ("Finished in %.6f", deltat);
+  clib_warning ("%.4f Gbit/second %s", (((f64) to_send * 8.0) / deltat / 1e9),
+		no_echo ? "half" : "full");
   return 0;
 }
 
diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c
index e201a35..d1694cf 100755
--- a/src/uri/uri_tcp_test.c
+++ b/src/uri/uri_tcp_test.c
@@ -46,6 +46,8 @@
   svm_fifo_t *server_tx_fifo;
 
   u64 vpp_session_handle;
+  u64 bytes_received;
+  f64 start;
 } session_t;
 
 typedef enum
@@ -174,7 +176,7 @@
       if (utm->state == STATE_FAILED)
 	return -1;
       if (utm->time_to_stop == 1)
-	return -1;
+	return 0;
     }
   clib_warning ("timeout waiting for STATE_READY");
   return -1;
@@ -184,7 +186,7 @@
 application_send_attach (uri_tcp_test_main_t * utm)
 {
   vl_api_application_attach_t *bmp;
-  u32 fifo_size = 3 << 20;
+  u32 fifo_size = 4 << 20;
   bmp = vl_msg_api_alloc (sizeof (*bmp));
   memset (bmp, 0, sizeof (*bmp));
 
@@ -344,10 +346,22 @@
 }
 
 static void
+session_print_stats (uri_tcp_test_main_t * utm, session_t * session)
+{
+  f64 deltat;
+  u64 bytes;
+
+  deltat = clib_time_now (&utm->clib_time) - session->start;
+  bytes = utm->i_am_master ? session->bytes_received : utm->bytes_to_send;
+  fformat (stdout, "Finished in %.6f\n", deltat);
+  fformat (stdout, "%.4f Gbit/second\n", (bytes * 8.0) / deltat / 1e9);
+}
+
+static void
 vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp)
 {
   uri_tcp_test_main_t *utm = &uri_tcp_test_main;
-  session_t *session;
+  session_t *session = 0;
   vl_api_disconnect_session_reply_t *rmp;
   uword *p;
   int rv = 0;
@@ -366,7 +380,7 @@
       rv = -11;
     }
 
-  utm->time_to_stop = 1;
+//  utm->time_to_stop = 1;
 
   rmp = vl_msg_api_alloc (sizeof (*rmp));
   memset (rmp, 0, sizeof (*rmp));
@@ -375,6 +389,9 @@
   rmp->retval = rv;
   rmp->handle = mp->handle;
   vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+  if (session)
+    session_print_stats (utm, session);
 }
 
 static void
@@ -431,14 +448,19 @@
       if (n_read > 0)
 	{
 	  bytes -= n_read;
-	  for (i = 0; i < n_read; i++)
+	  if (utm->test_return_packets)
 	    {
-	      if (utm->rx_buf[i] != ((utm->client_bytes_received + i) & 0xff))
+	      for (i = 0; i < n_read; i++)
 		{
-		  clib_warning ("error at byte %lld, 0x%x not 0x%x",
-				utm->client_bytes_received + i,
-				utm->rx_buf[i],
-				((utm->client_bytes_received + i) & 0xff));
+		  if (utm->rx_buf[i]
+		      != ((utm->client_bytes_received + i) & 0xff))
+		    {
+		      clib_warning ("error at byte %lld, 0x%x not 0x%x",
+				    utm->client_bytes_received + i,
+				    utm->rx_buf[i],
+				    ((utm->client_bytes_received +
+				      i) & 0xff));
+		    }
 		}
 	    }
 	  utm->client_bytes_received += n_read;
@@ -545,6 +567,7 @@
   session->server_rx_fifo = rx_fifo;
   session->server_tx_fifo = tx_fifo;
   session->vpp_session_handle = mp->handle;
+  session->start = clib_time_now (&utm->clib_time);
 
   /* Save handle */
   utm->connected_session_index = session_index;
@@ -571,7 +594,7 @@
   u64 bytes_sent = 0;
   int test_buf_offset = 0;
   u32 bytes_to_snd;
-  u32 queue_max_chunk = 64 << 10, actual_write;
+  u32 queue_max_chunk = 128 << 10, actual_write;
   session_fifo_event_t evt;
   static int serial_number = 0;
   int rv;
@@ -582,8 +605,8 @@
 
   while (bytes_to_snd > 0)
     {
-      actual_write =
-	bytes_to_snd > queue_max_chunk ? queue_max_chunk : bytes_to_snd;
+      actual_write = (bytes_to_snd > queue_max_chunk) ?
+	queue_max_chunk : bytes_to_snd;
       rv = svm_fifo_enqueue_nowait (tx_fifo, actual_write,
 				    test_data + test_buf_offset);
 
@@ -635,9 +658,9 @@
   if (leftover)
     send_test_chunk (utm, tx_fifo, mypid, leftover);
 
-  if (utm->test_return_packets)
+  if (!utm->drop_packets)
     {
-      f64 timeout = clib_time_now (&utm->clib_time) + 2;
+      f64 timeout = clib_time_now (&utm->clib_time) + 10;
 
       /* Wait for the outstanding packets */
       while (utm->client_bytes_received <
@@ -698,6 +721,7 @@
 client_disconnect (uri_tcp_test_main_t * utm)
 {
   client_send_disconnect (utm);
+  clib_warning ("Sent disconnect");
   if (wait_for_state_change (utm, STATE_START))
     {
       clib_warning ("Disconnect failed");
@@ -721,7 +745,7 @@
     }
 
   /* Init test data */
-  vec_validate (utm->connect_test_data, 64 * 1024 - 1);
+  vec_validate (utm->connect_test_data, 128 * 1024 - 1);
   for (i = 0; i < vec_len (utm->connect_test_data); i++)
     utm->connect_test_data[i] = i & 0xff;
 
@@ -899,6 +923,9 @@
   rmp->_vl_msg_id = ntohs (VL_API_ACCEPT_SESSION_REPLY);
   rmp->handle = mp->handle;
   vl_msg_api_send_shmem (utm->vl_input_queue, (u8 *) & rmp);
+
+  session->bytes_received = 0;
+  session->start = clib_time_now (&utm->clib_time);
 }
 
 void
@@ -909,37 +936,50 @@
   int n_read;
   session_fifo_event_t evt;
   unix_shared_memory_queue_t *q;
-  int rv, bytes;
+  session_t *session;
+  int rv;
+  u32 max_dequeue, offset, max_transfer, rx_buf_len;
 
+  rx_buf_len = vec_len (utm->rx_buf);
   rx_fifo = e->fifo;
-  tx_fifo = utm->sessions[rx_fifo->client_session_index].server_tx_fifo;
+  session = &utm->sessions[rx_fifo->client_session_index];
+  tx_fifo = session->server_tx_fifo;
 
-  bytes = svm_fifo_max_dequeue (rx_fifo);
+  max_dequeue = svm_fifo_max_dequeue (rx_fifo);
   /* Allow enqueuing of a new event */
   svm_fifo_unset_event (rx_fifo);
 
-  if (bytes == 0)
-    return;
+  if (PREDICT_FALSE (max_dequeue == 0))
+    {
+      return;
+    }
 
-  /* Read the bytes */
+  /* Read the max_dequeue */
   do
     {
-      n_read = svm_fifo_dequeue_nowait (rx_fifo, vec_len (utm->rx_buf),
-					utm->rx_buf);
-      if (n_read > 0)
-	bytes -= n_read;
-
-      if (utm->drop_packets)
-	continue;
-
-      /* Reflect if a non-drop session */
+      max_transfer = clib_min (rx_buf_len, max_dequeue);
+      n_read = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, utm->rx_buf);
       if (n_read > 0)
 	{
+	  max_dequeue -= n_read;
+	  session->bytes_received += n_read;
+	}
+
+      /* Reflect if a non-drop session */
+      if (!utm->drop_packets && n_read > 0)
+	{
+	  offset = 0;
 	  do
 	    {
-	      rv = svm_fifo_enqueue_nowait (tx_fifo, n_read, utm->rx_buf);
+	      rv = svm_fifo_enqueue_nowait (tx_fifo, n_read,
+					    &utm->rx_buf[offset]);
+	      if (rv > 0)
+		{
+		  n_read -= rv;
+		  offset += rv;
+		}
 	    }
-	  while (rv <= 0 && !utm->time_to_stop);
+	  while ((rv <= 0 || n_read > 0) && !utm->time_to_stop);
 
 	  /* If event wasn't set, add one */
 	  if (svm_fifo_set_event (tx_fifo))
@@ -951,11 +991,11 @@
 
 	      q = utm->vpp_event_queue;
 	      unix_shared_memory_queue_add (q, (u8 *) & evt,
-					    0 /* do wait for mutex */ );
+					    1 /* nowait? TODO confirm */ );
 	    }
 	}
     }
-  while ((n_read < 0 || bytes > 0) && !utm->time_to_stop);
+  while ((n_read < 0 || max_dequeue > 0) && !utm->time_to_stop);
 }
 
 void
@@ -1068,9 +1108,18 @@
 					   mp)
 {
   uri_tcp_test_main_t *utm = &uri_tcp_test_main;
+  session_t *session;
 
-  clib_warning ("retval %d", ntohl (mp->retval));
+  if (mp->retval)
+    {
+      clib_warning ("vpp complained about disconnect: %d",
+		    ntohl (mp->retval));
+    }
+
   utm->state = STATE_START;
+  session = pool_elt_at_index (utm->sessions, utm->connected_session_index);
+  if (session)
+    session_print_stats (utm, session);
 }
 
 #define foreach_uri_msg                                 \
@@ -1123,7 +1172,7 @@
   /* make the main heap thread-safe */
   h->flags |= MHEAP_FLAG_THREAD_SAFE;
 
-  vec_validate (utm->rx_buf, 65536);
+  vec_validate (utm->rx_buf, 128 << 10);
 
   utm->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
 
@@ -1186,6 +1235,7 @@
   utm->drop_packets = drop_packets;
   utm->test_return_packets = test_return_packets;
   utm->bytes_to_send = bytes_to_send;
+  utm->time_to_stop = 0;
 
   setup_signal_handlers ();
   uri_api_hookup (utm);