Implement SACK-based TCP loss recovery (RFC 6675)

- refactor existing congestion control code (RFC 6582/5681). Handling of ACK
  feedback now consists of: ACK parsing, congestion control event detection,
  event handling, and congestion control update
- extend the SACK scoreboard to support SACK-based retransmissions
- basic implementation of Eifel detection algorithm (RFC 3522) for
  detecting spurious retransmissions
- actually initialize the per-thread frame freelist hash tables
- increase worker stack size to 2 MB
- fix session queue node out-of-buffer handling
  - ensure that the local buffer cache vec_len matches reality
  - avoid 2x spurious event requeues when short of buffers
  - count out-of-buffer events
- make the builtin server thread-safe
- fix bihash template threading issue: need to paint -1 across uninitialized
  working_copy_length vector elements (via rebase from master)

Change-Id: I646cb9f1add9a67d08f4a87badbcb117980ebfc4
Signed-off-by: Florin Coras <fcoras@cisco.com>
Signed-off-by: Dave Barach <dbarach@cisco.com>
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index 4f0e211..8bd2f36 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -39,7 +39,8 @@
 
 typedef struct
 {
-  u8 *rx_buf;
+  /* Per-thread RX buffer */
+  u8 **rx_buf;
   unix_shared_memory_queue_t **vpp_queue;
   u64 byte_index;
 
@@ -117,13 +118,15 @@
 test_bytes (builtin_server_main_t * bsm, int actual_transfer)
 {
   int i;
+  u32 my_thread_id = vlib_get_thread_index ();
 
   for (i = 0; i < actual_transfer; i++)
     {
-      if (bsm->rx_buf[i] != ((bsm->byte_index + i) & 0xff))
+      if (bsm->rx_buf[my_thread_id][i] != ((bsm->byte_index + i) & 0xff))
 	{
 	  clib_warning ("at %lld expected %d got %d", bsm->byte_index + i,
-			(bsm->byte_index + i) & 0xff, bsm->rx_buf[i]);
+			(bsm->byte_index + i) & 0xff,
+			bsm->rx_buf[my_thread_id][i]);
 	}
     }
   bsm->byte_index += actual_transfer;
@@ -138,6 +141,7 @@
   builtin_server_main_t *bsm = &builtin_server_main;
   session_fifo_event_t evt;
   static int serial_number = 0;
+  u32 my_thread_id = vlib_get_thread_index ();
 
   tx_fifo = s->server_tx_fifo;
   rx_fifo = s->server_rx_fifo;
@@ -171,11 +175,12 @@
       return 0;
     }
 
-  vec_validate (bsm->rx_buf, max_transfer - 1);
-  _vec_len (bsm->rx_buf) = max_transfer;
+  vec_validate (bsm->rx_buf, my_thread_id);
+  vec_validate (bsm->rx_buf[my_thread_id], max_transfer - 1);
+  _vec_len (bsm->rx_buf[my_thread_id]) = max_transfer;
 
   actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer,
-					     bsm->rx_buf);
+					     bsm->rx_buf[my_thread_id]);
   ASSERT (actual_transfer == max_transfer);
 
 //  test_bytes (bsm, actual_transfer);
@@ -184,7 +189,8 @@
    * Echo back
    */
 
-  n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer, bsm->rx_buf);
+  n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer,
+				       bsm->rx_buf[my_thread_id]);
 
   if (n_written != max_transfer)
     clib_warning ("short trout!");