Horizontal (nSessions) scaling draft

- Data structure preallocation (configurable via the new "preallocated-sessions" parameter).
- Input state machine fixes for mid-stream 3-way handshake retries.
- Batch connections in the builtin_client
- Multiple private fifo segment support
- Fix elog simultaneous event type registration
- Fix sacks when segment hole is added after highest sacked
- Add "accepting" session state for sessions pending accept
- Add ssvm non-recursive locking
- Estimate RTT for syn-ack
- Don't init fifo pointers. We're using relative offsets for ooo
  segments
- CLI to dump individual session

Change-Id: Ie0598563fd246537bafba4feed7985478ea1d415
Signed-off-by: Dave Barach <dbarach@cisco.com>
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 3cc56f3..8a95371 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -174,6 +174,8 @@
   props->preallocated_fifo_pairs = options[APP_OPTIONS_PREALLOC_FIFO_PAIRS];
   props->use_private_segment = options[APP_OPTIONS_FLAGS]
     & APP_OPTIONS_FLAGS_BUILTIN_APP;
+  props->private_segment_count = options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT];
+  props->private_segment_size = options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE];
 
   first_seg_size = options[SESSION_OPTIONS_SEGMENT_SIZE];
   if ((rv = segment_manager_init (sm, props, first_seg_size)))
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 338ae85..566a52d 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -275,27 +275,6 @@
   return 0;
 }
 
-session_type_t
-session_type_from_proto_and_ip (session_api_proto_t proto, u8 is_ip4)
-{
-  if (proto == SESSION_PROTO_TCP)
-    {
-      if (is_ip4)
-	return SESSION_TYPE_IP4_TCP;
-      else
-	return SESSION_TYPE_IP6_TCP;
-    }
-  else
-    {
-      if (is_ip4)
-	return SESSION_TYPE_IP4_UDP;
-      else
-	return SESSION_TYPE_IP6_UDP;
-    }
-
-  return SESSION_N_TYPES;
-}
-
 int
 vnet_bind_uri (vnet_bind_args_t * a)
 {
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index 4d6f9de..ed9f89b 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -22,12 +22,6 @@
 #include <vnet/session/application.h>
 #include <vnet/session/transport.h>
 
-typedef enum _session_api_proto
-{
-  SESSION_PROTO_TCP,
-  SESSION_PROTO_UDP
-} session_api_proto_t;
-
 typedef struct _vnet_app_attach_args_t
 {
   /** Binary API client index */
@@ -65,7 +59,7 @@
     struct
     {
       transport_endpoint_t tep;
-      session_api_proto_t proto;
+      transport_proto_t proto;
     };
   };
 
@@ -98,7 +92,7 @@
     struct
     {
       transport_endpoint_t tep;
-      session_api_proto_t proto;
+      transport_proto_t proto;
     };
   };
   u32 app_index;
@@ -120,6 +114,8 @@
   APP_EVT_QUEUE_SIZE,
   APP_OPTIONS_FLAGS,
   APP_OPTIONS_PREALLOC_FIFO_PAIRS,
+  APP_OPTIONS_PRIVATE_SEGMENT_COUNT,
+  APP_OPTIONS_PRIVATE_SEGMENT_SIZE,
   SESSION_OPTIONS_SEGMENT_SIZE,
   SESSION_OPTIONS_ADD_SEGMENT_SIZE,
   SESSION_OPTIONS_RX_FIFO_SIZE,
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index b24f5fd..56e6263 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -378,24 +378,12 @@
 					 n_tx_pkts, 0);
 }
 
-stream_session_t *
-session_event_get_session (session_fifo_event_t * e0, u8 thread_index)
+always_inline stream_session_t *
+session_event_get_session (session_fifo_event_t * e, u8 thread_index)
 {
-  svm_fifo_t *f0;
-  stream_session_t *s0;
-  u32 session_index0;
-
-  f0 = e0->fifo;
-  session_index0 = f0->master_session_index;
-
-  /* $$$ add multiple event queues, per vpp worker thread */
-  ASSERT (f0->master_thread_index == thread_index);
-
-  s0 = stream_session_get_if_valid (session_index0, thread_index);
-
-  ASSERT (s0 == 0 || s0->thread_index == thread_index);
-
-  return s0;
+  ASSERT (e->fifo->master_thread_index == thread_index);
+  return stream_session_get_if_valid (e->fifo->master_session_index,
+				      thread_index);
 }
 
 void
@@ -569,7 +557,6 @@
 	case FIFO_EVENT_BUILTIN_RX:
 	  s0 = session_event_get_session (e0, my_thread_index);
 	  svm_fifo_unset_event (s0->server_rx_fifo);
-	  /* Get session's server */
 	  app = application_get (s0->app_index);
 	  app->cb_fns.builtin_server_rx_callback (s0);
 	  break;
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index dcef626..262b7fa 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -30,7 +30,7 @@
 /**
  * Process private segment index
  */
-u32 private_segment_index = ~0;
+u32 *private_segment_indices;
 
 /**
  * Default fifo and segment size. TODO config.
@@ -70,7 +70,8 @@
       return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
     }
 
-  vec_add1 (sm->segment_indices, ca->new_segment_index);
+  vec_append (sm->segment_indices, ca->new_segment_indices);
+  vec_free (ca->new_segment_indices);
 
   return 0;
 }
@@ -111,22 +112,23 @@
 {
   svm_fifo_segment_create_args_t _a, *a = &_a;
 
-  if (private_segment_index != ~0)
+  if (private_segment_indices)
     return;
 
   memset (a, 0, sizeof (*a));
   a->segment_name = "process-private-segment";
   a->segment_size = ~0;
-  a->new_segment_index = ~0;
   a->rx_fifo_size = props->rx_fifo_size;
   a->tx_fifo_size = props->tx_fifo_size;
   a->preallocated_fifo_pairs = props->preallocated_fifo_pairs;
+  a->private_segment_count = props->private_segment_count;
+  a->private_segment_size = props->private_segment_size;
 
   if (svm_fifo_segment_create_process_private (a))
     clib_warning ("Failed to create process private segment");
 
-  private_segment_index = a->new_segment_index;
-  ASSERT (private_segment_index != ~0);
+  private_segment_indices = a->new_segment_indices;
+  ASSERT (vec_len (private_segment_indices));
 }
 
 /**
@@ -156,10 +158,10 @@
     }
   else
     {
-      if (private_segment_index == ~0)
+      if (vec_len (private_segment_indices) == 0)
 	segment_manager_alloc_process_private_segment (properties);
-      ASSERT (private_segment_index != ~0);
-      vec_add1 (sm->segment_indices, private_segment_index);
+      ASSERT (vec_len (private_segment_indices));
+      vec_append (sm->segment_indices, private_segment_indices);
     }
 
   clib_spinlock_init (&sm->lockp);
@@ -320,7 +322,7 @@
   /* See if we're supposed to create another segment */
   if (*server_rx_fifo == 0)
     {
-      if (sm->properties->add_segment)
+      if (sm->properties->add_segment && !sm->properties->use_private_segment)
 	{
 	  if (added_a_segment)
 	    {
@@ -379,6 +381,10 @@
   svm_fifo_segment_free_fifo (fifo_segment, tx_fifo,
 			      FIFO_SEGMENT_TX_FREELIST);
 
+  /* Don't try to delete process-private segments */
+  if (sm->properties->private_segment_count > 0)
+    return;
+
   /* Remove segment only if it holds no fifos and not the first */
   if (sm->segment_indices[0] != svm_segment_index
       && !svm_fifo_segment_has_fifos (fifo_segment))
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index df38d2b..41abeb2 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -39,6 +39,10 @@
 
   /** Use private memory segment instead of shared memory */
   u8 use_private_segment;
+
+  /** Use one or more private mheaps, instead of the global heap */
+  u32 private_segment_count;
+  u32 private_segment_size;
 } segment_manager_properties_t;
 
 typedef struct _segment_manager
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index fe19804..0a86d56 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -198,21 +198,28 @@
  */
 stream_session_t *
 stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt,
-			u16 lcl_port, u16 rmt_port, u8 proto,
-			u32 my_thread_index)
+			u16 lcl_port, u16 rmt_port, u8 proto)
 {
   session_manager_main_t *smm = &session_manager_main;
   session_kv4_t kv4;
+  stream_session_t *s;
   int rv;
 
   /* Lookup session amongst established ones */
   make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto);
   rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4);
   if (rv == 0)
-    return stream_session_get_tsi (kv4.value, my_thread_index);
+    return stream_session_get_from_handle (kv4.value);
 
   /* If nothing is found, check if any listener is available */
-  return stream_session_lookup_listener4 (lcl, lcl_port, proto);
+  if ((s = stream_session_lookup_listener4 (lcl, lcl_port, proto)))
+    return s;
+
+  /* Finally, try half-open connections */
+  rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4);
+  if (rv == 0)
+    return stream_session_get_from_handle (kv4.value);
+  return 0;
 }
 
 stream_session_t *
@@ -242,20 +249,27 @@
  * wildcarded local source (listener bound to all interfaces) */
 stream_session_t *
 stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt,
-			u16 lcl_port, u16 rmt_port, u8 proto,
-			u32 my_thread_index)
+			u16 lcl_port, u16 rmt_port, u8 proto)
 {
   session_manager_main_t *smm = vnet_get_session_manager_main ();
   session_kv6_t kv6;
+  stream_session_t *s;
   int rv;
 
   make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto);
   rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6);
   if (rv == 0)
-    return stream_session_get_tsi (kv6.value, my_thread_index);
+    return stream_session_get_from_handle (kv6.value);
 
   /* If nothing is found, check if any listener is available */
-  return stream_session_lookup_listener6 (lcl, lcl_port, proto);
+  if ((s = stream_session_lookup_listener6 (lcl, lcl_port, proto)))
+    return s;
+
+  /* Finally, try half-open connections */
+  rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6);
+  if (rv == 0)
+    return stream_session_get_from_handle (kv6.value);
+  return 0;
 }
 
 stream_session_t *
@@ -340,7 +354,6 @@
   rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4);
   if (rv == 0)
     return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF);
-
   return 0;
 }
 
@@ -390,6 +403,8 @@
   u32 thread_index = tc->thread_index;
   int rv;
 
+  ASSERT (thread_index == vlib_get_thread_index ());
+
   if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo,
 						 &server_tx_fifo,
 						 &fifo_segment_index)))
@@ -854,6 +869,7 @@
 
   s->app_index = server->index;
   s->listener_index = listener_index;
+  s->session_state = SESSION_STATE_ACCEPTING;
 
   /* Shoulder-tap the server */
   if (notify)
@@ -1088,6 +1104,27 @@
     }
 }
 
+session_type_t
+session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4)
+{
+  if (proto == TRANSPORT_PROTO_TCP)
+    {
+      if (is_ip4)
+	return SESSION_TYPE_IP4_TCP;
+      else
+	return SESSION_TYPE_IP6_TCP;
+    }
+  else
+    {
+      if (is_ip4)
+	return SESSION_TYPE_IP4_UDP;
+      else
+	return SESSION_TYPE_IP6_UDP;
+    }
+
+  return SESSION_N_TYPES;
+}
+
 static clib_error_t *
 session_manager_main_enable (vlib_main_t * vm)
 {
@@ -1131,14 +1168,13 @@
     session_vpp_event_queue_allocate (smm, i);
 
   /* $$$$ preallocate hack config parameter */
-  for (i = 0; i < 200000; i++)
+  for (i = 0; i < smm->preallocated_sessions; i++)
     {
-      stream_session_t *ss;
+      stream_session_t *ss __attribute__ ((unused));
       pool_get_aligned (smm->sessions[0], ss, CLIB_CACHE_LINE_BYTES);
-      memset (ss, 0, sizeof (*ss));
     }
 
-  for (i = 0; i < 200000; i++)
+  for (i = 0; i < smm->preallocated_sessions; i++)
     pool_put_index (smm->sessions[0], i);
 
   clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table",
@@ -1208,9 +1244,10 @@
   return 0;
 }
 
-VLIB_INIT_FUNCTION (session_manager_main_init)
-     static clib_error_t *session_config_fn (vlib_main_t * vm,
-					     unformat_input_t * input)
+VLIB_INIT_FUNCTION (session_manager_main_init);
+
+static clib_error_t *
+session_config_fn (vlib_main_t * vm, unformat_input_t * input)
 {
   session_manager_main_t *smm = &session_manager_main;
   u32 nitems;
@@ -1224,6 +1261,9 @@
 	  else
 	    clib_warning ("event queue length %d too small, ignored", nitems);
 	}
+      if (unformat (input, "preallocated-sessions %d",
+		    &smm->preallocated_sessions))
+	;
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 5fa4225..b4507d4 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -80,6 +80,10 @@
     SESSION_N_TYPES,
 } session_type_t;
 
+
+session_type_t
+session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4);
+
 /*
  * Application session state
  */
@@ -87,6 +91,7 @@
 {
   SESSION_STATE_LISTENING,
   SESSION_STATE_CONNECTING,
+  SESSION_STATE_ACCEPTING,
   SESSION_STATE_READY,
   SESSION_STATE_CLOSED,
   SESSION_STATE_N_STATES,
@@ -211,8 +216,12 @@
   /** Per transport rx function that can either dequeue or peek */
   session_fifo_rx_fn *session_tx_fns[SESSION_N_TYPES];
 
+  /** Session manager is enabled */
   u8 is_enabled;
 
+  /** Preallocate session config parameter */
+  u32 preallocated_sessions;
+
   /* Convenience */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
@@ -247,13 +256,12 @@
 						   u16 lcl_port, u8 proto);
 stream_session_t *stream_session_lookup4 (ip4_address_t * lcl,
 					  ip4_address_t * rmt, u16 lcl_port,
-					  u16 rmt_port, u8 proto,
-					  u32 thread_index);
+					  u16 rmt_port, u8 proto);
 stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl,
 						   u16 lcl_port, u8 proto);
 stream_session_t *stream_session_lookup6 (ip6_address_t * lcl,
 					  ip6_address_t * rmt, u16 lcl_port,
-					  u16 rmt_port, u8, u32 thread_index);
+					  u16 rmt_port, u8 proto);
 transport_connection_t
   * stream_session_lookup_transport4 (ip4_address_t * lcl,
 				      ip4_address_t * rmt, u16 lcl_port,
@@ -277,9 +285,24 @@
 			    ti_and_si & 0xFFFFFFFFULL);
 }
 
+always_inline u8
+stream_session_is_valid (u32 si, u8 thread_index)
+{
+  stream_session_t *s;
+  s = pool_elt_at_index (session_manager_main.sessions[thread_index], si);
+  if (s->thread_index != thread_index || s->session_index != si
+      || s->server_rx_fifo->master_session_index != si
+      || s->server_tx_fifo->master_session_index != si
+      || s->server_rx_fifo->master_thread_index != thread_index
+      || s->server_tx_fifo->master_thread_index != thread_index)
+    return 0;
+  return 1;
+}
+
 always_inline stream_session_t *
 stream_session_get (u32 si, u32 thread_index)
 {
+  ASSERT (stream_session_is_valid (si, thread_index));
   return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
 }
 
@@ -292,6 +315,7 @@
   if (pool_is_free_index (session_manager_main.sessions[thread_index], si))
     return 0;
 
+  ASSERT (stream_session_is_valid (si, thread_index));
   return pool_elt_at_index (session_manager_main.sessions[thread_index], si);
 }
 
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 6b8341a..e06bc58 100755
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -47,7 +47,8 @@
 		  svm_fifo_max_enqueue (ss->server_tx_fifo),
 		  stream_session_get_index (ss));
 
-  if (ss->session_state == SESSION_STATE_READY)
+  if (ss->session_state == SESSION_STATE_READY
+      || ss->session_state == SESSION_STATE_ACCEPTING)
     {
       s = format (s, "%U", tp_vft->format_connection, ss->connection_index,
 		  ss->thread_index, verbose);
@@ -68,8 +69,9 @@
     }
   else if (ss->session_state == SESSION_STATE_CLOSED)
     {
-      s = format (s, "[CL] %-40U", tp_vft->format_connection,
-		  ss->connection_index, ss->thread_index, verbose);
+      s =
+	format (s, "[CL] %U", tp_vft->format_connection, ss->connection_index,
+		ss->thread_index, verbose);
       if (verbose == 1)
 	s = format (s, "%v", str);
       if (verbose > 1)
@@ -93,7 +95,13 @@
   int verbose = 0, i;
   stream_session_t *pool;
   stream_session_t *s;
-  u8 *str = 0;
+  u8 *str = 0, one_session = 0, proto_set = 0, proto = 0;
+  u8 is_ip4 = 0, s_type = 0;
+  ip4_address_t lcl_ip4, rmt_ip4;
+  u32 lcl_port = 0, rmt_port = 0;
+
+  memset (&lcl_ip4, 0, sizeof (lcl_ip4));
+  memset (&rmt_ip4, 0, sizeof (rmt_ip4));
 
   if (!smm->is_enabled)
     {
@@ -106,10 +114,43 @@
 	;
       else if (unformat (input, "verbose"))
 	verbose = 1;
+      else if (unformat (input, "tcp"))
+	{
+	  proto_set = 1;
+	  proto = TRANSPORT_PROTO_TCP;
+	}
+      else if (unformat (input, "%U:%d->%U:%d",
+			 unformat_ip4_address, &lcl_ip4, &lcl_port,
+			 unformat_ip4_address, &rmt_ip4, &rmt_port))
+	{
+	  one_session = 1;
+	  is_ip4 = 1;
+	}
+
       else
 	break;
     }
 
+  if (one_session)
+    {
+      if (!proto_set)
+	{
+	  vlib_cli_output (vm, "proto not set");
+	  return clib_error_return (0, "proto not set");
+	}
+
+      s_type = session_type_from_proto_and_ip (proto, is_ip4);
+      s = stream_session_lookup4 (&lcl_ip4, &rmt_ip4,
+				  clib_host_to_net_u16 (lcl_port),
+				  clib_host_to_net_u16 (rmt_port), s_type);
+      if (s)
+	vlib_cli_output (vm, "%U", format_stream_session, s, 2);
+      else
+	vlib_cli_output (vm, "session does not exist");
+
+      return 0;
+    }
+
   for (i = 0; i < vec_len (smm->sessions); i++)
     {
       u32 once_per_pool;
@@ -146,6 +187,7 @@
 	}
       else
 	vlib_cli_output (vm, "Thread %d: no active sessions", i);
+      vec_reset_length (str);
     }
   vec_free (str);
 
@@ -161,15 +203,22 @@
 };
 /* *INDENT-ON* */
 
+static int
+clear_session (stream_session_t * s)
+{
+  application_t *server = application_get (s->app_index);
+  server->cb_fns.session_disconnect_callback (s);
+  return 0;
+}
+
 static clib_error_t *
 clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
 			  vlib_cli_command_t * cmd)
 {
   session_manager_main_t *smm = &session_manager_main;
-  u32 thread_index = 0;
+  u32 thread_index = 0, clear_all = 0;
   u32 session_index = ~0;
-  stream_session_t *pool, *session;
-  application_t *server;
+  stream_session_t **pool, *session;
 
   if (!smm->is_enabled)
     {
@@ -182,28 +231,36 @@
 	;
       else if (unformat (input, "session %d", &session_index))
 	;
+      else if (unformat (input, "all"))
+	clear_all = 1;
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
     }
 
-  if (session_index == ~0)
+  if (!clear_all && session_index == ~0)
     return clib_error_return (0, "session <nn> required, but not set.");
 
-  if (thread_index > vec_len (smm->sessions))
-    return clib_error_return (0, "thread %d out of range [0-%d]",
-			      thread_index, vec_len (smm->sessions));
+  if (session_index != ~0)
+    {
+      session = stream_session_get_if_valid (session_index, thread_index);
+      if (!session)
+	return clib_error_return (0, "no session %d on thread %d",
+				  session_index, thread_index);
+      clear_session (session);
+    }
 
-  pool = smm->sessions[thread_index];
-
-  if (pool_is_free_index (pool, session_index))
-    return clib_error_return (0, "session %d not active", session_index);
-
-  session = pool_elt_at_index (pool, session_index);
-  server = application_get (session->app_index);
-
-  /* Disconnect both app and transport */
-  server->cb_fns.session_disconnect_callback (session);
+  if (clear_all)
+    {
+      /* *INDENT-OFF* */
+      vec_foreach (pool, smm->sessions)
+	{
+	  pool_foreach(session, *pool, ({
+	    clear_session (session);
+	  }));
+	};
+      /* *INDENT-ON* */
+    }
 
   return 0;
 }
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 561a925..9c38bab 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -225,6 +225,12 @@
 		 t->rmt_port, t->proto);
 }
 
+typedef enum _transport_proto
+{
+  TRANSPORT_PROTO_TCP,
+  TRANSPORT_PROTO_UDP
+} transport_proto_t;
+
 typedef struct _transport_endpoint
 {
   ip46_address_t ip;	/** ip address */
diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c
index 6f8be08..a6c8a23 100644
--- a/src/vnet/tcp/builtin_client.c
+++ b/src/vnet/tcp/builtin_client.c
@@ -170,62 +170,90 @@
 {
   tclient_main_t *tm = &tclient_main;
   int my_thread_index = vlib_get_thread_index ();
-  vl_api_disconnect_session_t *dmp;
   session_t *sp;
   int i;
   int delete_session;
   u32 *connection_indices;
-  u32 tx_quota = 0;
-  u32 delta, prev_bytes_received_this_session;
+  u32 *connections_this_batch;
+  u32 nconnections_this_batch;
 
   connection_indices = tm->connection_index_by_thread[my_thread_index];
+  connections_this_batch =
+    tm->connections_this_batch_by_thread[my_thread_index];
 
-  if (tm->run_test == 0 || vec_len (connection_indices) == 0)
+  if ((tm->run_test == 0) ||
+      ((vec_len (connection_indices) == 0)
+       && vec_len (connections_this_batch) == 0))
     return 0;
 
-  for (i = 0; i < vec_len (connection_indices); i++)
+  /* Grab another pile of connections */
+  if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+    {
+      nconnections_this_batch =
+	clib_min (tm->connections_per_batch, vec_len (connection_indices));
+
+      ASSERT (nconnections_this_batch > 0);
+      vec_validate (connections_this_batch, nconnections_this_batch - 1);
+      clib_memcpy (connections_this_batch,
+		   connection_indices + vec_len (connection_indices)
+		   - nconnections_this_batch,
+		   nconnections_this_batch * sizeof (u32));
+      _vec_len (connection_indices) -= nconnections_this_batch;
+    }
+
+  if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch
+		     && tm->prev_conns == vec_len (connections_this_batch)))
+    {
+      tm->repeats++;
+      tm->prev_conns = vec_len (connections_this_batch);
+      if (tm->repeats == 500000)
+	{
+	  clib_warning ("stuck clients");
+	}
+    }
+  else
+    {
+      tm->prev_conns = vec_len (connections_this_batch);
+      tm->repeats = 0;
+    }
+
+  for (i = 0; i < vec_len (connections_this_batch); i++)
     {
       delete_session = 1;
 
-      sp = pool_elt_at_index (tm->sessions, connection_indices[i]);
+      sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]);
 
-      if ((tm->no_return || tx_quota < 60) && sp->bytes_to_send > 0)
+      if (sp->bytes_to_send > 0)
 	{
 	  send_test_chunk (tm, sp);
 	  delete_session = 0;
-	  tx_quota++;
 	}
-      if (!tm->no_return && sp->bytes_to_receive > 0)
+      if (sp->bytes_to_receive > 0)
 	{
-	  prev_bytes_received_this_session = sp->bytes_received;
 	  receive_test_chunk (tm, sp);
-	  delta = sp->bytes_received - prev_bytes_received_this_session;
-	  if (delta > 0)
-	    tx_quota--;
 	  delete_session = 0;
 	}
       if (PREDICT_FALSE (delete_session == 1))
 	{
-	  __sync_fetch_and_add (&tm->tx_total, tm->bytes_to_send);
+	  u32 index, thread_index;
+	  stream_session_t *s;
+
+	  __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent);
 	  __sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
 
-	  dmp = vl_msg_api_alloc_as_if_client (sizeof (*dmp));
-	  memset (dmp, 0, sizeof (*dmp));
-	  dmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION);
-	  dmp->client_index = tm->my_client_index;
-	  dmp->handle = sp->vpp_session_handle;
-	  if (!unix_shared_memory_queue_add (tm->vl_input_queue, (u8 *) & dmp,
-					     1))
+	  stream_session_parse_handle (sp->vpp_session_handle,
+				       &index, &thread_index);
+	  s = stream_session_get_if_valid (index, thread_index);
+
+	  if (s)
 	    {
-	      vec_delete (connection_indices, 1, i);
-	      tm->connection_index_by_thread[my_thread_index] =
-		connection_indices;
+	      stream_session_disconnect (s);
+	      vec_delete (connections_this_batch, 1, i);
+	      i--;
 	      __sync_fetch_and_add (&tm->ready_connections, -1);
 	    }
 	  else
-	    {
-	      vl_msg_api_free (dmp);
-	    }
+	    clib_warning ("session AWOL?");
 
 	  /* Kick the debug CLI process */
 	  if (tm->ready_connections == 0)
@@ -236,6 +264,10 @@
 	    }
 	}
     }
+
+  tm->connection_index_by_thread[my_thread_index] = connection_indices;
+  tm->connections_this_batch_by_thread[my_thread_index] =
+    connections_this_batch;
   return 0;
 }
 
@@ -356,6 +388,8 @@
   tm->vlib_main = vm;
 
   vec_validate (tm->connection_index_by_thread, thread_main->n_vlib_mains);
+  vec_validate (tm->connections_this_batch_by_thread,
+		thread_main->n_vlib_mains);
   return 0;
 }
 
@@ -388,7 +422,8 @@
   pool_get (tm->sessions, session);
   memset (session, 0, sizeof (*session));
   session_index = session - tm->sessions;
-  session->bytes_to_receive = session->bytes_to_send = tm->bytes_to_send;
+  session->bytes_to_send = tm->bytes_to_send;
+  session->bytes_to_receive = tm->no_return ? 0ULL : tm->bytes_to_send;
   session->server_rx_fifo = s->server_rx_fifo;
   session->server_rx_fifo->client_session_index = session_index;
   session->server_tx_fifo = s->server_tx_fifo;
@@ -485,6 +520,8 @@
   options[SESSION_OPTIONS_SEGMENT_SIZE] = (2ULL << 32);
   options[SESSION_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
   options[SESSION_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size / 2;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = tm->private_segment_size;
   options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
 
   options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
@@ -561,6 +598,9 @@
   tm->bytes_to_send = 8192;
   tm->no_return = 0;
   tm->fifo_size = 64 << 10;
+  tm->connections_per_batch = 1000;
+  tm->private_segment_count = 0;
+  tm->private_segment_size = 0;
 
   vec_free (tm->connect_uri);
 
@@ -582,6 +622,20 @@
 	tm->no_return = 1;
       else if (unformat (input, "fifo-size %d", &tm->fifo_size))
 	tm->fifo_size <<= 10;
+      else if (unformat (input, "private-segment-count %d",
+			 &tm->private_segment_count))
+	;
+      else if (unformat (input, "private-segment-size %dm", &tmp))
+	tm->private_segment_size = tmp << 20;
+      else if (unformat (input, "private-segment-size %dg", &tmp))
+	tm->private_segment_size = tmp << 30;
+      else if (unformat (input, "private-segment-size %d", &tmp))
+	tm->private_segment_size = tmp;
+      else if (unformat (input, "preallocate-fifos"))
+	tm->prealloc_fifos = 1;
+      else
+	if (unformat (input, "client-batch %d", &tm->connections_per_batch))
+	;
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
@@ -688,9 +742,13 @@
     vlib_cli_output (vm, "zero delta-t?");
 
 cleanup:
-  pool_free (tm->sessions);
+  tm->run_test = 0;
   for (i = 0; i < vec_len (tm->connection_index_by_thread); i++)
-    vec_reset_length (tm->connection_index_by_thread[i]);
+    {
+      vec_reset_length (tm->connection_index_by_thread[i]);
+      vec_reset_length (tm->connections_this_batch_by_thread[i]);
+    }
+  pool_free (tm->sessions);
 
   return 0;
 }
diff --git a/src/vnet/tcp/builtin_client.h b/src/vnet/tcp/builtin_client.h
index 3462e0e..38af231 100644
--- a/src/vnet/tcp/builtin_client.h
+++ b/src/vnet/tcp/builtin_client.h
@@ -63,6 +63,9 @@
   u32 configured_segment_size;
   u32 fifo_size;
   u32 expected_connections;		/**< Number of clients/connections */
+  u32 connections_per_batch;		/**< Connections to rx/tx at once */
+  u32 private_segment_count;		/**< Number of private fifo segs */
+  u32 private_segment_size;		/**< size of private fifo segs */
 
   /*
    * Test state variables
@@ -72,6 +75,7 @@
   uword *session_index_by_vpp_handles;	/**< Hash table for disconnecting */
   u8 *connect_test_data;		/**< Pre-computed test data */
   u32 **connection_index_by_thread;
+  u32 **connections_this_batch_by_thread; /**< active connection batch */
   pthread_t client_thread_handle;
 
   volatile u32 ready_connections;
@@ -82,7 +86,8 @@
 
   f64 test_start_time;
   f64 test_end_time;
-
+  u32 prev_conns;
+  u32 repeats;
   /*
    * Flags
    */
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index 775bfc2..8e958ac 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -56,12 +56,15 @@
   u32 fifo_size;		/**< Fifo size */
   u32 rcv_buffer_size;		/**< Rcv buffer size */
   u32 prealloc_fifos;		/**< Preallocate fifos */
+  u32 private_segment_count;	/**< Number of private segments  */
+  u32 private_segment_size;	/**< Size of private segments  */
 
   /*
    * Test state
    */
   u8 **rx_buf;			/**< Per-thread RX buffer */
   u64 byte_index;
+  u32 **rx_retries;
 
   vlib_main_t *vlib_main;
 } builtin_server_main_t;
@@ -77,6 +80,8 @@
     session_manager_get_vpp_event_queue (s->thread_index);
   s->session_state = SESSION_STATE_READY;
   bsm->byte_index = 0;
+  vec_validate (bsm->rx_retries[s->thread_index], s->session_index);
+  bsm->rx_retries[s->thread_index][s->session_index] = 0;
   return 0;
 }
 
@@ -173,11 +178,16 @@
   builtin_server_main_t *bsm = &builtin_server_main;
   session_fifo_event_t evt;
   static int serial_number = 0;
-  u32 my_thread_id = vlib_get_thread_index ();
+  u32 thread_index = vlib_get_thread_index ();
+
+  ASSERT (s->thread_index == thread_index);
 
   rx_fifo = s->server_rx_fifo;
   tx_fifo = s->server_tx_fifo;
 
+  ASSERT (rx_fifo->master_thread_index == thread_index);
+  ASSERT (tx_fifo->master_thread_index == thread_index);
+
   max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
   max_enqueue = svm_fifo_max_enqueue (s->server_tx_fifo);
 
@@ -201,21 +211,31 @@
 	  evt.event_type = FIFO_EVENT_BUILTIN_RX;
 	  evt.event_id = 0;
 
-	  q = bsm->vpp_queue[s->thread_index];
+	  q = bsm->vpp_queue[thread_index];
 	  if (PREDICT_FALSE (q->cursize == q->maxsize))
 	    clib_warning ("out of event queue space");
-	  else
-	    unix_shared_memory_queue_add (q, (u8 *) & evt,
-					  0 /* don't wait for mutex */ );
+	  else if (unix_shared_memory_queue_add (q, (u8 *) & evt, 0	/* don't wait for mutex */
+		   ))
+	    clib_warning ("failed to enqueue self-tap");
+
+	  bsm->rx_retries[thread_index][s->session_index]++;
+	  if (bsm->rx_retries[thread_index][s->session_index] == 500000)
+	    {
+	      clib_warning ("session stuck: %U", format_stream_session, s, 2);
+	    }
+	}
+      else
+	{
+	  bsm->rx_retries[thread_index][s->session_index] = 0;
 	}
 
       return 0;
     }
 
-  _vec_len (bsm->rx_buf[my_thread_id]) = max_transfer;
+  _vec_len (bsm->rx_buf[thread_index]) = max_transfer;
 
   actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer,
-					     bsm->rx_buf[my_thread_id]);
+					     bsm->rx_buf[thread_index]);
   ASSERT (actual_transfer == max_transfer);
 
 //  test_bytes (bsm, actual_transfer);
@@ -225,7 +245,7 @@
    */
 
   n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer,
-				       bsm->rx_buf[my_thread_id]);
+				       bsm->rx_buf[thread_index]);
 
   if (n_written != max_transfer)
     clib_warning ("short trout!");
@@ -237,11 +257,13 @@
       evt.event_type = FIFO_EVENT_APP_TX;
       evt.event_id = serial_number++;
 
-      unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index],
-				    (u8 *) & evt, 0 /* do wait for mutex */ );
+      if (unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index],
+					(u8 *) & evt,
+					0 /* do wait for mutex */ ))
+	clib_warning ("failed to enqueue tx evt");
     }
 
-  if (PREDICT_FALSE (max_enqueue < max_dequeue))
+  if (PREDICT_FALSE (n_written < max_dequeue))
     goto rx_event;
 
   return 0;
@@ -328,9 +350,13 @@
   a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 512 << 20;
   a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = bsm->fifo_size;
   a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = bsm->fifo_size;
-  a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+  a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bsm->private_segment_count;
+  a->options[APP_OPTIONS_PRIVATE_SEGMENT_SIZE] = bsm->private_segment_size;
   a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
     bsm->prealloc_fifos ? bsm->prealloc_fifos : 1;
+
+  a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_BUILTIN_APP;
+
   a->segment_name = segment_name;
   a->segment_name_length = ARRAY_LEN (segment_name);
 
@@ -374,6 +400,8 @@
   num_threads = 1 /* main thread */  + vtm->n_threads;
   vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
   vec_validate (bsm->rx_buf, num_threads - 1);
+  vec_validate (bsm->rx_retries, num_threads - 1);
+
   for (i = 0; i < num_threads; i++)
     vec_validate (bsm->rx_buf[i], bsm->rcv_buffer_size);
 
@@ -435,11 +463,14 @@
 {
   builtin_server_main_t *bsm = &builtin_server_main;
   int rv;
+  u32 tmp;
 
   bsm->no_echo = 0;
   bsm->fifo_size = 64 << 10;
   bsm->rcv_buffer_size = 128 << 10;
   bsm->prealloc_fifos = 0;
+  bsm->private_segment_count = 0;
+  bsm->private_segment_size = 0;
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
@@ -449,8 +480,17 @@
 	bsm->fifo_size <<= 10;
       else if (unformat (input, "rcv-buf-size %d", &bsm->rcv_buffer_size))
 	;
-      else if (unformat (input, "prealloc-fifos", &bsm->prealloc_fifos))
+      else if (unformat (input, "prealloc-fifos %d", &bsm->prealloc_fifos))
 	;
+      else if (unformat (input, "private-segment-count %d",
+			 &bsm->private_segment_count))
+	;
+      else if (unformat (input, "private-segment-size %dm", &tmp))
+	bsm->private_segment_size = tmp << 20;
+      else if (unformat (input, "private-segment-size %dg", &tmp))
+	bsm->private_segment_size = tmp << 30;
+      else if (unformat (input, "private-segment-size %d", &tmp))
+	bsm->private_segment_size = tmp;
       else
 	return clib_error_return (0, "unknown input `%U'",
 				  format_unformat_error, input);
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 4e85eb3..f379e69 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -74,8 +74,16 @@
 tcp_connection_unbind (u32 listener_index)
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
-  TCP_EVT_DBG (TCP_EVT_UNBIND,
-	       pool_elt_at_index (tm->listener_pool, listener_index));
+  tcp_connection_t *tc;
+
+  tc = pool_elt_at_index (tm->listener_pool, listener_index);
+
+  TCP_EVT_DBG (TCP_EVT_UNBIND, tc);
+
+  /* Poison the entry */
+  if (CLIB_DEBUG > 0)
+    memset (tc, 0xFA, sizeof (*tc));
+
   pool_put_index (tm->listener_pool, listener_index);
 }
 
@@ -124,9 +132,20 @@
 
   /* Check if half-open */
   if (tc->state == TCP_STATE_SYN_SENT)
-    pool_put (tm->half_open_connections, tc);
+    {
+      /* Poison the entry */
+      if (CLIB_DEBUG > 0)
+	memset (tc, 0xFA, sizeof (*tc));
+      pool_put (tm->half_open_connections, tc);
+    }
   else
-    pool_put (tm->connections[tc->c_thread_index], tc);
+    {
+      int thread_index = tc->c_thread_index;
+      /* Poison the entry */
+      if (CLIB_DEBUG > 0)
+	memset (tc, 0xFA, sizeof (*tc));
+      pool_put (tm->connections[thread_index], tc);
+    }
 }
 
 /**
@@ -168,13 +187,14 @@
 
       /* Make sure all timers are cleared */
       tcp_connection_timers_reset (tc);
-
       stream_session_reset_notify (&tc->connection);
+
+      /* Wait for cleanup from session layer but not forever */
+      tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
       break;
     case TCP_STATE_CLOSED:
       return;
     }
-
 }
 
 /**
@@ -278,6 +298,9 @@
   tries = max - min;
   time_now = tcp_time_now ();
 
+  /* Only support active opens from thread 0 */
+  ASSERT (vlib_get_thread_index () == 0);
+
   /* Start at random point or max */
   pool_get (tm->local_endpoints, tep);
   clib_memcpy (&tep->ip, ip, sizeof (*ip));
@@ -343,6 +366,7 @@
     }
 }
 
+#if 0
 typedef struct ip4_tcp_hdr
 {
   ip4_header_t ip;
@@ -435,6 +459,7 @@
 
   tcp_connection_stack_on_fib_entry (tc);
 }
+#endif /* 0 */
 
 /** Initialize tcp connection variables
  *
@@ -447,7 +472,7 @@
   tcp_init_mss (tc);
   scoreboard_init (&tc->sack_sb);
   tcp_cc_init (tc);
-  tcp_connection_fib_attach (tc);
+  //  tcp_connection_fib_attach (tc);
 }
 
 int
@@ -485,14 +510,38 @@
   if (is_ip4)
     {
       ip4_address_t *ip4;
-      ip4 = ip_interface_get_first_ip (sw_if_index, 1);
-      lcl_addr.ip4.as_u32 = ip4->as_u32;
+      int index;
+      if (vec_len (tm->ip4_src_addresses))
+	{
+	  index = tm->last_v4_address_rotor++;
+	  if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses))
+	    tm->last_v4_address_rotor = 0;
+	  lcl_addr.ip4.as_u32 = tm->ip4_src_addresses[index].as_u32;
+	}
+      else
+	{
+	  ip4 = ip_interface_get_first_ip (sw_if_index, 1);
+	  lcl_addr.ip4.as_u32 = ip4->as_u32;
+	}
     }
   else
     {
       ip6_address_t *ip6;
-      ip6 = ip_interface_get_first_ip (sw_if_index, 0);
-      clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6));
+      int index;
+
+      if (vec_len (tm->ip6_src_addresses))
+	{
+	  index = tm->last_v6_address_rotor++;
+	  if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
+	    tm->last_v6_address_rotor = 0;
+	  clib_memcpy (&lcl_addr.ip6, &tm->ip6_src_addresses[index],
+		       sizeof (*ip6));
+	}
+      else
+	{
+	  ip6 = ip_interface_get_first_ip (sw_if_index, 0);
+	  clib_memcpy (&lcl_addr.ip6, ip6, sizeof (*ip6));
+	}
     }
 
   /* Allocate source port */
@@ -614,7 +663,7 @@
 format_tcp_vars (u8 * s, va_list * args)
 {
   tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
-  s = format (s, " snd_una %u snd_nxt %u snd_una_max %u\n",
+  s = format (s, " snd_una %u snd_nxt %u snd_una_max %u",
 	      tc->snd_una - tc->iss, tc->snd_nxt - tc->iss,
 	      tc->snd_una_max - tc->iss);
   s = format (s, " rcv_nxt %u rcv_las %u\n",
@@ -628,12 +677,17 @@
   s = format (s, " cong %U ", format_tcp_congestion_status, tc);
   s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
 	      tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
-  s = format (s, " prev_ssthresh %u snd_congestion %u dupack %u\n",
+  s = format (s, " prev_ssthresh %u snd_congestion %u dupack %u",
 	      tc->prev_ssthresh, tc->snd_congestion - tc->iss,
 	      tc->rcv_dupacks);
+  s = format (s, " limited_transmit %u\n", tc->limited_transmit - tc->iss);
+  s = format (s, " tsecr %u tsecr_last_ack %u\n", tc->rcv_opts.tsecr,
+	      tc->tsecr_last_ack);
   s = format (s, " rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", tc->rto,
 	      tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
   s = format (s, "rtt_seq %u\n", tc->rtt_seq);
+  s = format (s, " tsval_recent %u tsval_recent_age %u\n", tc->tsval_recent,
+	      tcp_time_now () - tc->tsval_recent_age);
   s = format (s, " scoreboard: %U\n", format_tcp_scoreboard, &tc->sack_sb);
   if (vec_len (tc->snd_sacks))
     s = format (s, " sacks tx: %U\n", format_tcp_sacks, tc);
@@ -719,11 +773,21 @@
   tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
   sack_block_t *sacks = tc->snd_sacks;
   sack_block_t *block;
-  vec_foreach (block, sacks)
-  {
-    s = format (s, " start %u end %u\n", block->start - tc->irs,
-		block->end - tc->irs);
-  }
+  int i, len = 0;
+
+  len = vec_len (sacks);
+  for (i = 0; i < len - 1; i++)
+    {
+      block = &sacks[i];
+      s = format (s, " start %u end %u\n", block->start - tc->irs,
+		  block->end - tc->irs);
+    }
+  if (len)
+    {
+      block = &sacks[len - 1];
+      s = format (s, " start %u end %u", block->start - tc->irs,
+		  block->end - tc->irs);
+    }
   return s;
 }
 
@@ -796,14 +860,18 @@
 always_inline u32
 tcp_round_snd_space (tcp_connection_t * tc, u32 snd_space)
 {
-  if (tc->snd_wnd < tc->snd_mss)
+  if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
     {
       return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
     }
 
   /* If we can't write at least a segment, don't try at all */
-  if (snd_space < tc->snd_mss)
-    return 0;
+  if (PREDICT_FALSE (snd_space < tc->snd_mss))
+    {
+      if (snd_space > clib_min (tc->mss, tc->rcv_opts.mss) - TCP_HDR_LEN_MAX)
+	return snd_space;
+      return 0;
+    }
 
   /* round down to mss multiple */
   return snd_space - (snd_space % tc->snd_mss);
@@ -1042,6 +1110,8 @@
   vlib_thread_main_t *vtm = vlib_get_thread_main ();
   clib_error_t *error = 0;
   u32 num_threads;
+  int thread, i;
+  tcp_connection_t *tc __attribute__ ((unused));
 
   if ((error = vlib_call_init_function (vm, ip_main_init)))
     return error;
@@ -1074,6 +1144,27 @@
   num_threads = 1 /* main thread */  + vtm->n_threads;
   vec_validate (tm->connections, num_threads - 1);
 
+  /*
+   * Preallocate connections
+   */
+  for (thread = 0; thread < num_threads; thread++)
+    {
+      for (i = 0; i < tm->preallocated_connections; i++)
+	pool_get (tm->connections[thread], tc);
+
+      for (i = 0; i < tm->preallocated_connections; i++)
+	pool_put_index (tm->connections[thread], i);
+    }
+
+  /*
+   * Preallocate half-open connections
+   */
+  for (i = 0; i < tm->preallocated_half_open_connections; i++)
+    pool_get (tm->half_open_connections, tc);
+
+  for (i = 0; i < tm->preallocated_half_open_connections; i++)
+    pool_put_index (tm->half_open_connections, i);
+
   /* Initialize per worker thread tx buffers (used for control messages) */
   vec_validate (tm->tx_buffers, num_threads - 1);
 
@@ -1116,7 +1207,6 @@
 {
   tcp_main_t *tm = vnet_get_tcp_main ();
 
-  tm->vlib_main = vm;
   tm->vnet_main = vnet_get_main ();
   tm->is_enabled = 0;
 
@@ -1125,6 +1215,97 @@
 
 VLIB_INIT_FUNCTION (tcp_init);
 
+
+static clib_error_t *
+tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
+{
+  tcp_main_t *tm = vnet_get_tcp_main ();
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat
+	  (input, "preallocated-connections %d",
+	   &tm->preallocated_connections))
+	;
+      else if (unformat (input, "preallocated-half-open-connections %d",
+			 &tm->preallocated_half_open_connections))
+	;
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+  return 0;
+}
+
+VLIB_CONFIG_FUNCTION (tcp_config_fn, "tcp");
+
+static clib_error_t *
+tcp_src_address (vlib_main_t * vm,
+		 unformat_input_t * input, vlib_cli_command_t * cmd_arg)
+{
+  tcp_main_t *tm = vnet_get_tcp_main ();
+  ip4_address_t v4start, v4end;
+  ip6_address_t v6start, v6end;
+  int v4set = 0;
+  int v6set = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U - %U", unformat_ip4_address, &v4start,
+		    unformat_ip4_address, &v4end))
+	v4set = 1;
+      else if (unformat (input, "%U", unformat_ip4_address, &v4start))
+	{
+	  memcpy (&v4end, &v4start, sizeof (v4start));
+	  v4set = 1;
+	}
+      else if (unformat (input, "%U - %U", unformat_ip6_address, &v6start,
+			 unformat_ip6_address, &v6end))
+	v6set = 1;
+      else if (unformat (input, "%U", unformat_ip6_address, &v6start))
+	{
+	  memcpy (&v6end, &v6start, sizeof (v6start));
+	  v6set = 1;
+	}
+      else
+	break;
+    }
+
+  if (!v4set && !v6set)
+    return clib_error_return (0, "at least one v4 or v6 address required");
+
+  if (v4set)
+    {
+      u32 tmp;
+
+      do
+	{
+	  vec_add1 (tm->ip4_src_addresses, v4start);
+	  tmp = clib_net_to_host_u32 (v4start.as_u32);
+	  tmp++;
+	  v4start.as_u32 = clib_host_to_net_u32 (tmp);
+	}
+      while (clib_net_to_host_u32 (v4start.as_u32) <=
+	     clib_net_to_host_u32 (v4end.as_u32));
+    }
+  if (v6set)
+    {
+      clib_warning ("v6 src address list unimplemented...");
+    }
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (tcp_src_address_command, static) =
+{
+  .path = "tcp src-address",
+  .short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range",
+  .function = tcp_src_address,
+};
+/* *INDENT-ON* */
+
+
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 12d804b..37b10fd 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -348,6 +348,16 @@
   /* Flag that indicates if stack is on or off */
   u8 is_enabled;
 
+  /** Number of preallocated connections */
+  u32 preallocated_connections;
+  u32 preallocated_half_open_connections;
+
+  /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
+  ip4_address_t *ip4_src_addresses;
+  u32 last_v4_address_rotor;
+  u32 last_v6_address_rotor;
+  ip6_address_t *ip6_src_addresses;
+
   /* convenience */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
@@ -569,6 +579,7 @@
 always_inline void
 tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval)
 {
+  ASSERT (tc->c_thread_index == vlib_get_thread_index ());
   tc->timers[timer_id]
     = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
 				   tc->c_c_index, timer_id, interval);
@@ -577,6 +588,7 @@
 always_inline void
 tcp_timer_reset (tcp_connection_t * tc, u8 timer_id)
 {
+  ASSERT (tc->c_thread_index == vlib_get_thread_index ());
   if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
     return;
 
@@ -588,6 +600,7 @@
 always_inline void
 tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval)
 {
+  ASSERT (tc->c_thread_index == vlib_get_thread_index ());
   if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)
     tw_timer_stop_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index],
 				tc->timers[timer_id]);
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index ae68ad1..be51bca 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -383,9 +383,16 @@
       "establish",                                              	\
     },                                                          	\
   };                                                            	\
-  DECLARE_ETD(_tc, _e, 2);                                      	\
-  ed->data[0] = _timer_id;                                      	\
-  ed->data[1] = _timer_id;                                      	\
+  if (_tc)								\
+    {									\
+      DECLARE_ETD(_tc, _e, 2);                                      	\
+      ed->data[0] = _timer_id;                                      	\
+      ed->data[1] = _timer_id;                                      	\
+    }									\
+  else									\
+    {									\
+      clib_warning ("pop for unexisting connection %d", _tc_index);	\
+    }									\
 }
 
 #define TCP_EVT_SEG_INVALID_HANDLER(_tc, _seq, _end, ...)		\
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index a2e6dad..45db0da 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -251,6 +251,7 @@
   if (tcp_opts_tstamp (&tc->rcv_opts) && tc->tsval_recent
       && seq_leq (seq, tc->rcv_las) && seq_leq (tc->rcv_las, seq_end))
     {
+      ASSERT (timestamp_leq (tc->tsval_recent, tc->rcv_opts.tsval));
       tc->tsval_recent = tc->rcv_opts.tsval;
       tc->tsval_recent_age = tcp_time_now ();
     }
@@ -383,12 +384,9 @@
   if (tc->srtt != 0)
     {
       err = mrtt - tc->srtt;
-//      tc->srtt += err >> 3;
 
       /* XXX Drop in RTT results in RTTVAR increase and bigger RTO.
        * The increase should be bound */
-//      tc->rttvar += ((int) clib_abs (err) - (int) tc->rttvar) >> 2;
-
       tc->srtt = clib_max ((int) tc->srtt + (err >> 3), 1);
       diff = (clib_abs (err) - (int) tc->rttvar) >> 2;
       tc->rttvar = clib_max ((int) tc->rttvar + diff, 1);
@@ -491,6 +489,14 @@
 	  && (prev_snd_wnd == tc->snd_wnd));
 }
 
+static u8
+tcp_is_lost_fin (tcp_connection_t * tc)
+{
+  if ((tc->flags & TCP_CONN_FINSNT) && tc->snd_una_max - tc->snd_una == 1)
+    return 1;
+  return 0;
+}
+
 /**
  * Checks if ack is a congestion control event.
  */
@@ -503,7 +509,7 @@
   *is_dack = tc->sack_sb.last_sacked_bytes
     || tcp_ack_is_dupack (tc, b, prev_snd_wnd, prev_snd_una);
 
-  return (*is_dack || tcp_in_cong_recovery (tc));
+  return ((*is_dack || tcp_in_cong_recovery (tc)) && !tcp_is_lost_fin (tc));
 }
 
 void
@@ -750,10 +756,20 @@
        * last hole end */
       tmp = tc->rcv_opts.sacks[vec_len (tc->rcv_opts.sacks) - 1];
       last_hole = scoreboard_last_hole (sb);
-      if (seq_gt (tc->snd_una_max, sb->high_sacked)
-	  && seq_gt (tc->snd_una_max, last_hole->end))
-	last_hole->end = tc->snd_una_max;
-      /* keep track of max byte sacked for when the last hole
+      if (seq_gt (tc->snd_una_max, last_hole->end))
+	{
+	  if (seq_geq (last_hole->start, sb->high_sacked))
+	    {
+	      last_hole->end = tc->snd_una_max;
+	    }
+	  /* New hole after high sacked block */
+	  else if (seq_lt (sb->high_sacked, tc->snd_una_max))
+	    {
+	      scoreboard_insert_hole (sb, sb->tail, sb->high_sacked,
+				      tc->snd_una_max);
+	    }
+	}
+      /* Keep track of max byte sacked for when the last hole
        * is acked */
       if (seq_gt (tmp.end, sb->high_sacked))
 	sb->high_sacked = tmp.end;
@@ -764,7 +780,6 @@
   while (hole && blk_index < vec_len (tc->rcv_opts.sacks))
     {
       blk = &tc->rcv_opts.sacks[blk_index];
-
       if (seq_leq (blk->start, hole->start))
 	{
 	  /* Block covers hole. Remove hole */
@@ -784,6 +799,7 @@
 		    }
 		  else if (!next_hole)
 		    {
+		      ASSERT (seq_geq (sb->high_sacked, ack));
 		      sb->snd_una_adv = sb->high_sacked - ack;
 		      sb->last_bytes_delivered += sb->high_sacked - hole->end;
 		    }
@@ -819,7 +835,6 @@
 	    {
 	      hole->end = blk->start;
 	    }
-
 	  hole = scoreboard_next_hole (sb, hole);
 	}
     }
@@ -827,10 +842,13 @@
   scoreboard_update_bytes (tc, sb);
   sb->last_sacked_bytes = sb->sacked_bytes
     - (old_sacked_bytes - sb->last_bytes_delivered);
+  ASSERT (sb->last_sacked_bytes <= sb->sacked_bytes);
   ASSERT (sb->sacked_bytes == 0
 	  || sb->sacked_bytes < tc->snd_una_max - seq_max (tc->snd_una, ack));
   ASSERT (sb->last_sacked_bytes + sb->lost_bytes <= tc->snd_una_max
 	  - seq_max (tc->snd_una, ack));
+  ASSERT (sb->head == TCP_INVALID_SACK_HOLE_INDEX || tcp_in_recovery (tc)
+	  || sb->holes[sb->head].start == ack + sb->snd_una_adv);
 }
 
 /**
@@ -916,7 +934,8 @@
 static u8
 tcp_cc_is_spurious_retransmit (tcp_connection_t * tc)
 {
-  return (tc->snd_rxt_ts
+  return (tcp_in_recovery (tc)
+	  && tc->snd_rxt_ts
 	  && tcp_opts_tstamp (&tc->rcv_opts)
 	  && timestamp_lt (tc->rcv_opts.tsecr, tc->snd_rxt_ts));
 }
@@ -994,6 +1013,7 @@
     {
       ASSERT (tc->snd_una != tc->snd_una_max
 	      || tc->sack_sb.last_sacked_bytes);
+
       tc->rcv_dupacks++;
 
       if (tc->rcv_dupacks > TCP_DUPACK_THRESHOLD && !tc->bytes_acked)
@@ -1012,17 +1032,20 @@
 	      goto partial_ack_test;
 	    }
 
-	  /* If of of the two conditions lower hold, reset dupacks
-	   * 1) Cumulative ack does not cover more than congestion threshold,
-	   *    and the following doesn't hold: the congestion window is
-	   *    greater than SMSS bytes and the difference between highest_ack
-	   *    and prev_highest_ack is at most 4*SMSS bytes (XXX)
-	   * 2) RFC6582 heuristic to avoid multiple fast retransmits
+	  /* If either of the two conditions below holds, reset dupacks
+	   * because we're probably after a timeout (RFC6582 heuristics).
+	   * If Cumulative ack does not cover more than congestion threshold,
+	   * and:
+	   * 1) The following doesn't hold: The congestion window is greater
+	   *    than SMSS bytes and the difference between highest_ack
+	   *    and prev_highest_ack is at most 4*SMSS bytes
+	   * 2) Echoed timestamp in the last non-dup ack does not equal the
+	   *    stored timestamp
 	   */
-	  if ((seq_gt (tc->snd_una, tc->snd_congestion)
-	       || !(tc->cwnd > tc->snd_mss
-		    && tc->bytes_acked <= 4 * tc->snd_mss))
-	      || tc->rcv_opts.tsecr != tc->tsecr_last_ack)
+	  if (seq_leq (tc->snd_una, tc->snd_congestion)
+	      && ((!(tc->cwnd > tc->snd_mss
+		     && tc->bytes_acked <= 4 * tc->snd_mss))
+		  || (tc->rcv_opts.tsecr != tc->tsecr_last_ack)))
 	    {
 	      tc->rcv_dupacks = 0;
 	      return;
@@ -1038,6 +1061,7 @@
 	   * three segments that have left the network and should've been
 	   * buffered at the receiver XXX */
 	  tc->cwnd = tc->ssthresh + tc->rcv_dupacks * tc->snd_mss;
+	  ASSERT (tc->cwnd >= tc->snd_mss);
 
 	  /* If cwnd allows, send more data */
 	  if (tcp_opts_sack_permitted (&tc->rcv_opts)
@@ -1112,7 +1136,7 @@
 	  >= tc->sack_sb.last_bytes_delivered);
   rxt_delivered = tc->bytes_acked + tc->sack_sb.snd_una_adv
     - tc->sack_sb.last_bytes_delivered;
-  if (rxt_delivered && seq_gt (tc->sack_sb.high_rxt, tc->snd_una))
+  if (0 && rxt_delivered && seq_gt (tc->sack_sb.high_rxt, tc->snd_una))
     {
       /* If we have sacks and we haven't gotten an ack beyond high_rxt,
        * remove sacked bytes delivered */
@@ -1301,6 +1325,8 @@
 {
   int written;
 
+  ASSERT (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt));
+
   /* Pure ACK. Update rcv_nxt and be done. */
   if (PREDICT_FALSE (data_len == 0))
     {
@@ -1450,6 +1476,7 @@
 	  /* Chop off the bytes in the past */
 	  n_bytes_to_drop = tc->rcv_nxt - vnet_buffer (b)->tcp.seq_number;
 	  n_data_bytes -= n_bytes_to_drop;
+	  vnet_buffer (b)->tcp.seq_number = tc->rcv_nxt;
 	  vlib_buffer_advance (b, n_bytes_to_drop);
 
 	  goto in_order;
@@ -1912,11 +1939,12 @@
 		  goto drop;
 		}
 
-	      stream_session_init_fifos_pointers (&new_tc0->connection,
-						  new_tc0->irs + 1,
-						  new_tc0->iss + 1);
 	      /* Make sure after data segment processing ACK is sent */
 	      new_tc0->flags |= TCP_CONN_SNDACK;
+
+	      /* Update rtt with the syn-ack sample */
+	      new_tc0->bytes_acked = 1;
+	      tcp_update_rtt (new_tc0, vnet_buffer (b0)->tcp.ack_number);
 	    }
 	  /* SYN: Simultaneous open. Change state to SYN-RCVD and send SYN-ACK */
 	  else
@@ -1932,9 +1960,8 @@
 		  goto drop;
 		}
 
-	      stream_session_init_fifos_pointers (&new_tc0->connection,
-						  new_tc0->irs + 1,
-						  new_tc0->iss + 1);
+	      tc0->rtt_ts = 0;
+
 	      tcp_make_synack (new_tc0, b0);
 	      next0 = tcp_next_output (is_ip4);
 
@@ -2151,8 +2178,6 @@
 		<< tc0->rcv_opts.wscale;
 	      tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
 	      tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
-
-	      /* Shoulder tap the server */
 	      stream_session_accept_notify (&tc0->connection);
 
 	      /* Reset SYN-ACK retransmit timer */
@@ -2175,6 +2200,7 @@
 	      /* If FIN is ACKed */
 	      if (tc0->snd_una == tc0->snd_una_max)
 		{
+		  ASSERT (tcp_fin (tcp0));
 		  tc0->state = TCP_STATE_FIN_WAIT_2;
 		  /* Stop all timers, 2MSL will be set lower */
 		  tcp_connection_timers_reset (tc0);
@@ -2545,10 +2571,6 @@
 	  tcp_make_synack (child0, b0);
 	  next0 = tcp_next_output (is_ip4);
 
-	  /* Init fifo pointers after we have iss */
-	  stream_session_init_fifos_pointers (&child0->connection,
-					      child0->irs + 1,
-					      child0->iss + 1);
 	drop:
 	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
 	    {
@@ -2886,9 +2908,12 @@
   _(LISTEN, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
   _(LISTEN, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_NONE);
   _(LISTEN, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_NONE);
+  _(LISTEN, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
+    TCP_ERROR_NONE);
   /* ACK for for a SYN-ACK -> tcp-rcv-process. */
   _(SYN_RCVD, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(SYN_RCVD, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+  _(SYN_RCVD, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   /* SYN-ACK for a SYN */
   _(SYN_SENT, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_SYN_SENT,
     TCP_ERROR_NONE);
@@ -2905,12 +2930,14 @@
   _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
   _(ESTABLISHED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED,
     TCP_ERROR_NONE);
+  _(ESTABLISHED, TCP_FLAG_SYN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE);
   /* ACK or FIN-ACK to our FIN */
   _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS,
     TCP_ERROR_NONE);
   /* FIN in reply to our FIN from the other side */
   _(FIN_WAIT_1, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+  _(FIN_WAIT_1, TCP_FLAG_RST, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   /* FIN confirming that the peer (app) has closed */
   _(FIN_WAIT_2, TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
   _(FIN_WAIT_2, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
@@ -2929,6 +2956,8 @@
     TCP_ERROR_NONE);
   _(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
   _(CLOSED, TCP_FLAG_RST, TCP_INPUT_NEXT_DROP, TCP_ERROR_CONNECTION_CLOSED);
+  _(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
+    TCP_ERROR_CONNECTION_CLOSED);
 #undef _
 }
 
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
index c825e95..103fea4 100644
--- a/src/vnet/tcp/tcp_newreno.c
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -63,8 +63,8 @@
 	   * window deflation" attempts to ensure that, when fast recovery
 	   * eventually ends, approximately ssthresh amount of data will be
 	   * outstanding in the network.*/
-	  tc->cwnd = (tc->cwnd > tc->bytes_acked) ?
-	    tc->cwnd - tc->bytes_acked : 0;
+	  tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
+	    tc->cwnd - tc->bytes_acked : tc->snd_mss;
 	  if (tc->bytes_acked > tc->snd_mss)
 	    tc->cwnd += tc->snd_mss;
 	}
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 41bebcb..b418e8b 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -19,17 +19,20 @@
 vlib_node_registration_t tcp4_output_node;
 vlib_node_registration_t tcp6_output_node;
 
-typedef enum _tcp_output_nect
+typedef enum _tcp_output_next
 {
   TCP_OUTPUT_NEXT_DROP,
+  TCP_OUTPUT_NEXT_IP_LOOKUP,
   TCP_OUTPUT_N_NEXT
 } tcp_output_next_t;
 
 #define foreach_tcp4_output_next              	\
   _ (DROP, "error-drop")                        \
+  _ (IP_LOOKUP, "ip4-lookup")
 
 #define foreach_tcp6_output_next              	\
   _ (DROP, "error-drop")                        \
+  _ (IP_LOOKUP, "ip6-lookup")
 
 static char *tcp_error_strings[] = {
 #define tcp_error(n,s) s,
@@ -427,16 +430,16 @@
 #define tcp_get_free_buffer_index(tm, bidx)                             \
 do {                                                                    \
   u32 *my_tx_buffers, n_free_buffers;                                   \
-  u32 thread_index = vlib_get_thread_index();                             	\
-  my_tx_buffers = tm->tx_buffers[thread_index];                            \
+  u32 thread_index = vlib_get_thread_index();                           \
+  my_tx_buffers = tm->tx_buffers[thread_index];                         \
   if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0))                      \
     {                                                                   \
       n_free_buffers = 32;      /* TODO config or macro */              \
       vec_validate (my_tx_buffers, n_free_buffers - 1);                 \
       _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list (      \
-          tm->vlib_main, my_tx_buffers, n_free_buffers,                 \
+       vlib_get_main(), my_tx_buffers, n_free_buffers,                  \
           VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);                         \
-      tm->tx_buffers[thread_index] = my_tx_buffers;                        \
+      tm->tx_buffers[thread_index] = my_tx_buffers;                     \
     }                                                                   \
   /* buffer shortage */                                                 \
   if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0))                     \
@@ -445,12 +448,12 @@
   _vec_len (my_tx_buffers) -= 1;                                        \
 } while (0)
 
-#define tcp_return_buffer(tm)						\
-do {									\
-  u32 *my_tx_buffers;							\
-  u32 thread_index = vlib_get_thread_index();                             	\
-  my_tx_buffers = tm->tx_buffers[thread_index];                          	\
-  _vec_len (my_tx_buffers) +=1;						\
+#define tcp_return_buffer(tm)                   \
+do {                                            \
+  u32 *my_tx_buffers;                           \
+  u32 thread_index = vlib_get_thread_index();   \
+  my_tx_buffers = tm->tx_buffers[thread_index]; \
+  _vec_len (my_tx_buffers) +=1;                 \
 } while (0)
 
 always_inline void
@@ -757,23 +760,22 @@
 tcp_push_ip_hdr (tcp_main_t * tm, tcp_connection_t * tc, vlib_buffer_t * b)
 {
   tcp_header_t *th = vlib_buffer_get_current (b);
-
+  vlib_main_t *vm = vlib_get_main ();
   if (tc->c_is_ip4)
     {
       ip4_header_t *ih;
-      ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4,
+      ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
 				 &tc->c_rmt_ip4, IP_PROTOCOL_TCP);
-      th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih);
+      th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
     }
   else
     {
       ip6_header_t *ih;
       int bogus = ~0;
 
-      ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6,
+      ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6,
 				 &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
-      th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih,
-							&bogus);
+      th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih, &bogus);
       ASSERT (!bogus);
     }
 }
@@ -851,6 +853,13 @@
   /* Decide where to send the packet */
   next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
 
+  /* Initialize the trajectory trace, if configured */
+  if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+    {
+      b->pre_data[0] = 1;
+      b->pre_data[1] = next_index;
+    }
+
   /* Enqueue the packet */
   f = vlib_get_frame_to_node (vm, next_index);
   to_next = vlib_frame_vector_args (f);
@@ -1144,6 +1153,7 @@
 
       /* Account for the SYN */
       tc->snd_nxt += 1;
+      tc->rtt_ts = 0;
     }
   else
     {
@@ -1232,7 +1242,7 @@
   /* Nothing to send */
   if (n_bytes <= 0)
     {
-      clib_warning ("persist found nothing to send");
+      // clib_warning ("persist found nothing to send");
       tcp_return_buffer (tm);
       return;
     }
@@ -1448,7 +1458,7 @@
 	  tcp_connection_t *tc0;
 	  tcp_tx_trace_t *t0;
 	  tcp_header_t *th0 = 0;
-	  u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_DROP;
+	  u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
 
 	  bi0 = from[0];
 	  to_next[0] = bi0;
@@ -1527,6 +1537,7 @@
 	      tc0->rto_boff = 0;
 	    }
 
+#if 0
 	  /* Make sure we haven't lost route to our peer */
 	  if (PREDICT_FALSE (tc0->last_fib_check
 			     < tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD))
@@ -1547,6 +1558,10 @@
 	  /* Use pre-computed dpo to set next node */
 	  next0 = tc0->c_rmt_dpo.dpoi_next_node;
 	  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
+#endif
+
+	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
 
 	  b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED;
 	done:
diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h
index a6f62ee..9ccfe65 100644
--- a/src/vnet/tcp/tcp_packet.h
+++ b/src/vnet/tcp/tcp_packet.h
@@ -168,6 +168,7 @@
 #define TCP_OPTION_LEN_TIMESTAMP        10
 #define TCP_OPTION_LEN_SACK_BLOCK        8
 
+#define TCP_HDR_LEN_MAX			60
 #define TCP_WND_MAX                     65535U
 #define TCP_MAX_WND_SCALE               14	/* See RFC 1323 */
 #define TCP_OPTS_ALIGN                  4
diff --git a/src/vnet/tcp/tcp_test.c b/src/vnet/tcp/tcp_test.c
index a461e3b..510deb4 100644
--- a/src/vnet/tcp/tcp_test.c
+++ b/src/vnet/tcp/tcp_test.c
@@ -290,7 +290,7 @@
 {
   tcp_connection_t _tc, *tc = &_tc;
   sack_block_t *sacks;
-  int i, verbose = 0;
+  int i, verbose = 0, expected;
 
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
@@ -326,8 +326,12 @@
   sacks = vec_dup (tc->snd_sacks);
 
   tcp_update_sack_list (tc, 1100, 1200);
-  TCP_TEST ((vec_len (tc->snd_sacks) == 5), "sack blocks %d expected %d",
-	    vec_len (tc->snd_sacks), 5);
+  if (verbose)
+    vlib_cli_output (vm, "add new segment [1100, 1200]\n%U",
+		     format_tcp_sacks, tc);
+  expected = 5 < TCP_MAX_SACK_BLOCKS ? 6 : 5;
+  TCP_TEST ((vec_len (tc->snd_sacks) == expected),
+	    "sack blocks %d expected %d", vec_len (tc->snd_sacks), expected);
   TCP_TEST ((tc->snd_sacks[0].start == 1100),
 	    "first sack block start %u expected %u", tc->snd_sacks[0].start,
 	    1100);
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index e6b4f8f..9a8ff07 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -123,7 +123,7 @@
 	  /* lookup session */
 	  s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address,
 				       udp0->dst_port, udp0->src_port,
-				       SESSION_TYPE_IP4_UDP, my_thread_index);
+				       SESSION_TYPE_IP4_UDP);
 
 	  /* no listener */
 	  if (PREDICT_FALSE (s0 == 0))