Clean up URI code and fix TCP bugs

- Add CLI/API to enable session layer, by default it's disabled
- Improve rcv wnd computation
- Improvements to tx path
- URI code cleanup
- Add builtin test TCP server
- Improve src port allocation

Change-Id: I2ace498e76a0771d4c31a8075cc14fe33d7dfa38
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index 5e65ac7..74d39bd 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -103,7 +103,8 @@
 _(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time")	\
 _(INVALID_GPE_MODE, -112, "Invalid GPE mode")                           \
 _(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present")       \
-_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface")
+_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface")	\
+_(SESSION_CONNECT_FAIL, -115, "Session failed to connect")
 
 typedef enum
 {
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index a561e7d..a542eeb 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -154,6 +154,15 @@
   return pool_elt_at_index (app_pool, index);
 }
 
+application_t *
+application_get_if_valid (u32 index)
+{
+  if (pool_is_free_index (app_pool, index))
+    return 0;
+
+  return pool_elt_at_index (app_pool, index);
+}
+
 u32
 application_get_index (application_t * app)
 {
@@ -209,7 +218,7 @@
 
   regp = vl_api_client_index_to_registration (srv->api_client_index);
   if (!regp)
-    server_name = format (0, "%s%c", regp->name, 0);
+    server_name = format (0, "builtin-%d%c", srv->index, 0);
   else
     server_name = regp->name;
 
@@ -269,11 +278,17 @@
 show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
 		     vlib_cli_command_t * cmd)
 {
+  session_manager_main_t *smm = &session_manager_main;
   application_t *app;
   int do_server = 0;
   int do_client = 0;
   int verbose = 0;
 
+  if (!smm->is_enabled)
+    {
+      clib_error_return (0, "session layer is not enabled");
+    }
+
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "server"))
@@ -323,16 +338,20 @@
           /* *INDENT-ON* */
 	}
       else
-	vlib_cli_output (vm, "No active server bindings");
+	vlib_cli_output (vm, "No active client bindings");
     }
 
   return 0;
 }
 
+/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (show_app_command, static) =
 {
-.path = "show app",.short_help =
-    "show app [server|client] [verbose]",.function = show_app_command_fn,};
+  .path = "show app",
+  .short_help = "show app [server|client] [verbose]",
+  .function = show_app_command_fn,
+};
+/* *INDENT-ON* */
 
 /*
  * fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 027d696..480828f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -100,6 +100,7 @@
 				session_cb_vft_t * cb_fns);
 void application_del (application_t * app);
 application_t *application_get (u32 index);
+application_t *application_get_if_valid (u32 index);
 application_t *application_lookup (u32 api_client_index);
 u32 application_get_index (application_t * app);
 
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 0ea77fd..6ddfb70 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -51,7 +51,7 @@
       prefix.fp_proto = FIB_PROTOCOL_IP6;
     }
 
-  clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address));
+  clib_memcpy (&prefix.fp_addr, ip46_address, sizeof (ip46_address_t));
   fei = fib_table_lookup (0, &prefix);
   flags = fib_entry_get_flags (fei);
 
@@ -186,9 +186,7 @@
   /*
    * Not connecting to a local server. Create regular session
    */
-  stream_session_open (sst, ip46, port, app->index);
-
-  return 0;
+  return stream_session_open (sst, ip46, port, app->index);
 }
 
 /**
diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c
index e467f4e..399077d 100644
--- a/src/vnet/session/node.c
+++ b/src/vnet/session/node.c
@@ -104,9 +104,13 @@
   snd_space0 = transport_vft->send_space (tc0);
   snd_mss0 = transport_vft->send_mss (tc0);
 
+  /* Can't make any progress */
   if (snd_space0 == 0 || svm_fifo_max_dequeue (s0->server_tx_fifo) == 0
       || snd_mss0 == 0)
-    return 0;
+    {
+      vec_add1 (smm->evts_partially_read[thread_index], *e0);
+      return 0;
+    }
 
   ASSERT (e0->enqueue_length > 0);
 
@@ -143,7 +147,12 @@
 	  if (PREDICT_FALSE (n_bufs < 0.9 * VLIB_FRAME_SIZE))
 	    {
 	      /* Keep track of how much we've dequeued and exit */
-	      e0->enqueue_length -= max_len_to_snd0 - left_to_snd0;
+	      if (left_to_snd0 != max_len_to_snd0)
+		{
+		  e0->enqueue_length -= max_len_to_snd0 - left_to_snd0;
+		  vec_add1 (smm->evts_partially_read[thread_index], *e0);
+		}
+
 	      return -1;
 	    }
 
@@ -185,12 +194,13 @@
 	      t0->server_thread_index = s0->thread_index;
 	    }
 
+	  /* *INDENT-OFF* */
 	  if (1)
 	    {
-	      ELOG_TYPE_DECLARE (e) =
-	      {
-	      .format = "evt-dequeue: id %d length %d",.format_args =
-		  "i4i4",};
+	      ELOG_TYPE_DECLARE (e) = {
+		  .format = "evt-dequeue: id %d length %d",
+		  .format_args = "i4i4",
+	      };
 	      struct
 	      {
 		u32 data[2];
@@ -199,6 +209,7 @@
 	      ed->data[0] = e0->event_id;
 	      ed->data[1] = e0->enqueue_length;
 	    }
+	  /* *INDENT-ON* */
 
 	  len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0;
 
@@ -289,7 +300,7 @@
 {
   session_manager_main_t *smm = vnet_get_session_manager_main ();
   session_fifo_event_t *my_fifo_events, *e;
-  u32 n_to_dequeue;
+  u32 n_to_dequeue, n_events;
   unix_shared_memory_queue_t *q;
   int n_tx_packets = 0;
   u32 my_thread_index = vm->cpu_index;
@@ -309,14 +320,16 @@
 
   /* min number of events we can dequeue without blocking */
   n_to_dequeue = q->cursize;
-  if (n_to_dequeue == 0)
-    return 0;
-
   my_fifo_events = smm->fifo_events[my_thread_index];
 
-  /* If we didn't manage to process previous events try going
+  if (n_to_dequeue == 0 && vec_len (my_fifo_events) == 0)
+    return 0;
+
+  /*
+   * If we didn't manage to process previous events try going
    * over them again without dequeuing new ones.
-   * XXX: Block senders to sessions that can't keep up */
+   */
+  /* XXX: Block senders to sessions that can't keep up */
   if (vec_len (my_fifo_events) >= 100)
     goto skip_dequeue;
 
@@ -338,8 +351,8 @@
   smm->fifo_events[my_thread_index] = my_fifo_events;
 
 skip_dequeue:
-
-  for (i = 0; i < n_to_dequeue; i++)
+  n_events = vec_len (my_fifo_events);
+  for (i = 0; i < n_events; i++)
     {
       svm_fifo_t *f0;		/* $$$ prefetch 1 ahead maybe */
       stream_session_t *s0;
@@ -354,8 +367,13 @@
       /* $$$ add multiple event queues, per vpp worker thread */
       ASSERT (server_thread_index0 == my_thread_index);
 
-      s0 = pool_elt_at_index (smm->sessions[my_thread_index],
-			      server_session_index0);
+      s0 = stream_session_get_if_valid (server_session_index0,
+					my_thread_index);
+      if (!s0)
+	{
+	  clib_warning ("It's dead Jim!");
+	  continue;
+	}
 
       ASSERT (s0->thread_index == my_thread_index);
 
@@ -380,11 +398,11 @@
 done:
 
   /* Couldn't process all events. Probably out of buffers */
-  if (PREDICT_FALSE (i < n_to_dequeue))
+  if (PREDICT_FALSE (i < n_events))
     {
       session_fifo_event_t *partially_read =
 	smm->evts_partially_read[my_thread_index];
-      vec_add (partially_read, &my_fifo_events[i], n_to_dequeue - i);
+      vec_add (partially_read, &my_fifo_events[i], n_events - i);
       vec_free (my_fifo_events);
       smm->fifo_events[my_thread_index] = partially_read;
       smm->evts_partially_read[my_thread_index] = 0;
@@ -413,8 +431,7 @@
   .n_errors = ARRAY_LEN (session_queue_error_strings),
   .error_strings = session_queue_error_strings,
   .n_next_nodes = SESSION_QUEUE_N_NEXT,
-  /* .state = VLIB_NODE_STATE_DISABLED, enable on-demand? */
-  /* edit / add dispositions here */
+  .state = VLIB_NODE_STATE_DISABLED,
   .next_nodes =
   {
       [SESSION_QUEUE_NEXT_DROP] = "error-drop",
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index a7b28c1..582765b 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -422,6 +422,28 @@
   i32 retval;
   u64 handle;
 };
+
+/** \brief enable/disable session layer
+    @param client_index - opaque cookie to identify the sender
+                          client to vpp direction only
+    @param context - sender context, to match reply w/ request
+    @param is_enable - disable session layer if 0, enable otherwise
+*/
+define session_enable_disable {
+  u32 client_index;
+  u32 context;
+  u8 is_enable;
+};
+
+/** \brief Reply for session enable/disable
+    @param context - returned sender context, to match reply w/ request
+    @param retval - return code
+*/
+define session_enable_disable_reply {
+  u32 context;
+  i32 retval;
+};
+
 /*
  * Local Variables:
  * eval: (c-set-style "gnu")
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 539da61..422527e 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -311,11 +311,11 @@
 }
 
 transport_connection_t *
-stream_session_lookup_transport4 (session_manager_main_t * smm,
-				  ip4_address_t * lcl, ip4_address_t * rmt,
+stream_session_lookup_transport4 (ip4_address_t * lcl, ip4_address_t * rmt,
 				  u16 lcl_port, u16 rmt_port, u8 proto,
 				  u32 my_thread_index)
 {
+  session_manager_main_t *smm = &session_manager_main;
   session_kv4_t kv4;
   stream_session_t *s;
   int rv;
@@ -345,11 +345,11 @@
 }
 
 transport_connection_t *
-stream_session_lookup_transport6 (session_manager_main_t * smm,
-				  ip6_address_t * lcl, ip6_address_t * rmt,
+stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt,
 				  u16 lcl_port, u16 rmt_port, u8 proto,
 				  u32 my_thread_index)
 {
+  session_manager_main_t *smm = &session_manager_main;
   stream_session_t *s;
   session_kv6_t kv6;
   int rv;
@@ -554,7 +554,7 @@
 					u8 * added_a_segment)
 {
   svm_fifo_segment_private_t *fifo_segment;
-  u32 fifo_size, default_fifo_size = 8192 /* TODO config */ ;
+  u32 fifo_size, default_fifo_size = 128 << 10;	/* TODO config */
   int i;
 
   *added_a_segment = 0;
@@ -948,7 +948,7 @@
 connects_session_manager_init (session_manager_main_t * smm, u8 session_type)
 {
   session_manager_t *sm;
-  u32 connect_fifo_size = 8 << 10;	/* Config? */
+  u32 connect_fifo_size = 256 << 10;	/* Config? */
   u32 default_segment_size = 1 << 20;
 
   pool_get (smm->session_managers, sm);
@@ -1055,10 +1055,15 @@
   svm_fifo_segment_free_fifo (fifo_segment, s->server_rx_fifo);
   svm_fifo_segment_free_fifo (fifo_segment, s->server_tx_fifo);
 
-  /* Cleanup app if client */
-  app = application_get (s->app_index);
+  app = application_get_if_valid (s->app_index);
+
+  /* No app. A possibility: after disconnect application called unbind */
+  if (!app)
+    return;
+
   if (app->mode == APP_CLIENT)
     {
+      /* Cleanup app if client */
       application_del (app);
     }
   else if (app->mode == APP_SERVER)
@@ -1068,6 +1073,7 @@
       svm_fifo_t **fifos;
       u32 fifo_index;
 
+      /* For server, see if any segments can be removed */
       sm = session_manager_get (app->session_manager_index);
 
       /* Delete fifo */
@@ -1096,10 +1102,10 @@
 {
   stream_session_t *s;
 
+  /* App might've been removed already */
   s = stream_session_get_if_valid (tc->s_index, tc->thread_index);
   if (!s)
     {
-      clib_warning ("Surprised!");
       return;
     }
   stream_session_delete (s);
@@ -1151,16 +1157,24 @@
   return 0;
 }
 
-void
+int
 stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order,
 		     u32 app_index)
 {
   transport_connection_t *tc;
   u32 tci;
   u64 value;
+  int rv;
 
   /* Ask transport to open connection */
-  tci = tp_vfts[sst].open (addr, port_host_byte_order);
+  rv = tp_vfts[sst].open (addr, port_host_byte_order);
+  if (rv < 0)
+    {
+      clib_warning ("Transport failed to open connection.");
+      return VNET_API_ERROR_SESSION_CONNECT_FAIL;
+    }
+
+  tci = rv;
 
   /* Get transport connection */
   tc = tp_vfts[sst].get_half_open (tci);
@@ -1170,6 +1184,8 @@
 
   /* Add to the half-open lookup table */
   stream_session_half_open_table_add (sst, tc, value);
+
+  return 0;
 }
 
 /**
@@ -1216,16 +1232,13 @@
 }
 
 static clib_error_t *
-session_manager_main_init (vlib_main_t * vm)
+session_manager_main_enable (vlib_main_t * vm)
 {
-  u32 num_threads;
-  vlib_thread_main_t *vtm = vlib_get_thread_main ();
   session_manager_main_t *smm = &session_manager_main;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 num_threads;
   int i;
 
-  smm->vlib_main = vm;
-  smm->vnet_main = vnet_get_main ();
-
   num_threads = 1 /* main thread */  + vtm->n_threads;
 
   if (num_threads < 1)
@@ -1272,11 +1285,48 @@
   for (i = 0; i < SESSION_N_TYPES; i++)
     smm->connect_manager_index[i] = INVALID_INDEX;
 
+  smm->is_enabled = 1;
+
   return 0;
 }
 
-VLIB_INIT_FUNCTION (session_manager_main_init);
+clib_error_t *
+vnet_session_enable_disable (vlib_main_t * vm, u8 is_en)
+{
+  if (is_en)
+    {
+      if (session_manager_main.is_enabled)
+	return 0;
 
+      vlib_node_set_state (vm, session_queue_node.index,
+			   VLIB_NODE_STATE_POLLING);
+
+      return session_manager_main_enable (vm);
+    }
+  else
+    {
+      session_manager_main.is_enabled = 0;
+      vlib_node_set_state (vm, session_queue_node.index,
+			   VLIB_NODE_STATE_DISABLED);
+    }
+
+  return 0;
+}
+
+
+clib_error_t *
+session_manager_main_init (vlib_main_t * vm)
+{
+  session_manager_main_t *smm = &session_manager_main;
+
+  smm->vlib_main = vm;
+  smm->vnet_main = vnet_get_main ();
+  smm->is_enabled = 0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (session_manager_main_init)
 /*
  * fd.io coding-style-patch-verification: ON
  *
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index cf14cca..46e5ce2 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -213,12 +213,15 @@
   /** Per transport rx function that can either dequeue or peek */
   session_fifo_rx_fn *session_rx_fns[SESSION_N_TYPES];
 
+  u8 is_enabled;
+
   /* Convenience */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
 };
 
 extern session_manager_main_t session_manager_main;
+extern vlib_node_registration_t session_queue_node;
 
 /*
  * Session manager function
@@ -276,14 +279,12 @@
 					  ip6_address_t * rmt, u16 lcl_port,
 					  u16 rmt_port, u8, u32 thread_index);
 transport_connection_t
-  * stream_session_lookup_transport4 (session_manager_main_t * smm,
-				      ip4_address_t * lcl,
+  * stream_session_lookup_transport4 (ip4_address_t * lcl,
 				      ip4_address_t * rmt, u16 lcl_port,
 				      u16 rmt_port, u8 proto,
 				      u32 thread_index);
 transport_connection_t
-  * stream_session_lookup_transport6 (session_manager_main_t * smm,
-				      ip6_address_t * lcl,
+  * stream_session_lookup_transport6 (ip6_address_t * lcl,
 				      ip6_address_t * rmt, u16 lcl_port,
 				      u16 rmt_port, u8 proto,
 				      u32 thread_index);
@@ -338,6 +339,14 @@
   return svm_fifo_max_enqueue (s->server_rx_fifo);
 }
 
+always_inline u32
+stream_session_fifo_size (transport_connection_t * tc)
+{
+  stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
+  return s->server_rx_fifo->nitems;
+}
+
+
 int
 stream_session_enqueue_data (transport_connection_t * tc, u8 * data, u16 len,
 			     u8 queue_event);
@@ -356,8 +365,8 @@
 int
 stream_session_accept (transport_connection_t * tc, u32 listener_index,
 		       u8 sst, u8 notify);
-void stream_session_open (u8 sst, ip46_address_t * addr,
-			  u16 port_host_byte_order, u32 api_client_index);
+int stream_session_open (u8 sst, ip46_address_t * addr,
+			 u16 port_host_byte_order, u32 api_client_index);
 void stream_session_disconnect (stream_session_t * s);
 void stream_session_cleanup (stream_session_t * s);
 int
@@ -369,6 +378,8 @@
 void session_register_transport (u8 type, const transport_proto_vft_t * vft);
 transport_proto_vft_t *session_get_transport_vft (u8 type);
 
+clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+
 #endif /* __included_session_h__ */
 
 /*
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 9d06868..8852fc6 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -52,6 +52,8 @@
 _(DISCONNECT_SOCK_REPLY, disconnect_sock_reply)                        	\
 _(ACCEPT_SOCK_REPLY, accept_sock_reply)                           	\
 _(RESET_SOCK_REPLY, reset_sock_reply)                   		\
+_(SESSION_ENABLE_DISABLE, session_enable_disable)                   	\
+
 
 static int
 send_add_segment_callback (u32 api_client_index, const u8 * segment_name,
@@ -146,7 +148,6 @@
   mp = vl_msg_api_alloc (sizeof (*mp));
   mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_CONNECT_URI_REPLY);
   mp->context = app->api_context;
-  mp->retval = is_fail;
   if (!is_fail)
     {
       vpp_queue = session_manager_get_vpp_event_queue (s->thread_index);
@@ -157,6 +158,7 @@
       mp->session_type = s->session_type;
       mp->vpp_event_queue_address = (u64) vpp_queue;
       mp->client_event_queue_address = (u64) app->event_queue;
+      mp->retval = 0;
 
       session_manager_get_segment_info (s->server_segment_index, &seg_name,
 					&mp->segment_size);
@@ -164,12 +166,22 @@
       if (mp->segment_name_length)
 	clib_memcpy (mp->segment_name, seg_name, mp->segment_name_length);
     }
+  else
+    {
+      mp->retval = VNET_API_ERROR_SESSION_CONNECT_FAIL;
+    }
 
   vl_msg_api_send_shmem (q, (u8 *) & mp);
 
   /* Remove client if connect failed */
   if (is_fail)
-    application_del (app);
+    {
+      application_del (app);
+    }
+  else
+    {
+      s->session_state = SESSION_STATE_READY;
+    }
 
   return 0;
 }
@@ -432,6 +444,17 @@
 }
 
 static void
+vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp)
+{
+  vl_api_session_enable_disable_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  int rv = 0;
+
+  vnet_session_enable_disable (vm, mp->is_enable);
+  REPLY_MACRO (VL_API_SESSION_ENABLE_DISABLE_REPLY);
+}
+
+static void
 vl_api_bind_uri_t_handler (vl_api_bind_uri_t * mp)
 {
   vl_api_bind_uri_reply_t *rmp;
@@ -476,7 +499,6 @@
       }
   }));
   /* *INDENT-ON* */
-
 }
 
 static void
@@ -493,7 +515,9 @@
 static void
 vl_api_connect_uri_t_handler (vl_api_connect_uri_t * mp)
 {
+  vl_api_connect_uri_reply_t *rmp;
   vnet_connect_args_t _a, *a = &_a;
+  int rv;
 
   a->uri = (char *) mp->uri;
   a->api_client_index = mp->client_index;
@@ -501,7 +525,19 @@
   a->options = mp->options;
   a->session_cb_vft = &uri_session_cb_vft;
   a->mp = mp;
-  vnet_connect_uri (a);
+
+  rv = vnet_connect_uri (a);
+
+  if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+    return;
+
+  /* Got some error, relay it */
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_CONNECT_URI_REPLY, ({
+    rmp->retval = rv;
+  }));
+  /* *INDENT-ON* */
 }
 
 static void
@@ -662,7 +698,9 @@
 static void
 vl_api_connect_sock_t_handler (vl_api_connect_sock_t * mp)
 {
+  vl_api_connect_sock_reply_t *rmp;
   vnet_connect_args_t _a, *a = &_a;
+  int rv;
 
   clib_memcpy (&a->tep.ip, mp->ip,
 	       (mp->is_ip4 ? sizeof (ip4_address_t) :
@@ -675,7 +713,18 @@
   a->api_context = mp->context;
   a->mp = mp;
 
-  vnet_connect (a);
+  rv = vnet_connect (a);
+
+  if (rv == 0 || rv == VNET_CONNECT_REDIRECTED)
+    return;
+
+  /* Got some error, relay it */
+
+  /* *INDENT-OFF* */
+  REPLY_MACRO2 (VL_API_CONNECT_URI_REPLY, ({
+    rmp->retval = rv;
+  }));
+  /* *INDENT-ON* */
 }
 
 static void
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index b2943a1..b029ee6 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -60,7 +60,7 @@
     }
   else
     {
-      clib_warning ("Session in unknown state!");
+      clib_warning ("Session in state: %d!", ss->session_state);
     }
 
   vec_free (str);
@@ -78,6 +78,11 @@
   stream_session_t *s;
   u8 *str = 0;
 
+  if (!smm->is_enabled)
+    {
+      clib_error_return (0, "session layer is not enabled");
+    }
+
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "verbose"))
@@ -126,11 +131,14 @@
   return 0;
 }
 
-VLIB_CLI_COMMAND (show_uri_command, static) =
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_session_command, static) =
 {
-.path = "show session",.short_help = "show session [verbose]",.function =
-    show_session_command_fn,};
-
+  .path = "show session",
+  .short_help = "show session [verbose]",
+  .function = show_session_command_fn,
+};
+/* *INDENT-ON* */
 
 static clib_error_t *
 clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -142,6 +150,11 @@
   stream_session_t *pool, *session;
   application_t *server;
 
+  if (!smm->is_enabled)
+    {
+      clib_error_return (0, "session layer is not enabled");
+    }
+
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "thread %d", &thread_index))
@@ -174,11 +187,43 @@
   return 0;
 }
 
-VLIB_CLI_COMMAND (clear_uri_session_command, static) =
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_session_command, static) =
 {
-.path = "clear session",.short_help =
-    "clear session thread <thread> session <index>",.function =
-    clear_session_command_fn,};
+  .path = "clear session",
+  .short_help = "clear session thread <thread> session <index>",
+  .function = clear_session_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
+			   vlib_cli_command_t * cmd)
+{
+  u8 is_en = 1;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable"))
+	is_en = 1;
+      else if (unformat (input, "disable"))
+	is_en = 0;
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+
+  return vnet_session_enable_disable (vm, is_en);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (session_enable_disable_command, static) =
+{
+  .path = "session",
+  .short_help = "session [enable|disable]",
+  .function = session_enable_disable_fn,
+};
+/* *INDENT-ON* */
 
 /*
  * fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
new file mode 100644
index 0000000..be65642
--- /dev/null
+++ b/src/vnet/tcp/builtin_server.c
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2015-2017 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+
+int
+builtin_session_accept_callback (stream_session_t * s)
+{
+  clib_warning ("called...");
+  s->session_state = SESSION_STATE_READY;
+  return 0;
+}
+
+void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+  clib_warning ("called...");
+}
+
+int
+builtin_session_connected_callback (u32 client_index,
+				    stream_session_t * s, u8 is_fail)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+int
+builtin_add_segment_callback (u32 client_index,
+			      const u8 * seg_name, u32 seg_size)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+int
+builtin_redirect_connect_callback (u32 client_index, void *mp)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+int
+builtin_server_rx_callback (stream_session_t * s)
+{
+  clib_warning ("called...");
+  return 0;
+}
+
+static session_cb_vft_t builtin_session_cb_vft = {
+  .session_accept_callback = builtin_session_accept_callback,
+  .session_disconnect_callback = builtin_session_disconnect_callback,
+  .session_connected_callback = builtin_session_connected_callback,
+  .add_segment_callback = builtin_add_segment_callback,
+  .redirect_connect_callback = builtin_redirect_connect_callback,
+  .builtin_server_rx_callback = builtin_server_rx_callback
+};
+
+static int
+server_create (vlib_main_t * vm)
+{
+  vnet_bind_args_t _a, *a = &_a;
+  u64 options[SESSION_OPTIONS_N_OPTIONS];
+  char segment_name[128];
+
+  memset (a, 0, sizeof (*a));
+  memset (options, 0, sizeof (options));
+
+  a->uri = "tcp://0.0.0.0/80";
+  a->api_client_index = ~0;
+  a->session_cb_vft = &builtin_session_cb_vft;
+  a->options = options;
+  a->options[SESSION_OPTIONS_SEGMENT_SIZE] = 256 << 10;
+  a->options[SESSION_OPTIONS_RX_FIFO_SIZE] = 64 << 10;
+  a->options[SESSION_OPTIONS_TX_FIFO_SIZE] = 64 << 10;
+  a->segment_name = segment_name;
+  a->segment_name_length = ARRAY_LEN (segment_name);
+
+  return vnet_bind_uri (a);
+}
+
+static clib_error_t *
+server_create_command_fn (vlib_main_t * vm,
+			  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  int rv;
+#if 0
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "whatever %d", &whatever))
+	;
+      else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, input);
+    }
+#endif
+
+  rv = server_create (vm);
+  switch (rv)
+    {
+    case 0:
+      break;
+    default:
+      return clib_error_return (0, "server_create returned %d", rv);
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+.path = "test server",.short_help = "test server",.function =
+    server_create_command_fn,};
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 0f9b709..e5feaeb 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -217,6 +217,7 @@
   return 0;
 }
 
+#define PORT_MASK ((1 << 16)- 1)
 /**
  * Allocate local port and add if successful add entry to local endpoint
  * table to mark the pair as used.
@@ -224,7 +225,6 @@
 u16
 tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip)
 {
-  u8 unique = 0;
   transport_endpoint_t *tep;
   u32 time_now, tei;
   u16 min = 1024, max = 65535, tries;	/* XXX configurable ? */
@@ -235,37 +235,34 @@
   /* Start at random point or max */
   pool_get (tm->local_endpoints, tep);
   clib_memcpy (&tep->ip, ip, sizeof (*ip));
-  tep->port = random_u32 (&time_now) << 16;
-  tep->port = tep->port < min ? max : tep->port;
 
   /* Search for first free slot */
-  while (tries)
+  for (; tries >= 0; tries--)
     {
-      tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip,
-				       tep->port);
-      if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX)
+      u16 port = 0;
+
+      /* Find a port in the specified range */
+      while (1)
 	{
-	  unique = 1;
-	  break;
+	  port = random_u32 (&time_now) & PORT_MASK;
+	  if (PREDICT_TRUE (port >= min && port < max))
+	    break;
 	}
 
-      tep->port--;
+      tep->port = port;
 
-      if (tep->port < min)
-	tep->port = max;
-
-      tries--;
+      /* Look it up */
+      tei = transport_endpoint_lookup (&tm->local_endpoints_table, &tep->ip,
+				       tep->port);
+      /* If not found, we're done */
+      if (tei == TRANSPORT_ENDPOINT_INVALID_INDEX)
+	{
+	  transport_endpoint_table_add (&tm->local_endpoints_table, tep,
+					tep - tm->local_endpoints);
+	  return tep->port;
+	}
     }
-
-  if (unique)
-    {
-      transport_endpoint_table_add (&tm->local_endpoints_table, tep,
-				    tep - tm->local_endpoints);
-
-      return tep->port;
-    }
-
-  /* Failed */
+  /* No free ports */
   pool_put (tm->local_endpoints, tep);
   return -1;
 }
@@ -360,7 +357,10 @@
   /* Allocate source port */
   lcl_port = tcp_allocate_local_port (tm, &lcl_addr);
   if (lcl_port < 1)
-    return -1;
+    {
+      clib_warning ("Failed to allocate src port");
+      return -1;
+    }
 
   /*
    * Create connection and send SYN
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 22f00a6..3560509 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -30,7 +30,8 @@
 #define TCP_MAX_OPTION_SPACE 40
 
 #define TCP_DUPACK_THRESHOLD 3
-#define TCP_DEFAULT_RX_FIFO_SIZE 64 << 10
+#define TCP_MAX_RX_FIFO_SIZE 2 << 20
+#define TCP_IW_N_SEGMENTS 10
 
 /** TCP FSM state definitions as per RFC793. */
 #define foreach_tcp_fsm_state   \
@@ -590,7 +591,6 @@
 /**
  * Push TCP header to buffer
  *
- * @param vm - vlib_main
  * @param b - buffer to write the header to
  * @param sp_net - source port net order
  * @param dp_net - destination port net order
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index daa0683..0a907d0 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -711,7 +711,7 @@
   if (tcp_opts_sack_permitted (&tc->opt))
     tcp_rcv_sacks (tc, vnet_buffer (b)->tcp.ack_number);
 
-  new_snd_wnd = clib_net_to_host_u32 (th->window) << tc->snd_wscale;
+  new_snd_wnd = clib_net_to_host_u16 (th->window) << tc->snd_wscale;
 
   if (tcp_ack_is_dupack (tc, b, new_snd_wnd))
     {
@@ -1320,7 +1320,6 @@
 
 	  /* Parse options */
 	  tcp_options_parse (tcp0, &new_tc0->opt);
-	  tcp_connection_init_vars (new_tc0);
 
 	  if (tcp_opts_tstamp (&new_tc0->opt))
 	    {
@@ -1331,11 +1330,13 @@
 	  if (tcp_opts_wscale (&new_tc0->opt))
 	    new_tc0->snd_wscale = new_tc0->opt.wscale;
 
-	  new_tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window)
-	    << new_tc0->snd_wscale;
+	  /* No scaling */
+	  new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window);
 	  new_tc0->snd_wl1 = seq0;
 	  new_tc0->snd_wl2 = ack0;
 
+	  tcp_connection_init_vars (new_tc0);
+
 	  /* SYN-ACK: See if we can switch to ESTABLISHED state */
 	  if (tcp_ack (tcp0))
 	    {
@@ -1345,6 +1346,9 @@
 	      new_tc0->snd_una = ack0;
 	      new_tc0->state = TCP_STATE_ESTABLISHED;
 
+	      /* Make sure las is initialized for the wnd computation */
+	      new_tc0->rcv_las = new_tc0->rcv_nxt;
+
 	      /* Notify app that we have connection */
 	      stream_session_connect_notify (&new_tc0->connection, sst, 0);
 
@@ -1575,7 +1579,7 @@
 
 	      /* Initialize session variables */
 	      tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
-	      tc0->snd_wnd = clib_net_to_host_u32 (tcp0->window)
+	      tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
 		<< tc0->opt.wscale;
 	      tc0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
 	      tc0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
@@ -1899,7 +1903,6 @@
 	    }
 
 	  tcp_options_parse (th0, &child0->opt);
-	  tcp_connection_init_vars (child0);
 
 	  child0->irs = vnet_buffer (b0)->tcp.seq_number;
 	  child0->rcv_nxt = vnet_buffer (b0)->tcp.seq_number + 1;
@@ -1913,6 +1916,16 @@
 	      child0->tsval_recent_age = tcp_time_now ();
 	    }
 
+	  if (tcp_opts_wscale (&child0->opt))
+	    child0->snd_wscale = child0->opt.wscale;
+
+	  /* No scaling */
+	  child0->snd_wnd = clib_net_to_host_u16 (th0->window);
+	  child0->snd_wl1 = vnet_buffer (b0)->tcp.seq_number;
+	  child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number;
+
+	  tcp_connection_init_vars (child0);
+
 	  /* Reuse buffer to make syn-ack and send */
 	  tcp_make_synack (child0, b0);
 	  next0 = tcp_next_output (is_ip4);
@@ -1923,7 +1936,7 @@
 
 	    }
 
-	  b0->error = error0 ? node->errors[error0] : 0;
+	  b0->error = node->errors[error0];
 
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
@@ -2069,7 +2082,6 @@
   u32 n_left_from, next_index, *from, *to_next;
   u32 my_thread_index = vm->cpu_index;
   tcp_main_t *tm = vnet_get_tcp_main ();
-  session_manager_main_t *ssm = vnet_get_session_manager_main ();
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -2109,26 +2121,26 @@
 
 	      /* lookup session */
 	      tc0 =
-		(tcp_connection_t *) stream_session_lookup_transport4 (ssm,
-								       &ip40->dst_address,
-								       &ip40->src_address,
-								       tcp0->dst_port,
-								       tcp0->src_port,
-								       SESSION_TYPE_IP4_TCP,
-								       my_thread_index);
+		(tcp_connection_t *)
+		stream_session_lookup_transport4 (&ip40->dst_address,
+						  &ip40->src_address,
+						  tcp0->dst_port,
+						  tcp0->src_port,
+						  SESSION_TYPE_IP4_TCP,
+						  my_thread_index);
 	    }
 	  else
 	    {
 	      ip60 = vlib_buffer_get_current (b0);
 	      tcp0 = ip6_next_header (ip60);
 	      tc0 =
-		(tcp_connection_t *) stream_session_lookup_transport6 (ssm,
-								       &ip60->src_address,
-								       &ip60->dst_address,
-								       tcp0->src_port,
-								       tcp0->dst_port,
-								       SESSION_TYPE_IP6_TCP,
-								       my_thread_index);
+		(tcp_connection_t *)
+		stream_session_lookup_transport6 (&ip60->src_address,
+						  &ip60->dst_address,
+						  tcp0->src_port,
+						  tcp0->dst_port,
+						  SESSION_TYPE_IP6_TCP,
+						  my_thread_index);
 	    }
 
 	  /* Session exists */
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index dbcf1f7..7e431cd 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -91,24 +91,30 @@
 }
 
 /**
+ * TCP's IW as recommended by RFC6928
+ */
+always_inline u32
+tcp_initial_wnd_unscaled (tcp_connection_t * tc)
+{
+  return TCP_IW_N_SEGMENTS * dummy_mtu;
+}
+
+/**
  * Compute initial window and scale factor. As per RFC1323, window field in
  * SYN and SYN-ACK segments is never scaled.
  */
 u32
 tcp_initial_window_to_advertise (tcp_connection_t * tc)
 {
-  u32 available_space;
+  u32 max_fifo;
 
   /* Initial wnd for SYN. Fifos are not allocated yet.
-   * Use some predefined value */
-  if (tc->state != TCP_STATE_SYN_RCVD)
-    {
-      return TCP_DEFAULT_RX_FIFO_SIZE;
-    }
+   * Use some predefined value. For SYN-ACK we still want the
+   * scale to be computed in the same way */
+  max_fifo = TCP_MAX_RX_FIFO_SIZE;
 
-  available_space = stream_session_max_enqueue (&tc->connection);
-  tc->rcv_wscale = tcp_window_compute_scale (available_space);
-  tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
+  tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
+  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
 
   return clib_min (tc->rcv_wnd, TCP_WND_MAX);
 }
@@ -119,23 +125,43 @@
 u32
 tcp_window_to_advertise (tcp_connection_t * tc, tcp_state_t state)
 {
-  u32 available_space, wnd, scaled_space;
+  u32 available_space, max_fifo, observed_wnd;
 
-  if (state != TCP_STATE_ESTABLISHED)
+  if (state < TCP_STATE_ESTABLISHED)
     return tcp_initial_window_to_advertise (tc);
 
+  /*
+   * Figure out how much space we have available
+   */
   available_space = stream_session_max_enqueue (&tc->connection);
-  scaled_space = available_space >> tc->rcv_wscale;
+  max_fifo = stream_session_fifo_size (&tc->connection);
 
-  /* Need to update scale */
-  if (PREDICT_FALSE ((scaled_space == 0 && available_space != 0))
-      || (scaled_space >= TCP_WND_MAX))
-    tc->rcv_wscale = tcp_window_compute_scale (available_space);
+  ASSERT (tc->opt.mss < max_fifo);
 
-  wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
-  tc->rcv_wnd = wnd;
+  if (available_space < tc->opt.mss && available_space < max_fifo / 8)
+    available_space = 0;
 
-  return wnd >> tc->rcv_wscale;
+  /*
+   * Use the above and what we know about what we've previously advertised
+   * to compute the new window
+   */
+  observed_wnd = tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
+
+  /* RFC 1122 4.2.2.16: an already advertised window must not be shrunk */
+  if (available_space < observed_wnd)
+    {
+      if (available_space == 0)
+	clib_warning ("Didn't shrink rcv window despite not having space");
+    }
+
+  tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
+
+  if (tc->rcv_wnd == 0)
+    {
+      tc->flags |= TCP_CONN_SENT_RCV_WND0;
+    }
+
+  return tc->rcv_wnd >> tc->rcv_wscale;
 }
 
 /**
@@ -225,7 +251,7 @@
 }
 
 always_inline int
-tcp_make_syn_options (tcp_options_t * opts, u32 initial_wnd)
+tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
 {
   u8 len = 0;
 
@@ -234,7 +260,7 @@
   len += TCP_OPTION_LEN_MSS;
 
   opts->flags |= TCP_OPTS_FLAG_WSCALE;
-  opts->wscale = tcp_window_compute_scale (initial_wnd);
+  opts->wscale = wnd_scale;
   len += TCP_OPTION_LEN_WINDOW_SCALE;
 
   opts->flags |= TCP_OPTS_FLAG_TSTAMP;
@@ -327,8 +353,7 @@
     case TCP_STATE_SYN_RCVD:
       return tcp_make_synack_options (tc, opts);
     case TCP_STATE_SYN_SENT:
-      return tcp_make_syn_options (opts,
-				   tcp_initial_window_to_advertise (tc));
+      return tcp_make_syn_options (opts, tc->rcv_wscale);
     default:
       clib_warning ("Not handled!");
       return 0;
@@ -732,7 +757,7 @@
 
   /* Make and write options */
   memset (&snd_opts, 0, sizeof (snd_opts));
-  tcp_opts_len = tcp_make_syn_options (&snd_opts, initial_wnd);
+  tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
   tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
 
   th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
@@ -900,7 +925,7 @@
 
   tcp_reuse_buffer (vm, b);
 
-  ASSERT (tc->state == TCP_STATE_ESTABLISHED);
+  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
   ASSERT (max_bytes != 0);
 
   if (tcp_opts_sack_permitted (&tc->opt))
@@ -929,7 +954,6 @@
 				       max_bytes);
   ASSERT (n_bytes != 0);
 
-  tc->snd_nxt += n_bytes;
   tcp_push_hdr_i (tc, b, tc->state);
 
   return n_bytes;
@@ -967,7 +991,7 @@
   tcp_get_free_buffer_index (tm, &bi);
   b = vlib_get_buffer (vm, bi);
 
-  if (tc->state == TCP_STATE_ESTABLISHED)
+  if (tc->state >= TCP_STATE_ESTABLISHED)
     {
       tcp_fastrecovery_off (tc);
 
@@ -977,6 +1001,12 @@
       /* Figure out what and how many bytes we can send */
       snd_space = tcp_available_snd_space (tc);
       max_bytes = clib_min (tc->snd_mss, snd_space);
+
+      if (max_bytes == 0)
+	{
+	  clib_warning ("no wnd to retransmit");
+	  return;
+	}
       tcp_prepare_retransmit_segment (tc, b, max_bytes);
 
       tc->rtx_bytes += max_bytes;
@@ -996,7 +1026,11 @@
 	tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
 
       vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+
       tcp_push_hdr_i (tc, b, tc->state);
+
+      /* Account for the SYN, which consumes one sequence number */
+      tc->snd_nxt += 1;
     }
 
   if (!is_syn)
@@ -1163,8 +1197,8 @@
 	  if (PREDICT_FALSE
 	      (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
 	    {
+	      ASSERT (tc0->snt_dupacks > 0);
 	      tc0->snt_dupacks--;
-	      ASSERT (tc0->snt_dupacks >= 0);
 	      if (!tcp_session_has_ooo_data (tc0))
 		{
 		  error0 = TCP_ERROR_FILTERED_DUPACKS;