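session: fix multi-buffer segments

Account for MAX_HDRS_LEN of headroom in the first buffer of a chain
when sizing dequeues/peeks in session_node.c and tcp_output.c, and
make the tx buffer refill threshold scale with the number of buffers
per segment (n_bufs_per_seg * VLIB_FRAME_SIZE) instead of a bare
VLIB_FRAME_SIZE. Also rename the builtin server's api client from
"http_test_server" to "tcp_test_server".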

Change-Id: I0e22c85ea570b934b9c78dc5e86d86d690bdae5e
Signed-off-by: Florin Coras <fcoras@cisco.com>
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index 9c5b17d..fac2b85 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -139,7 +139,7 @@
 {
   u32 n_trace = vlib_get_trace_count (vm, node);
   u32 left_to_snd0, max_len_to_snd0, len_to_deq0, snd_space0;
-  u32 n_bufs_per_evt, n_frames_per_evt;
+  u32 n_bufs_per_evt, n_frames_per_evt, n_bufs_per_frame;
   transport_connection_t *tc0;
   transport_proto_vft_t *transport_vft;
   u32 next_index, next0, *to_next, n_left_to_next, bi0;
@@ -148,7 +148,7 @@
   u16 snd_mss0, n_bufs_per_seg, n_bufs;
   u8 *data0;
   int i, n_bytes_read;
-  u32 n_bytes_per_buf, deq_per_buf;
+  u32 n_bytes_per_buf, deq_per_buf, deq_per_first_buf;
   u32 buffers_allocated, buffers_allocated_this_call;
 
   next_index = next0 = session_type_to_next[s0->session_type];
@@ -198,51 +198,56 @@
 
   n_bytes_per_buf = vlib_buffer_free_list_buffer_size
     (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  ASSERT (n_bytes_per_buf > MAX_HDRS_LEN);
   n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0;
   n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf);
-  n_bufs_per_evt = (ceil ((double) max_len_to_snd0 / n_bytes_per_seg))
-    * n_bufs_per_seg;
+  n_bufs_per_evt = ceil ((double) max_len_to_snd0 / n_bytes_per_seg);
   n_frames_per_evt = ceil ((double) n_bufs_per_evt / VLIB_FRAME_SIZE);
+  n_bufs_per_frame = n_bufs_per_seg * VLIB_FRAME_SIZE;
 
   deq_per_buf = clib_min (snd_mss0, n_bytes_per_buf);
+  deq_per_first_buf = clib_min (snd_mss0, n_bytes_per_buf - MAX_HDRS_LEN);
 
   n_bufs = vec_len (smm->tx_buffers[thread_index]);
   left_to_snd0 = max_len_to_snd0;
   for (i = 0; i < n_frames_per_evt; i++)
     {
       /* Make sure we have at least one full frame of buffers ready */
-      if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+      if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
 	{
 	  vec_validate (smm->tx_buffers[thread_index],
-			n_bufs + 2 * VLIB_FRAME_SIZE - 1);
-
+			n_bufs + n_bufs_per_frame - 1);
 	  buffers_allocated = 0;
 	  do
 	    {
-	      buffers_allocated_this_call =
-		vlib_buffer_alloc
-		(vm,
-		 &smm->tx_buffers[thread_index][n_bufs + buffers_allocated],
-		 2 * VLIB_FRAME_SIZE - buffers_allocated);
+	      buffers_allocated_this_call = vlib_buffer_alloc (vm,
+							       &smm->tx_buffers
+							       [thread_index]
+							       [n_bufs +
+								buffers_allocated],
+							       n_bufs_per_frame
+							       -
+							       buffers_allocated);
 	      buffers_allocated += buffers_allocated_this_call;
 	    }
 	  while (buffers_allocated_this_call > 0
-		 && ((buffers_allocated + n_bufs < VLIB_FRAME_SIZE)));
+		 && ((buffers_allocated + n_bufs < n_bufs_per_frame)));
 
 	  n_bufs += buffers_allocated;
 	  _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
 
-	  if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+	  if (PREDICT_FALSE (n_bufs < n_bufs_per_frame))
 	    {
 	      vec_add1 (smm->pending_event_vector[thread_index], *e0);
 	      return -1;
 	    }
+	  ASSERT (n_bufs >= n_bufs_per_frame);
 	}
       /* Allow enqueuing of a new event */
       svm_fifo_unset_event (s0->server_tx_fifo);
 
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-      while (left_to_snd0 && n_left_to_next >= n_bufs_per_seg)
+      while (left_to_snd0 && n_left_to_next)
 	{
 	  /*
 	   * Handle first buffer in chain separately
@@ -251,7 +256,6 @@
 	  /* Get free buffer */
 	  ASSERT (n_bufs >= 1);
 	  bi0 = smm->tx_buffers[thread_index][--n_bufs];
-	  ASSERT (bi0);
 	  _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
 
 	  /* usual speculation, or the enqueue_x1 macro will barf */
@@ -266,8 +270,7 @@
 	  b0->current_data = 0;
 	  b0->total_length_not_including_first_buffer = 0;
 
-	  len_to_deq0 = clib_min (left_to_snd0, deq_per_buf);
-
+	  len_to_deq0 = clib_min (left_to_snd0, deq_per_first_buf);
 	  data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
 	  if (peek_data)
 	    {
@@ -294,7 +297,7 @@
 	  /*
 	   * Fill in the remaining buffers in the chain, if any
 	   */
-	  if (PREDICT_FALSE (n_bufs_per_seg > 1))
+	  if (PREDICT_FALSE (n_bufs_per_seg > 1 && left_to_snd0))
 	    {
 	      u32 left_for_seg;
 	      left_for_seg = clib_min (snd_mss0 - n_bytes_read, left_to_snd0);
diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c
index 80a9556..295c189 100644
--- a/src/vnet/tcp/builtin_server.c
+++ b/src/vnet/tcp/builtin_server.c
@@ -267,7 +267,7 @@
   shmem_hdr = am->shmem_hdr;
   bsm->vl_input_queue = shmem_hdr->vl_input_queue;
   bsm->my_client_index =
-    vl_api_memclnt_create_internal ("http_test_server", bsm->vl_input_queue);
+    vl_api_memclnt_create_internal ("tcp_test_server", bsm->vl_input_queue);
   return 0;
 }
 
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index e6a211b..7da0c07 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1096,7 +1096,8 @@
   tcp_main_t *tm = vnet_get_tcp_main ();
   vlib_main_t *vm = vlib_get_main ();
   int n_bytes = 0;
-  u32 start, bi, available_bytes;
+  u32 start, bi, available_bytes, seg_size;
+  u8 *data;
 
   ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
   ASSERT (max_deq_bytes != 0);
@@ -1104,14 +1105,15 @@
   /*
    * Make sure we can retransmit something
    */
-  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
   available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection);
   if (!available_bytes)
     return 0;
+  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
   max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
-  start = tc->snd_una + offset;
+  seg_size = max_deq_bytes + MAX_HDRS_LEN;
 
   /* Start is beyond snd_congestion */
+  start = tc->snd_una + offset;
   if (seq_geq (start, tc->snd_congestion))
     {
       goto done;
@@ -1139,13 +1141,13 @@
   if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi)))
     return 0;
   *b = vlib_get_buffer (vm, bi);
+  data = vlib_buffer_make_headroom (*b, MAX_HDRS_LEN);
 
   /* Easy case, buffer size greater than mss */
-  if (PREDICT_TRUE (max_deq_bytes <= tm->bytes_per_buffer))
+  if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer))
     {
-      n_bytes = stream_session_peek_bytes (&tc->connection,
-					   vlib_buffer_get_current (*b),
-					   offset, max_deq_bytes);
+      n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
+					   max_deq_bytes);
       ASSERT (n_bytes == max_deq_bytes);
       b[0]->current_length = n_bytes;
       tcp_push_hdr_i (tc, *b, tc->state, 0);
@@ -1157,10 +1159,9 @@
       u32 thread_index = vlib_get_thread_index ();
       u16 n_peeked, len_to_deq, available_bufs;
       vlib_buffer_t *chain_b, *prev_b;
-      u8 *data0;
       int i;
 
-      n_bufs_per_seg = ceil ((double) max_deq_bytes / tm->bytes_per_buffer);
+      n_bufs_per_seg = ceil ((double) seg_size / tm->bytes_per_buffer);
       ASSERT (available_bytes >= max_deq_bytes);
 
       /* Make sure we have enough buffers */
@@ -1175,9 +1176,9 @@
 	    }
 	}
 
-      n_bytes = stream_session_peek_bytes (&tc->connection,
-					   vlib_buffer_get_current (*b),
-					   offset, tm->bytes_per_buffer);
+      n_bytes = stream_session_peek_bytes (&tc->connection, data, offset,
+					   tm->bytes_per_buffer -
+					   MAX_HDRS_LEN);
       b[0]->current_length = n_bytes;
       b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
       b[0]->total_length_not_including_first_buffer = 0;
@@ -1194,8 +1195,8 @@
 	  ASSERT (chain_bi != (u32) ~ 0);
 	  chain_b = vlib_get_buffer (vm, chain_bi);
 	  chain_b->current_data = 0;
-	  data0 = vlib_buffer_get_current (chain_b);
-	  n_peeked = stream_session_peek_bytes (&tc->connection, data0,
+	  data = vlib_buffer_get_current (chain_b);
+	  n_peeked = stream_session_peek_bytes (&tc->connection, data,
 						n_bytes, len_to_deq);
 	  n_bytes += n_peeked;
 	  ASSERT (n_peeked == len_to_deq);
@@ -1215,6 +1216,8 @@
     }
 
   ASSERT (n_bytes > 0);
+  ASSERT (((*b)->current_data + (*b)->current_length) <=
+	  tm->bytes_per_buffer);
 
   if (tcp_in_fastrecovery (tc))
     tc->snd_rxt_bytes += n_bytes;