ICMP46 error: Clone first buffer instead of "truncating" original buffer

The previous code walked the buffer chain, effectively trying to "truncate" the chain, reset the
length of the first buffer and reused that as the ICMP error message. That could cause issues when
there were other users of the buffer chain. Update to clone the first buffer in the chain, and
use that for the ICMP error message instead.

Change-Id: Ibc1a0bf2d854dae41874808c8297028ed93dd69d
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 3e0234b..483a990 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -976,6 +976,27 @@
   return fd;
 }
 
+/* duplicate first buffer in chain; sets *di, returns 0 if alloc fails */
+always_inline vlib_buffer_t *
+vlib_buffer_copy_no_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * di)
+{
+  vlib_buffer_t *d;
+
+  if ((vlib_buffer_alloc (vm, di, 1)) != 1)
+    return 0;
+
+  d = vlib_get_buffer (vm, *di);
+  /* 1st segment only: data offset/length, both opaque areas, payload */
+  d->current_data = b->current_data;
+  d->current_length = b->current_length;
+  clib_memcpy_fast (d->opaque, b->opaque, sizeof (b->opaque));
+  clib_memcpy_fast (d->opaque2, b->opaque2, sizeof (b->opaque2));
+  clib_memcpy_fast (vlib_buffer_get_current (d),
+		    vlib_buffer_get_current (b), b->current_length);
+
+  return d;
+}
+
 /** \brief Create a maximum of 256 clones of buffer and store them
     in the supplied array
 
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index a598ca9..ce81dc0 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -476,15 +476,29 @@
 
       while (n_left_from > 0 && n_left_to_next > 0)
 	{
-	  u32 pi0 = from[0];
+	  /*
+	   * Duplicate first buffer and free the original chain.  Keep
+	   * as much of the original packet as possible, within the
+	   * minimum MTU. We cheat "a little" here by keeping whatever
+	   * is available in the first buffer.
+	   */
+
+	  u32 pi0 = ~0;
+	  u32 org_pi0 = from[0];
 	  u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
 	  u8 error0 = ICMP4_ERROR_NONE;
-	  vlib_buffer_t *p0;
+	  vlib_buffer_t *p0, *org_p0;
 	  ip4_header_t *ip0, *out_ip0;
 	  icmp46_header_t *icmp0;
 	  u32 sw_if_index0, if_add_index0;
 	  ip_csum_t sum;
 
+	  org_p0 = vlib_get_buffer (vm, org_pi0);
+	  p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
+	  vlib_buffer_free_one (vm, org_pi0);
+	  if (!p0 || pi0 == ~0)	/* Out of buffers */
+	    continue;
+
 	  /* Speculatively enqueue p0 to the current next frame */
 	  to_next[0] = pi0;
 	  from += 1;
@@ -492,28 +506,9 @@
 	  n_left_from -= 1;
 	  n_left_to_next -= 1;
 
-	  p0 = vlib_get_buffer (vm, pi0);
 	  ip0 = vlib_buffer_get_current (p0);
 	  sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
-	  /*
-	   * RFC1812 says to keep as much of the original packet as
-	   * possible within the minimum MTU (576). We cheat "a little"
-	   * here by keeping whatever fits in the first buffer, to be more
-	   * efficient
-	   */
-	  if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
-	    {
-	      /* clear current_length of all other buffers in chain */
-	      vlib_buffer_t *b = p0;
-	      p0->total_length_not_including_first_buffer = 0;
-	      while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
-		{
-		  b = vlib_get_buffer (vm, b->next_buffer);
-		  b->current_length = 0;
-		}
-	    }
-
 	  /* Add IP header and ICMPv4 header including a 4 byte data field */
 	  vlib_buffer_advance (p0,
 			       -sizeof (ip4_header_t) -
@@ -521,7 +516,6 @@
 
 	  p0->current_length =
 	    p0->current_length > 576 ? 576 : p0->current_length;
-
 	  out_ip0 = vlib_buffer_get_current (p0);
 	  icmp0 = (icmp46_header_t *) & out_ip0[1];
 
@@ -570,6 +564,7 @@
 	  /* Update error status */
 	  if (error0 == ICMP4_ERROR_NONE)
 	    error0 = icmp4_icmp_type_to_error (icmp0->type);
+
 	  vlib_error_count (vm, node->node_index, error0, 1);
 
 	  /* Verify speculative enqueue, maybe switch current next frame */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index 37deb76..ff83a0c 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -493,15 +493,29 @@
 
       while (n_left_from > 0 && n_left_to_next > 0)
 	{
-	  u32 pi0 = from[0];
+	  /*
+	   * Duplicate first buffer and free the original chain.  Keep
+	   * as much of the original packet as possible, within the
+	   * minimum MTU. We cheat "a little" here by keeping whatever
+	   * is available in the first buffer.
+	   */
+
+	  u32 pi0 = ~0;
+	  u32 org_pi0 = from[0];
 	  u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP;
 	  u8 error0 = ICMP6_ERROR_NONE;
-	  vlib_buffer_t *p0;
+	  vlib_buffer_t *p0, *org_p0;
 	  ip6_header_t *ip0, *out_ip0;
 	  icmp46_header_t *icmp0;
 	  u32 sw_if_index0, if_add_index0;
 	  int bogus_length;
 
+	  org_p0 = vlib_get_buffer (vm, org_pi0);
+	  p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
+	  vlib_buffer_free_one (vm, org_pi0);
+	  if (!p0 || pi0 == ~0)	/* Out of buffers */
+	    continue;
+
 	  /* Speculatively enqueue p0 to the current next frame */
 	  to_next[0] = pi0;
 	  from += 1;
@@ -509,37 +523,14 @@
 	  n_left_from -= 1;
 	  n_left_to_next -= 1;
 
-	  p0 = vlib_get_buffer (vm, pi0);
 	  ip0 = vlib_buffer_get_current (p0);
 	  sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
 
-	  /* RFC4443 says to keep as much of the original packet as possible
-	   * within the minimum MTU. We cheat "a little" here by keeping whatever fits
-	   * in the first buffer, to be more efficient */
-	  if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
-	    {			/* clear current_length of all other buffers in chain */
-	      vlib_buffer_t *b = p0;
-	      p0->total_length_not_including_first_buffer = 0;
-	      while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
-		{
-		  b = vlib_get_buffer (vm, b->next_buffer);
-		  b->current_length = 0;
-		  // XXX: Buffer leak???
-		}
-	    }
-
 	  /* Add IP header and ICMPv6 header including a 4 byte data field */
-	  int headroom = sizeof (ip6_header_t) + sizeof (icmp46_header_t) + 4;
+	  vlib_buffer_advance (p0,
+			       -(sizeof (ip6_header_t) +
+				 sizeof (icmp46_header_t) + 4));
 
-	  /* Verify that we're not falling off the edge */
-	  if (p0->current_data - headroom < -VLIB_BUFFER_PRE_DATA_SIZE)
-	    {
-	      next0 = IP6_ICMP_ERROR_NEXT_DROP;
-	      error0 = ICMP6_ERROR_DROP;
-	      goto error;
-	    }
-
-	  vlib_buffer_advance (p0, -headroom);
 	  vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
 	  p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
 	  p0->current_length =
@@ -571,7 +562,6 @@
 	    {
 	      next0 = IP6_ICMP_ERROR_NEXT_DROP;
 	      error0 = ICMP6_ERROR_DROP;
-	      goto error;
 	    }
 
 	  /* Fill icmp header fields */
@@ -588,7 +578,6 @@
 	  if (error0 == ICMP6_ERROR_NONE)
 	    error0 = icmp6_icmp_type_to_error (icmp0->type);
 
-	error:
 	  vlib_error_count (vm, node->node_index, error0, 1);
 
 	  /* Verify speculative enqueue, maybe switch current next frame */