ip: Fixes for IPv6 and MPLS fragmentation

Type: fix

- IPv6 fragmentation did not work if the packet spanned multiple buffers, because the 'len' calculation did not max out at the size of a buffer
- IPv6 fragmentation did not work when the l2unfragmentable size was non-zero, because that size was not applied in the correct places
- IPv6oMPLS fragmentation would fragment all IPv6 packets; it should do so only for locally generated packets
- IPv6oMPLS should send back an ICMP6 Packet Too Big error for packets that are not locally generated

Signed-off-by: Neale Ranns <neale@graphiant.com>
Change-Id: Ie8f02cdfdd7b7e8474e62b6d0acda8f20c371184
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index b9bc90d..9c223c3 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -25,10 +25,10 @@
 
 typedef struct
 {
-  u8 ipv6;
   u16 mtu;
   u8 next;
   u16 n_fragments;
+  u16 pkt_size;
 } ip_frag_trace_t;
 
 static u8 *
@@ -37,8 +37,8 @@
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
-  s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
-	      t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
+  s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu,
+	      t->pkt_size, t->n_fragments, t->next);
   return s;
 }
 
@@ -286,7 +286,7 @@
 	      ip_frag_trace_t *tr =
 		vlib_add_trace (vm, node, p0, sizeof (*tr));
 	      tr->mtu = mtu;
-	      tr->ipv6 = is_ip6 ? 1 : 0;
+	      tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
 	      tr->n_fragments = vec_len (buffer);
 	      tr->next = vnet_buffer (p0)->ip_frag.next_index;
 	    }
@@ -385,13 +385,17 @@
   ip6_header_t *ip6;
   u16 len, max, rem, ip_frag_id;
   u8 *org_from_packet;
+  u16 head_bytes;
 
   from_b = vlib_get_buffer (vm, from_bi);
   org_from_packet = vlib_buffer_get_current (from_b);
   ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
 
+  head_bytes =
+    (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize);
   rem = clib_net_to_host_u16 (ip6->payload_length);
-  max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7;	// TODO: Is max correct??
+  max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+	~0x7;
 
   if (rem >
       (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
@@ -423,9 +427,7 @@
       ip6_frag_hdr_t *to_frag_hdr;
       u8 *to_data;
 
-      len =
-	(rem >
-	 (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+      len = (rem > max ? max : rem);
       if (len != rem)		/* Last fragment does not need to divisible by 8 */
 	len &= ~0x7;
       if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
@@ -438,7 +440,7 @@
       /* Copy ip6 header */
       clib_memcpy_fast (to_b->data, org_from_packet,
 			l2unfragmentablesize + sizeof (ip6_header_t));
-      to_ip6 = vlib_buffer_get_current (to_b);
+      to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize;
       to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
       to_data = (void *) (to_frag_hdr + 1);
 
@@ -484,8 +486,7 @@
 	  to_ptr += bytes_to_copy;
 	}
 
-      to_b->current_length =
-	len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+      to_b->current_length = len + head_bytes;
       to_ip6->payload_length =
 	clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
       to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index 3699163..faeba74 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -377,11 +377,12 @@
 
 typedef enum
 {
-    MPLS_FRAG_NEXT_REWRITE,
-    MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
-    MPLS_FRAG_NEXT_ICMP_ERROR,
-    MPLS_FRAG_NEXT_DROP,
-    MPLS_FRAG_N_NEXT,
+  MPLS_FRAG_NEXT_REWRITE,
+  MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
+  MPLS_FRAG_NEXT_ICMP4_ERROR,
+  MPLS_FRAG_NEXT_ICMP6_ERROR,
+  MPLS_FRAG_NEXT_DROP,
+  MPLS_FRAG_N_NEXT,
 } mpls_frag_next_t;
 
 static uword
@@ -390,9 +391,7 @@
            vlib_frame_t * frame)
 {
     u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
-    vlib_node_runtime_t * error_node;
 
-    error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
     from = vlib_frame_vector_args (frame);
     n_left_from = frame->n_vectors;
     next_index = node->cached_next_index;
@@ -410,8 +409,7 @@
             mpls_frag_next_t next0;
             u32 pi0, adj_index0;
             ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
-            i16 encap_size;
-	    u16 mtu;
+	    i16 encap_size, mtu;
 	    u8 is_ip4;
 
 	    pi0 = to_next[0] = from[0];
@@ -422,6 +420,7 @@
 
 	    adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
 	    adj0 = adj_get (adj_index0);
+
 	    /* the size of the MPLS stack */
 	    encap_size = vnet_buffer (p0)->l3_hdr_offset - p0->current_data;
 	    mtu = adj0->rewrite_header.max_l3_packet_bytes - encap_size;
@@ -430,7 +429,18 @@
 	    if (is_ip4)
 	      error0 = ip4_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
 	    else
-	      error0 = ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+	      {
+		if (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
+		  {
+		    /* only fragment locally generated IPv6 */
+		    error0 = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+		  }
+		else
+		  {
+		    error0 =
+		      ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+		  }
+	      }
 
 	    if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
 	      {
@@ -443,24 +453,41 @@
 	    if (PREDICT_TRUE (error0 == IP_FRAG_ERROR_NONE))
 	      {
 		/* Free original buffer chain */
-		vlib_buffer_free_one (vm, pi0); /* Free original packet */
+		vlib_buffer_free_one (vm, pi0);
 		next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
 			   MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
 			   MPLS_FRAG_NEXT_REWRITE);
 	      }
-	    else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
-	      {
-		icmp4_error_set_vnet_buffer (
-		  p0, ICMP4_destination_unreachable,
-		  ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
-		  vnet_buffer (p0)->ip_frag.mtu);
-		next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
-	      }
 	    else
 	      {
-		vlib_error_count (vm, mpls_output_node.index, error0, 1);
-		vec_add1 (frags, pi0); /* Get rid of the original buffer */
-		next0 = MPLS_FRAG_NEXT_DROP;
+		vlib_error_count (vm, node->node_index, error0, 1);
+
+		if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+		  {
+		    vlib_buffer_advance (p0, encap_size);
+		    if (is_ip4)
+		      {
+			icmp4_error_set_vnet_buffer (
+			  p0, ICMP4_destination_unreachable,
+			  ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+			  vnet_buffer (p0)->ip_frag.mtu);
+			next0 = MPLS_FRAG_NEXT_ICMP4_ERROR;
+		      }
+		    else
+		      {
+			icmp6_error_set_vnet_buffer (
+			  p0, ICMP6_packet_too_big, 0,
+			  adj0->rewrite_header.max_l3_packet_bytes);
+			next0 = MPLS_FRAG_NEXT_ICMP6_ERROR;
+		      }
+		  }
+		else
+		  {
+		    next0 = MPLS_FRAG_NEXT_DROP;
+		  }
+
+		/* Get rid of the original buffer */
+		vec_add1 (frags, pi0);
 	      }
 
 	    /* Send fragments that were added in the frame */
@@ -480,9 +507,6 @@
 		    to_next += 1;
 		    n_left_to_next -= 1;
 
-		    p0 = vlib_get_buffer (vm, i);
-		    p0->error = error_node->errors[error0];
-
 		    vlib_validate_buffer_enqueue_x1 (
 		      vm, node, next_index, to_next, n_left_to_next, i, next0);
 		  }
@@ -511,22 +535,21 @@
 }
 
 VLIB_REGISTER_NODE (mpls_frag_node) = {
-    .function = mpls_frag,
-    .name = "mpls-frag",
-    .vector_size = sizeof (u32),
-    .format_trace = format_mpls_frag_trace,
-    .type = VLIB_NODE_TYPE_INTERNAL,
+  .function = mpls_frag,
+  .name = "mpls-frag",
+  .vector_size = sizeof (u32),
+  .format_trace = format_mpls_frag_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
 
-    .n_errors = IP_FRAG_N_ERROR,
-    .error_strings = mpls_frag_error_strings,
+  .n_errors = IP_FRAG_N_ERROR,
+  .error_strings = mpls_frag_error_strings,
 
-    .n_next_nodes = MPLS_FRAG_N_NEXT,
-    .next_nodes = {
-        [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
-        [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
-        [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
-        [MPLS_FRAG_NEXT_DROP] = "mpls-drop"
-    },
+  .n_next_nodes = MPLS_FRAG_N_NEXT,
+  .next_nodes = { [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
+		  [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
+		  [MPLS_FRAG_NEXT_ICMP4_ERROR] = "ip4-icmp-error",
+		  [MPLS_FRAG_NEXT_ICMP6_ERROR] = "ip6-icmp-error",
+		  [MPLS_FRAG_NEXT_DROP] = "mpls-drop" },
 };
 
 /*