IPIP and IPv6 fragmentation
- Fix the ICMPv6 error node not resetting sw_if_index[VLIB_TX] to -1,
  which led to a crash for ICMP errors generated on tunnelled packets.
- Set VNET_BUFFER_F_LOCALLY_ORIGINATED on encapsulated packets;
  without it, IP-in-IPv6 packets were never fragmented
  (see the sketch below).
- Add support for fragmentation of buffer chains.
- Remove support for inner fragmentation from the frag code itself.
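
A minimal sketch (not part of the diff) of the buffer setup these fixes
rely on, assuming an ordinary VPP node context. The names used
(VNET_BUFFER_F_LOCALLY_ORIGINATED, ip_frag_set_vnet_buffer,
IP6_FRAG_NEXT_IP6_LOOKUP) come from this change; the helper itself is
hypothetical.

  /* Hypothetical helper, for illustration only. Assumes the usual VPP
     headers (vnet/buffer.h, vnet/ip/ip_frag.h) are included. */
  static inline void
  prepare_buffer_for_ip6_fragmentation (vlib_buffer_t * b, u16 mtu)
  {
    /* Tunnel encap and locally generated ICMP errors must be marked as
       locally originated, otherwise ip6-rewrite refuses to fragment them. */
    b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;

    /* Do not let a generated error inherit a stale TX interface; reset it
       so the packet goes through a normal lookup. */
    vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;

    /* New ip_frag_set_vnet_buffer() signature: no header_offset argument,
       fragmentation always starts at the current buffer offset. */
    ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
                             0 /* flags */);
  }

In the diff below, ipip.c sets the flag in its encap callback, icmp6.c
resets VLIB_TX and marks generated errors as locally originated, and the
ip4/ip6 rewrite and MAP nodes call ip_frag_set_vnet_buffer() without the
removed offset argument.
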
Change-Id: If9a97301b7e35ca97ffa5c0fada2b9e7e7dbfb27
Signed-off-by: Ole Troan <ot@cisco.com>
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index 715cdef..44392e8 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -167,7 +167,8 @@
if (mm->frag_inner)
{
- ip_frag_set_vnet_buffer (b, sizeof (ip6_header_t), mtu,
+ // TODO: Fix inner fragmentation now that inner support has been removed from ip-frag.
+ ip_frag_set_vnet_buffer (b, /*sizeof (ip6_header_t), */ mtu,
IP4_FRAG_NEXT_IP6_LOOKUP,
IP_FRAG_FLAG_IP6_HEADER);
return (IP4_MAP_NEXT_IP4_FRAGMENT);
@@ -183,7 +184,7 @@
*error = MAP_ERROR_DF_SET;
return (IP4_MAP_NEXT_ICMP_ERROR);
}
- ip_frag_set_vnet_buffer (b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
+ ip_frag_set_vnet_buffer (b, mtu, IP6_FRAG_NEXT_IP6_LOOKUP,
IP_FRAG_FLAG_IP6_HEADER);
return (IP4_MAP_NEXT_IP6_FRAGMENT);
}
@@ -621,7 +622,7 @@
&& (clib_net_to_host_u16 (ip60->payload_length) +
sizeof (*ip60) > d0->mtu)))
{
- vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
+ // TODO: vnet_buffer (p0)->ip_frag.header_offset = sizeof (*ip60);
vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
vnet_buffer (p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER;
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index 0a9903a..c6b0912 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -237,7 +237,6 @@
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP;
next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG;
@@ -322,7 +321,6 @@
{
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -391,7 +389,6 @@
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
//Send to fragmentation node if necessary
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -409,7 +406,6 @@
if (vnet_buffer (p1)->map_t.mtu < p1->current_length)
{
//Send to fragmentation node if necessary
- vnet_buffer (p1)->ip_frag.header_offset = 0;
vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
vnet_buffer (p1)->ip_frag.next_index =
IP6_FRAG_NEXT_IP6_LOOKUP;
@@ -453,7 +449,6 @@
if (vnet_buffer (p0)->map_t.mtu < p0->current_length)
{
//Send to fragmentation node if necessary
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
vnet_buffer (p0)->ip_frag.next_index =
IP6_FRAG_NEXT_IP6_LOOKUP;
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 01b2d87..5b80209 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -308,7 +308,6 @@
(d0->mtu
&& (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
{
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -341,7 +340,6 @@
(d1->mtu
&& (clib_host_to_net_u16 (ip41->length) > d1->mtu)))
{
- vnet_buffer (p1)->ip_frag.header_offset = 0;
vnet_buffer (p1)->ip_frag.flags = 0;
vnet_buffer (p1)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -495,7 +493,6 @@
(d0->mtu
&& (clib_host_to_net_u16 (ip40->length) > d0->mtu)))
{
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
@@ -935,7 +932,6 @@
(d0->mtu && (clib_host_to_net_u16 (ip40->length) > d0->mtu)
&& error0 == MAP_ERROR_NONE && !cached))
{
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.flags = 0;
vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
vnet_buffer (p0)->ip_frag.mtu = d0->mtu;
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 01ed810..aa4fa4d 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -264,7 +264,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG;
}
@@ -348,7 +347,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -366,7 +364,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
- vnet_buffer (p1)->ip_frag.header_offset = 0;
vnet_buffer (p1)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -405,7 +402,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG;
@@ -466,7 +462,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
@@ -484,7 +479,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu;
- vnet_buffer (p1)->ip_frag.header_offset = 0;
vnet_buffer (p1)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
@@ -523,7 +517,6 @@
{
//Send to fragmentation node if necessary
vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu;
- vnet_buffer (p0)->ip_frag.header_offset = 0;
vnet_buffer (p0)->ip_frag.next_index =
IP4_FRAG_NEXT_IP4_LOOKUP;
next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG;
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 2a6feb5..8071081 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -277,7 +277,6 @@
struct
{
u32 pad[2]; /* do not overlay w/ ip.adj_index[0,1] */
- u16 header_offset;
u16 mtu;
u8 next_index;
u8 flags; //See ip_frag.h
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index 6ebdef4..4252ac7 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -524,14 +524,24 @@
{
b = vlib_get_buffer (vm, b->next_buffer);
b->current_length = 0;
+ // XXX: Buffer leak???
}
}
/* Add IP header and ICMPv6 header including a 4 byte data field */
- vlib_buffer_advance (p0,
- -sizeof (ip6_header_t) -
- sizeof (icmp46_header_t) - 4);
+ int headroom = sizeof (ip6_header_t) + sizeof (icmp46_header_t) + 4;
+ /* Verify that we're not falling off the edge */
+ if (p0->current_data - headroom < -VLIB_BUFFER_PRE_DATA_SIZE)
+ {
+ next0 = IP6_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP6_ERROR_DROP;
+ goto error;
+ }
+
+ vlib_buffer_advance (p0, -headroom);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 1280 ? 1280 : p0->current_length;
@@ -561,6 +571,7 @@
{
next0 = IP6_ICMP_ERROR_NEXT_DROP;
error0 = ICMP6_ERROR_DROP;
+ goto error;
}
/* Fill icmp header fields */
@@ -573,11 +584,11 @@
ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0,
&bogus_length);
-
-
/* Update error status */
if (error0 == ICMP6_ERROR_NONE)
error0 = icmp6_icmp_type_to_error (icmp0->type);
+
+ error:
vlib_error_count (vm, node->node_index, error0, 1);
/* Verify speculative enqueue, maybe switch current next frame */
@@ -602,7 +613,7 @@
.n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
.next_nodes = {
- [IP6_ICMP_ERROR_NEXT_DROP] = "ip6-drop",
+ [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop",
[IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup",
},
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index d5cf011..d5e2cd5 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -2097,7 +2097,7 @@
else
{
/* IP fragmentation */
- ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
+ ip_frag_set_vnet_buffer (b, adj_packet_bytes,
IP4_FRAG_NEXT_IP4_REWRITE, 0);
*next = IP4_REWRITE_NEXT_FRAGMENT;
}
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index 5abbba5..e05792f 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1566,9 +1566,10 @@
if (is_locally_generated)
{
/* IP fragmentation */
- ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
+ ip_frag_set_vnet_buffer (b, adj_packet_bytes,
IP6_FRAG_NEXT_IP6_REWRITE, 0);
*next = IP6_REWRITE_NEXT_FRAGMENT;
+ *error = IP6_ERROR_MTU_EXCEEDED;
}
else
{
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index eb9bb4a..628d9d6 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -27,7 +27,6 @@
typedef struct
{
u8 ipv6;
- u16 header_offset;
u16 mtu;
u8 next;
u16 n_fragments;
@@ -39,13 +38,48 @@
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
- s = format (s, "IPv%s offset: %u mtu: %u fragments: %u",
- t->ipv6 ? "6" : "4", t->header_offset, t->mtu, t->n_fragments);
+ s = format (s, "IPv%s mtu: %u fragments: %u",
+ t->ipv6 ? "6" : "4", t->mtu, t->n_fragments);
return s;
}
static u32 running_fragment_id;
+static void
+frag_set_sw_if_index (vlib_buffer_t * to, vlib_buffer_t * from)
+{
+ vnet_buffer (to)->sw_if_index[VLIB_RX] =
+ vnet_buffer (from)->sw_if_index[VLIB_RX];
+ vnet_buffer (to)->sw_if_index[VLIB_TX] =
+ vnet_buffer (from)->sw_if_index[VLIB_TX];
+
+ /* Copy adj_index in case DPO based node is sending for the
+ * fragmentation, the packet would be sent back to the proper
+ * DPO next node and Index
+ */
+ vnet_buffer (to)->ip.adj_index[VLIB_RX] =
+ vnet_buffer (from)->ip.adj_index[VLIB_RX];
+ vnet_buffer (to)->ip.adj_index[VLIB_TX] =
+ vnet_buffer (from)->ip.adj_index[VLIB_TX];
+}
+
+static vlib_buffer_t *
+frag_buffer_alloc (vlib_buffer_t * org_b, u32 * bi)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ if (vlib_buffer_alloc (vm, bi, 1) != 1)
+ return 0;
+
+ vlib_buffer_t *b = vlib_get_buffer (vm, *bi);
+ vlib_buffer_free_list_t *fl =
+ vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ vlib_buffer_init_for_free_list (b, fl);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+ vlib_buffer_copy_trace_flag (vm, org_b, *bi);
+
+ return b;
+}
+
/*
* Limitation: Does follow buffer chains in the packet to fragment,
* but does not generate buffer chains. I.e. a fragment is always
@@ -58,23 +92,19 @@
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 mtu, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+ u16 mtu, len, max, rem, ip_frag_id, ip_frag_offset;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
- offset = vnet_buffer (from_b)->ip_frag.header_offset;
mtu = vnet_buffer (from_b)->ip_frag.mtu;
org_from_packet = vlib_buffer_get_current (from_b);
- ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b) + offset;
+ ip4 = (ip4_header_t *) vlib_buffer_get_current (from_b);
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
- max =
- (mtu - sizeof (ip4_header_t) -
- vnet_buffer (from_b)->ip_frag.header_offset) & ~0x7;
+ max = (mtu - sizeof (ip4_header_t)) & ~0x7;
if (rem >
- (vlib_buffer_length_in_chain (vm, from_b) - offset -
- sizeof (ip4_header_t)))
+ (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
{
*error = IP_FRAG_ERROR_MALFORMED;
return;
@@ -110,9 +140,9 @@
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
- u16 ptr = 0, fo = 0;
- u16 left_in_from_buffer =
- from_b->current_length - offset - sizeof (ip4_header_t);
+ u16 fo = 0;
+ u16 left_in_from_buffer = from_b->current_length - sizeof (ip4_header_t);
+ u16 ptr = 0;
/* Do the actual fragmentation */
while (rem)
@@ -122,40 +152,28 @@
ip4_header_t *to_ip4;
u8 *to_data;
- len = (rem > (mtu - sizeof (ip4_header_t) - offset) ? max : rem);
+ len = (rem > (mtu - sizeof (ip4_header_t)) ? max : rem);
if (len != rem) /* Last fragment does not need to divisible by 8 */
len &= ~0x7;
- if (!vlib_buffer_alloc (vm, &to_bi, 1))
+ if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
{
*error = IP_FRAG_ERROR_MEMORY;
return;
}
vec_add1 (*buffer, to_bi);
- to_b = vlib_get_buffer (vm, to_bi);
- vnet_buffer (to_b)->sw_if_index[VLIB_RX] =
- vnet_buffer (org_from_b)->sw_if_index[VLIB_RX];
- vnet_buffer (to_b)->sw_if_index[VLIB_TX] =
- vnet_buffer (org_from_b)->sw_if_index[VLIB_TX];
- /* Copy adj_index in case DPO based node is sending for the
- * fragmentation, the packet would be sent back to the proper
- * DPO next node and Index
- */
- vnet_buffer (to_b)->ip.adj_index[VLIB_RX] =
- vnet_buffer (org_from_b)->ip.adj_index[VLIB_RX];
- vnet_buffer (to_b)->ip.adj_index[VLIB_TX] =
- vnet_buffer (org_from_b)->ip.adj_index[VLIB_TX];
+ frag_set_sw_if_index (to_b, org_from_b);
- /* Copy offset and ip4 header */
- clib_memcpy (to_b->data, org_from_packet,
- offset + sizeof (ip4_header_t));
- to_ip4 = vlib_buffer_get_current (to_b) + offset;
+ /* Copy ip4 header */
+ clib_memcpy (to_b->data, org_from_packet, sizeof (ip4_header_t));
+ to_ip4 = vlib_buffer_get_current (to_b);
to_data = (void *) (to_ip4 + 1);
/* Spin through from buffers filling up the to buffer */
- u16 to_ptr = 0;
- u16 bytes_to_copy, left_in_to_buffer = len;
+ u16 left_in_to_buffer = len, to_ptr = 0;
while (1)
{
+ u16 bytes_to_copy;
+
/* Figure out how many bytes we can safely copy */
bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
left_in_to_buffer : left_in_from_buffer;
@@ -166,7 +184,7 @@
if (left_in_to_buffer == 0)
break;
- ASSERT (left_in_from_buffer == 0);
+ ASSERT (left_in_from_buffer <= 0);
/* Move buffer */
if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
{
@@ -180,7 +198,7 @@
to_ptr += bytes_to_copy;
}
- to_b->current_length = offset + len + sizeof (ip4_header_t);
+ to_b->current_length = len + sizeof (ip4_header_t);
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -214,21 +232,20 @@
}
void
-ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
- u8 next_index, u8 flags)
+ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu, u8 next_index, u8 flags)
{
- vnet_buffer (b)->ip_frag.header_offset = offset;
vnet_buffer (b)->ip_frag.mtu = mtu;
vnet_buffer (b)->ip_frag.next_index = next_index;
vnet_buffer (b)->ip_frag.flags = flags;
}
-static uword
-ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+
+static inline uword
+frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, u32 node_index, bool is_ip6)
{
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_frag_node.index);
+ vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, node_index);
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -244,58 +261,68 @@
u32 pi0, *frag_from, frag_left;
vlib_buffer_t *p0;
ip_frag_error_t error0;
- ip4_frag_next_t next0;
+ int next0;
- //Note: The packet is not enqueued now.
- //It is instead put in a vector where other fragments
- //will be put as well.
+ /*
+ * Note: The packet is not enqueued now. It is instead put
+ * in a vector where other fragments will be put as well.
+ */
pi0 = from[0];
from += 1;
n_left_from -= 1;
error0 = IP_FRAG_ERROR_NONE;
p0 = vlib_get_buffer (vm, pi0);
- ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+ if (is_ip6)
+ ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+ else
+ ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
- tr->ipv6 = 0;
+ tr->ipv6 = is_ip6 ? 1 : 0;
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
}
- if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ if (!is_ip6 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
{
icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
vnet_buffer (p0)->ip_frag.mtu);
- vlib_buffer_advance (p0,
- vnet_buffer (p0)->ip_frag.header_offset);
next0 = IP4_FRAG_NEXT_ICMP_ERROR;
}
else
{
- /* *INDENT-OFF* */
- next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP4_FRAG_NEXT_DROP;
- /* *INDENT-ON* */
+ if (is_ip6)
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP6_FRAG_NEXT_DROP;
+ else
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP4_FRAG_NEXT_DROP;
}
if (error0 == IP_FRAG_ERROR_NONE)
{
/* Free original buffer chain */
- vlib_buffer_free_one (vm, pi0);
frag_sent += vec_len (buffer);
small_packets += (vec_len (buffer) == 1);
+ vlib_buffer_free_one (vm, pi0); /* Free original packet */
}
else
- vlib_error_count (vm, ip4_frag_node.index, error0, 1);
+ {
+ vlib_error_count (vm, node_index, error0, 1);
+ vec_add1 (buffer, pi0); /* Get rid of the original buffer */
+ }
- //Send fragments that were added in the frame
+ /* Send fragments that were added in the frame */
frag_from = buffer;
frag_left = vec_len (buffer);
@@ -325,262 +352,143 @@
}
vec_free (buffer);
- vlib_node_increment_counter (vm, ip4_frag_node.index,
+ vlib_node_increment_counter (vm, node_index,
IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
- vlib_node_increment_counter (vm, ip4_frag_node.index,
+ vlib_node_increment_counter (vm, node_index,
IP_FRAG_ERROR_SMALL_PACKET, small_packets);
return frame->n_vectors;
}
-void
-ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
- ip_frag_error_t * error)
+
+static uword
+ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- vlib_buffer_t *p;
- ip6_header_t *ip6_hdr;
- ip6_frag_hdr_t *frag_hdr;
- u8 *payload, *next_header;
-
- p = vlib_get_buffer (vm, pi);
-
- //Parsing the IPv6 headers
- ip6_hdr =
- vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
- payload = (u8 *) (ip6_hdr + 1);
- next_header = &ip6_hdr->protocol;
- if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
- {
- next_header = payload;
- payload += payload[1] * 8;
- }
-
- if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
- {
- next_header = payload;
- payload += payload[1] * 8;
- }
-
- if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
- {
- next_header = payload;
- payload += payload[1] * 8;
- }
-
- if (PREDICT_FALSE
- (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
- {
- //A malicious packet could set an extension header with a too big size
- //and make us modify another vlib_buffer
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
- }
-
- if (p->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- *error = IP_FRAG_ERROR_MALFORMED;
- return;
- }
-
- u8 has_more;
- u16 initial_offset;
- if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
- {
- //The fragmentation header is already there
- frag_hdr = (ip6_frag_hdr_t *) payload;
- has_more = ip6_frag_hdr_more (frag_hdr);
- initial_offset = ip6_frag_hdr_offset (frag_hdr);
- }
- else
- {
- //Insert a fragmentation header in the packet
- u8 nh = *next_header;
- *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
- vlib_buffer_advance (p, -sizeof (*frag_hdr));
- u8 *start = vlib_buffer_get_current (p);
- memmove (start, start + sizeof (*frag_hdr),
- payload - (start + sizeof (*frag_hdr)));
- frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
- frag_hdr->identification = ++running_fragment_id;
- frag_hdr->next_hdr = nh;
- frag_hdr->rsv = 0;
- has_more = 0;
- initial_offset = 0;
- }
- payload = (u8 *) (frag_hdr + 1);
-
- u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
- u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len;
- u16 rem = p->current_length - headers_len;
- u16 ptr = 0;
-
- if (max_payload < 8)
- {
- *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
- return;
- }
-
- while (rem)
- {
- u32 bi;
- vlib_buffer_t *b;
- u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
- rem -= len;
-
- if (ptr != 0)
- {
- if (!vlib_buffer_alloc (vm, &bi, 1))
- {
- *error = IP_FRAG_ERROR_MEMORY;
- return;
- }
- b = vlib_get_buffer (vm, bi);
- vnet_buffer (b)->sw_if_index[VLIB_RX] =
- vnet_buffer (p)->sw_if_index[VLIB_RX];
- vnet_buffer (b)->sw_if_index[VLIB_TX] =
- vnet_buffer (p)->sw_if_index[VLIB_TX];
-
- /* Copy Adj_index in case DPO based node is sending for the fragmentation,
- the packet would be sent back to the proper DPO next node and Index */
- vnet_buffer (b)->ip.adj_index[VLIB_RX] =
- vnet_buffer (p)->ip.adj_index[VLIB_RX];
- vnet_buffer (b)->ip.adj_index[VLIB_TX] =
- vnet_buffer (p)->ip.adj_index[VLIB_TX];
-
- clib_memcpy (vlib_buffer_get_current (b),
- vlib_buffer_get_current (p), headers_len);
- clib_memcpy (vlib_buffer_get_current (b) + headers_len,
- payload + ptr, len);
- frag_hdr =
- vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
- }
- else
- {
- bi = pi;
- b = vlib_get_buffer (vm, bi);
- //frag_hdr already set here
- }
-
- ip6_hdr =
- vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
- frag_hdr->fragment_offset_and_more =
- ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
- (rem || has_more));
- b->current_length = headers_len + len;
- ip6_hdr->payload_length =
- clib_host_to_net_u16 (b->current_length -
- vnet_buffer (p)->ip_frag.header_offset -
- sizeof (*ip6_hdr));
-
- if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
- {
- //Encapsulating ipv4 header
- ip4_header_t *encap_header4 =
- (ip4_header_t *) vlib_buffer_get_current (b);
- encap_header4->length = clib_host_to_net_u16 (b->current_length);
- encap_header4->checksum = ip4_header_checksum (encap_header4);
- }
- else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
- {
- //Encapsulating ipv6 header
- ip6_header_t *encap_header6 =
- (ip6_header_t *) vlib_buffer_get_current (b);
- encap_header6->payload_length =
- clib_host_to_net_u16 (b->current_length -
- sizeof (*encap_header6));
- }
-
- vec_add1 (*buffer, bi);
-
- ptr += len;
- }
+ return frag_node_inline (vm, node, frame, ip4_frag_node.index,
+ 0 /* is_ip6 */ );
}
static uword
ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip6_frag_node.index);
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- u32 frag_sent = 0, small_packets = 0;
- u32 *buffer = 0;
+ return frag_node_inline (vm, node, frame, ip6_frag_node.index,
+ 1 /* is_ip6 */ );
+}
- while (n_left_from > 0)
+/*
+ * Fragments the packet given in from_bi. Fragments are returned in the buffer vector.
+ * Caller must ensure the original packet is freed.
+ */
+void
+ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u32 ** buffer,
+ ip_frag_error_t * error)
+{
+ vlib_buffer_t *from_b;
+ ip6_header_t *ip6;
+ u16 mtu, len, max, rem, ip_frag_id;
+
+ from_b = vlib_get_buffer (vm, from_bi);
+ mtu = vnet_buffer (from_b)->ip_frag.mtu;
+ ip6 = (ip6_header_t *) vlib_buffer_get_current (from_b);
+
+ rem = clib_net_to_host_u16 (ip6->payload_length);
+ max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
+
+ if (rem >
+ (vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 pi0, *frag_from, frag_left;
- vlib_buffer_t *p0;
- ip_frag_error_t error0;
- ip6_frag_next_t next0;
-
- pi0 = from[0];
- from += 1;
- n_left_from -= 1;
- error0 = IP_FRAG_ERROR_NONE;
-
- p0 = vlib_get_buffer (vm, pi0);
- ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip_frag_trace_t *tr =
- vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
- tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
- tr->ipv6 = 1;
- tr->n_fragments = vec_len (buffer);
- tr->next = vnet_buffer (p0)->ip_frag.next_index;
- }
-
- /* *INDENT-OFF* */
- next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
- ip_frag.next_index : IP6_FRAG_NEXT_DROP;
- /* *INDENT-ON* */
-
- frag_sent += vec_len (buffer);
- small_packets += (vec_len (buffer) == 1);
-
- //Send fragments that were added in the frame
- frag_from = buffer;
- frag_left = vec_len (buffer);
- while (frag_left > 0)
- {
- while (frag_left > 0 && n_left_to_next > 0)
- {
- u32 i;
- i = to_next[0] = frag_from[0];
- frag_from += 1;
- frag_left -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- vlib_get_buffer (vm, i)->error = error_node->errors[error0];
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, i,
- next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next,
- n_left_to_next);
- }
- vec_reset_length (buffer);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
}
- vec_free (buffer);
- vlib_node_increment_counter (vm, ip6_frag_node.index,
- IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
- vlib_node_increment_counter (vm, ip6_frag_node.index,
- IP_FRAG_ERROR_SMALL_PACKET, small_packets);
- return frame->n_vectors;
+ /* TODO: Look through header chain for fragmentation header */
+ if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+
+ u8 *from_data = (void *) (ip6 + 1);
+ vlib_buffer_t *org_from_b = from_b;
+ u16 fo = 0;
+ u16 left_in_from_buffer = from_b->current_length - sizeof (ip6_header_t);
+ u16 ptr = 0;
+
+ ip_frag_id = ++running_fragment_id; // Fix
+
+ /* Do the actual fragmentation */
+ while (rem)
+ {
+ u32 to_bi;
+ vlib_buffer_t *to_b;
+ ip6_header_t *to_ip6;
+ ip6_frag_hdr_t *to_frag_hdr;
+ u8 *to_data;
+
+ len =
+ (rem >
+ (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+ if (len != rem) /* Last fragment does not need to be divisible by 8 */
+ len &= ~0x7;
+ if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
+ {
+ *error = IP_FRAG_ERROR_MEMORY;
+ return;
+ }
+ vec_add1 (*buffer, to_bi);
+ frag_set_sw_if_index (to_b, org_from_b);
+
+ /* Copy ip6 header */
+ clib_memcpy (to_b->data, ip6, sizeof (ip6_header_t));
+ to_ip6 = vlib_buffer_get_current (to_b);
+ to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
+ to_data = (void *) (to_frag_hdr + 1);
+
+ /* Spin through from buffers filling up the to buffer */
+ u16 left_in_to_buffer = len, to_ptr = 0;
+ while (1)
+ {
+ u16 bytes_to_copy;
+
+ /* Figure out how many bytes we can safely copy */
+ bytes_to_copy = left_in_to_buffer <= left_in_from_buffer ?
+ left_in_to_buffer : left_in_from_buffer;
+ clib_memcpy (to_data + to_ptr, from_data + ptr, bytes_to_copy);
+ left_in_to_buffer -= bytes_to_copy;
+ ptr += bytes_to_copy;
+ left_in_from_buffer -= bytes_to_copy;
+ if (left_in_to_buffer == 0)
+ break;
+
+ ASSERT (left_in_from_buffer <= 0);
+ /* Move buffer */
+ if (!(from_b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ *error = IP_FRAG_ERROR_MALFORMED;
+ return;
+ }
+ from_b = vlib_get_buffer (vm, from_b->next_buffer);
+ from_data = (u8 *) vlib_buffer_get_current (from_b);
+ ptr = 0;
+ left_in_from_buffer = from_b->current_length;
+ to_ptr += bytes_to_copy;
+ }
+
+ to_b->current_length =
+ len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+ to_ip6->payload_length =
+ clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
+ to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ to_frag_hdr->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more ((fo >> 3), len != rem);
+ to_frag_hdr->identification = ip_frag_id;
+ to_frag_hdr->next_hdr = ip6->protocol;
+ to_frag_hdr->rsv = 0;
+
+ rem -= len;
+ fo += len;
+ }
}
static char *ip4_frag_error_strings[] = {
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index ef5eb4c..06eeee8 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -84,7 +84,7 @@
IP_FRAG_N_ERROR,
} ip_frag_error_t;
-void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
void
ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index c49be09..718463d 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -138,6 +138,10 @@
ip6_header_t *ip6;
const ipip_tunnel_t *t = data;
+ /* Must set locally originated otherwise we're not allowed to
+ fragment the packet later */
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
ip6 = vlib_buffer_get_current (b);
ip6->payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index 60d6223..6171d3e 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -99,6 +99,14 @@
if (is_ipv6)
{
ip60 = vlib_buffer_get_current (b0);
+ /* Check for outer fragmentation */
+ if (ip60->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ next0 = IPIP_INPUT_NEXT_DROP;
+ b0->error = node->errors[IPIP_ERROR_FRAGMENTED_PACKET];
+ goto drop;
+ }
+
vlib_buffer_advance (b0, sizeof (*ip60));
ip_set (&src0, &ip60->src_address, false);
ip_set (&dst0, &ip60->dst_address, false);
diff --git a/src/vpp-api/python/vpp_papi/vpp_stats.py b/src/vpp-api/python/vpp_papi/vpp_stats.py
index 456312b..8c1aaf2 100644
--- a/src/vpp-api/python/vpp_papi/vpp_stats.py
+++ b/src/vpp-api/python/vpp_papi/vpp_stats.py
@@ -134,7 +134,8 @@
for i in range(rv_len):
n = ffi.string(rv[i].name).decode()
e = stat_entry_to_python(self.api, rv[i])
- stats[n] = e
+ if e != None:
+ stats[n] = e
return stats
def get_counter(self, name):