ip: reassembly: handle atomic fragments correctly
If a fragment arrives with fragment offset = 0 and M = 0, it is actually
a complete packet (an atomic fragment) and, per RFC 8200, it should be
treated independently of other fragments. This patch does that.
In full reassembly, the fragmentation header is stripped and the fragment
is forwarded regardless of other existing reassemblies. In shallow virtual
reassembly, it is treated the same way as a regular packet.
Type: improvement
Change-Id: If3322d5e3160cd755b8465a642702a9166d46cc2
Signed-off-by: Klement Sekera <ksekera@cisco.com>
diff --git a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c
index 901da99..fc7fa18 100644
--- a/src/vnet/ip/reass/ip6_full_reass.c
+++ b/src/vnet/ip/reass/ip6_full_reass.c
@@ -498,11 +498,11 @@
}
always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_kv_t * kv, u32 * icmp_bi,
- u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+ u8 *do_handoff, int skip_bihash)
{
ip6_full_reass_t *reass;
f64 now;
@@ -512,7 +512,7 @@
reass = NULL;
now = vlib_time_now (vm);
- if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+ if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.memory_owner_thread_index)
{
@@ -558,24 +558,37 @@
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
- reass->key.as_u64[2] = kv->kv.key[2];
- reass->key.as_u64[3] = kv->kv.key[3];
- reass->key.as_u64[4] = kv->kv.key[4];
- reass->key.as_u64[5] = kv->kv.key[5];
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
- int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
- if (rv)
+ if (!skip_bihash)
{
- ip6_full_reass_free (rm, rt, reass);
- reass = NULL;
- // if other worker created a context already work with the other copy
- if (-2 == rv)
- goto again;
+ reass->key.as_u64[0] = kv->kv.key[0];
+ reass->key.as_u64[1] = kv->kv.key[1];
+ reass->key.as_u64[2] = kv->kv.key[2];
+ reass->key.as_u64[3] = kv->kv.key[3];
+ reass->key.as_u64[4] = kv->kv.key[4];
+ reass->key.as_u64[5] = kv->kv.key[5];
+
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
+ {
+ ip6_full_reass_free (rm, rt, reass);
+ reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
+ }
+ }
+ else
+ {
+ reass->key.as_u64[0] = ~0;
+ reass->key.as_u64[1] = ~0;
+ reass->key.as_u64[2] = ~0;
+ reass->key.as_u64[3] = ~0;
+ reass->key.as_u64[4] = ~0;
+ reass->key.as_u64[5] = ~0;
}
return reass;
@@ -843,12 +856,13 @@
}
always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
- u32 * error0, ip6_frag_hdr_t * frag_hdr,
- bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+ u32 *error0, ip6_frag_hdr_t *frag_hdr,
+ bool is_custom_app, u32 *handoff_thread_idx,
+ int skip_bihash)
{
int consumed = 0;
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -956,6 +970,12 @@
~0);
}
}
+ else if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, then the packet must have been
+ // consumed
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (~0 != reass->last_packet_octet &&
reass->data_len == reass->last_packet_octet + 1)
{
@@ -973,6 +993,12 @@
}
else
{
+ if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, it should've been an atomic
+ // fragment and thus finalized
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (consumed)
{
*bi0 = ~0;
@@ -1113,22 +1139,33 @@
next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
+
+ int skip_bihash = 0;
ip6_full_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] = ip0->src_address.as_u64[0];
- kv.k.as_u64[1] = ip0->src_address.as_u64[1];
- kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
- kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
- kv.k.as_u64[5] = ip0->protocol;
+ if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr))
+ {
+ // this is atomic fragment and needs to be processed separately
+ skip_bihash = 1;
+ }
+ else
+ {
+ kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+ kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+ kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+ kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+ kv.k.as_u64[4] =
+ ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+ << 32 |
+ (u64) frag_hdr->identification;
+ kv.k.as_u64[5] = ip0->protocol;
+ }
- ip6_full_reass_t *reass =
- ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
- &do_handoff);
+ ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+ vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash);
if (reass)
{
@@ -1148,9 +1185,9 @@
{
u32 handoff_thread_idx;
u32 counter = ~0;
- switch (ip6_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0, &error0,
- frag_hdr, is_custom_app, &handoff_thread_idx))
+ switch (ip6_full_reass_update (
+ vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+ is_custom_app, &handoff_thread_idx, skip_bihash))
{
case IP6_FULL_REASS_RC_OK:
/* nothing to do here */
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index fb435ba..3656c5a 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -215,7 +215,7 @@
clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
- s = format (s, "[not-fragmented]");
+ s = format (s, "[not fragmented or atomic fragment]");
break;
}
return s;
@@ -532,13 +532,24 @@
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
ip6_frag_hdr_t *frag_hdr;
ip6_ext_hdr_chain_t hdr_chain;
+ bool is_atomic_fragment = false;
int res = ip6_ext_header_walk (
b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
- if (res < 0 ||
- hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
+ if (res >= 0 &&
+ hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- // this is a regular packet - no fragmentation
+ frag_hdr =
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+ is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr));
+ }
+
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
+ is_atomic_fragment)
+ {
+ // this is a regular unfragmented packet or an atomic fragment
if (!ip6_get_port
(vm, b0, ip0, b0->current_length,
&(vnet_buffer (b0)->ip.reass.ip_proto),
@@ -565,10 +576,10 @@
}
goto packet_enqueue;
}
- frag_hdr =
- ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
hdr_chain.eh[res].offset;
+
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present