ip: SVR fix race condition
There could be a race condition where two fragments of one chain end up
at the same time on different workers, one overwriting others hash
entry. Add a check for that and restart processing on the unlucky worker
who ends up being second from hash table POV. This will then result in a
proper handover to worker now owning this reassembly.
Type: fix
Fixes: de34c35fc73226943538149fae9dbc5cfbdc6e75
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Change-Id: I9eb29c5cb1ffe3b5eb1d5a638e17ab7ba2628d28
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index f5fa575..9971daf 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -321,6 +321,8 @@
ip4_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -375,10 +377,14 @@
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip4_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
}
return reass;