nat: fix forwarding handoff workaround

A special case when out2in packet needs to
be handoffed to other worker thread. We are
not able to determine which thread they belong
to in the first processing of nat handoff node.
These packets needs to go through out2in slowpath
before we are able to tell where to handoff them.

Type: fix
Ticket: VPP-1941

Change-Id: I7173bda970ce6a91d81f48fc72aa2457586a076f
Signed-off-by: Filip Varga <fivarga@cisco.com>
diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c
index 0ca2042..91f14d6 100644
--- a/src/plugins/nat/nat.c
+++ b/src/plugins/nat/nat.c
@@ -4752,6 +4752,7 @@
     [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath",
     [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in",
     [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath",
+    [NAT_NEXT_OUT2IN_ED_HANDOFF] = "nat44-ed-out2in-handoff",
     [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff",
     [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff",
   },
diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h
index a713f7b..daebe45 100644
--- a/src/plugins/nat/nat.h
+++ b/src/plugins/nat/nat.h
@@ -88,6 +88,7 @@
   NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH,
   NAT_NEXT_OUT2IN_ED_FAST_PATH,
   NAT_NEXT_OUT2IN_ED_SLOW_PATH,
+  NAT_NEXT_OUT2IN_ED_HANDOFF,
   NAT_NEXT_IN2OUT_CLASSIFY,
   NAT_NEXT_OUT2IN_CLASSIFY,
   NAT_N_NEXT,
@@ -162,6 +163,19 @@
     NAT_IN2OUT_ED_N_ERROR,
 } nat_in2out_ed_error_t;
 
+#define foreach_nat44_handoff_error                       \
+_(CONGESTION_DROP, "congestion drop")                     \
+_(SAME_WORKER, "same worker")                             \
+_(DO_HANDOFF, "do handoff")
+
+typedef enum
+{
+#define _(sym,str) NAT44_HANDOFF_ERROR_##sym,
+  foreach_nat44_handoff_error
+#undef _
+    NAT44_HANDOFF_N_ERROR,
+} nat44_handoff_error_t;
+
 #define foreach_nat_out2in_ed_error                     \
 _(UNSUPPORTED_PROTOCOL, "unsupported protocol")         \
 _(OUT_OF_PORTS, "out of ports")                         \
diff --git a/src/plugins/nat/nat44_handoff.c b/src/plugins/nat/nat44_handoff.c
index 69cdc42..651c8d1 100644
--- a/src/plugins/nat/nat44_handoff.c
+++ b/src/plugins/nat/nat44_handoff.c
@@ -33,26 +33,12 @@
   u8 output;
 } nat44_handoff_trace_t;
 
-#define foreach_nat44_handoff_error                       \
-_(CONGESTION_DROP, "congestion drop")                     \
-_(SAME_WORKER, "same worker")                             \
-_(DO_HANDOFF, "do handoff")
-
-typedef enum
-{
-#define _(sym,str) NAT44_HANDOFF_ERROR_##sym,
-  foreach_nat44_handoff_error
-#undef _
-    NAT44_HANDOFF_N_ERROR,
-} nat44_handoff_error_t;
-
 static char *nat44_handoff_error_strings[] = {
 #define _(sym,string) string,
   foreach_nat44_handoff_error
 #undef _
 };
 
-
 static u8 *
 format_nat44_handoff_trace (u8 * s, va_list * args)
 {
diff --git a/src/plugins/nat/nat_ha.c b/src/plugins/nat/nat_ha.c
index 3ae572b..08b065d 100644
--- a/src/plugins/nat/nat_ha.c
+++ b/src/plugins/nat/nat_ha.c
@@ -1057,7 +1057,7 @@
 #define _(sym,str) NAT_HA_HANDOFF_ERROR_##sym,
   foreach_nat_ha_handoff_error
 #undef _
-    NAT44_HANDOFF_N_ERROR,
+    NAT_HA_HANDOFF_N_ERROR,
 } nat_ha_handoff_error_t;
 
 static char *nat_ha_handoff_error_strings[] = {
diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c
index b0e8892..95e2ea9 100644
--- a/src/plugins/nat/out2in_ed.c
+++ b/src/plugins/nat/out2in_ed.c
@@ -46,6 +46,11 @@
   u32 is_slow_path;
 } nat44_ed_out2in_trace_t;
 
+typedef struct
+{
+  u16 thread_next;
+} nat44_ed_out2in_handoff_trace_t;
+
 static u8 *
 format_nat44_ed_out2in_trace (u8 * s, va_list * args)
 {
@@ -513,16 +518,24 @@
   nat44_session_update_lru (sm, s, thread_index);
 }
 
-static inline void
-create_bypass_for_fwd_worker (snat_main_t * sm, vlib_buffer_t * b,
-			      ip4_header_t * ip, u32 rx_fib_index)
+static_always_inline int
+create_bypass_for_fwd_worker (snat_main_t * sm,
+			      vlib_buffer_t * b, ip4_header_t * ip,
+			      u32 rx_fib_index, u32 thread_index)
 {
-  ip4_header_t ip_wkr = {
+  ip4_header_t tmp = {
     .src_address = ip->dst_address,
   };
-  u32 thread_index = sm->worker_in2out_cb (&ip_wkr, rx_fib_index, 0);
+  u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0);
+
+  if (index != thread_index)
+    {
+      vnet_buffer2 (b)->nat.thread_next = index;
+      return 1;
+    }
 
   create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index);
+  return 0;
 }
 
 #ifndef CLIB_MARCH_VARIANT
@@ -590,10 +603,19 @@
 	      else
 		{
 		  if (sm->num_workers > 1)
-		    create_bypass_for_fwd_worker (sm, b, ip, rx_fib_index);
+		    {
+		      if (create_bypass_for_fwd_worker (sm, b, ip,
+							rx_fib_index,
+							thread_index))
+			{
+			  next = NAT_NEXT_OUT2IN_ED_HANDOFF;
+			}
+		    }
 		  else
-		    create_bypass_for_fwd (sm, b, ip, rx_fib_index,
-					   thread_index);
+		    {
+		      create_bypass_for_fwd (sm, b, ip, rx_fib_index,
+					     thread_index);
+		    }
 		}
 	    }
 	  goto out;
@@ -1208,12 +1230,18 @@
 		    }
 		  else
 		    {
-		      if (sm->num_workers > 1)
-			create_bypass_for_fwd_worker (sm, b0, ip0,
-						      rx_fib_index0);
+		      if ((sm->num_workers > 1)
+			  && create_bypass_for_fwd_worker (sm, b0, ip0,
+							   rx_fib_index0,
+							   thread_index))
+			{
+			  next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF;
+			}
 		      else
-			create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
-					       thread_index);
+			{
+			  create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0,
+						 thread_index);
+			}
 		    }
 		}
 	      goto trace0;
@@ -1383,6 +1411,88 @@
   return frame->n_vectors;
 }
 
+static inline uword
+nat_handoff_node_fn_inline (vlib_main_t * vm,
+			    vlib_node_runtime_t * node,
+			    vlib_frame_t * frame, u32 fq_index)
+{
+  u32 n_enq, n_left_from, *from;
+
+  u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  vlib_get_buffers (vm, from, b, n_left_from);
+
+  while (n_left_from >= 4)
+    {
+      if (PREDICT_TRUE (n_left_from >= 8))
+	{
+	  vlib_prefetch_buffer_header (b[4], LOAD);
+	  vlib_prefetch_buffer_header (b[5], LOAD);
+	  vlib_prefetch_buffer_header (b[6], LOAD);
+	  vlib_prefetch_buffer_header (b[7], LOAD);
+	  CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	  CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	  CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	  CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD);
+	}
+
+      ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
+      ti[1] = vnet_buffer2 (b[1])->nat.thread_next;
+      ti[2] = vnet_buffer2 (b[2])->nat.thread_next;
+      ti[3] = vnet_buffer2 (b[3])->nat.thread_next;
+
+      b += 4;
+      ti += 4;
+      n_left_from -= 4;
+    }
+
+  while (n_left_from > 0)
+    {
+      ti[0] = vnet_buffer2 (b[0])->nat.thread_next;
+
+      b += 1;
+      ti += 1;
+      n_left_from -= 1;
+    }
+
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      u32 i;
+      b = bufs;
+      ti = thread_indices;
+
+      for (i = 0; i < frame->n_vectors; i++)
+	{
+	  if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+	    {
+	      nat44_ed_out2in_handoff_trace_t *t =
+		vlib_add_trace (vm, node, b[0], sizeof (*t));
+	      t->thread_next = ti[0];
+	      b += 1;
+	      ti += 1;
+	    }
+	  else
+	    break;
+	}
+    }
+
+  n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices,
+					 frame->n_vectors, 1);
+
+  if (n_enq < frame->n_vectors)
+    {
+      vlib_node_increment_counter (vm, node->node_index,
+				   NAT44_HANDOFF_ERROR_CONGESTION_DROP,
+				   frame->n_vectors - n_enq);
+    }
+
+  return frame->n_vectors;
+}
+
 VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm,
 				     vlib_node_runtime_t * node,
 				     vlib_frame_t * frame)
@@ -1431,6 +1541,35 @@
 /* *INDENT-ON* */
 
 static u8 *
+format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  nat44_ed_out2in_handoff_trace_t *t =
+    va_arg (*args, nat44_ed_out2in_handoff_trace_t *);
+  return format (s, "out2in ed handoff thread_next index %d", t->thread_next);
+}
+
+VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm,
+					     vlib_node_runtime_t * node,
+					     vlib_frame_t * frame)
+{
+  return nat_handoff_node_fn_inline (vm, node, frame,
+				     snat_main.ed_out2in_node_index);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = {
+  .name = "nat44-ed-out2in-handoff",
+  .vector_size = sizeof (u32),
+  .sibling_of = "nat-default",
+  .format_trace = format_nat44_ed_out2in_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = 0,
+};
+/* *INDENT-ON* */
+
+static u8 *
 format_nat_pre_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 50515c4..9e997b8 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -452,11 +452,10 @@
   /* size of L4 prototol header */
   u16 gso_l4_hdr_sz;
 
-  /* The union below has a u64 alignment, so this space is unused */
-  u32 __unused2[1];
-
   struct
   {
+    u16 unused;
+    u16 thread_next;
     u32 arc_next;
     u32 ed_out2in_nat_session_index;
   } nat;