ip: do policer thread handoff from punt policers

Hand off packets arriving at the ip[46] punt policer nodes to the new
ip[46] punt policer handoff nodes when the worker thread they arrive on
is not the thread the policer is bound to. For now, the policer is bound
to the worker thread on which it first receives a packet. In future the
thread will be made configurable through the API.

Type: improvement
Signed-off-by: Brian Russell <brian@graphiant.com>
Change-Id: Ic39d936084c354af1859ad3d946da6cd0f6e34d9
diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c
index 94c260a..20430ad 100644
--- a/src/vnet/ip/ip4_punt_drop.c
+++ b/src/vnet/ip/ip4_punt_drop.c
@@ -51,6 +51,29 @@
 };
 #endif /* CLIB_MARCH_VARIANT */
 
+static char *ip4_punt_policer_handoff_error_strings[] = { "congestion drop" };
+/* Pass the frame to policer_handoff() using the ip4 punt policer's config. */
+VLIB_NODE_FN (ip4_punt_policer_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  return policer_handoff (vm, node, frame, ip4_punt_policer_cfg.fq_index,
+			  ip4_punt_policer_cfg.policer_index);
+}
+/* Handoff node registration: internal node whose only next is "error-drop". */
+VLIB_REGISTER_NODE (ip4_punt_policer_handoff_node) = {
+  .name = "ip4-punt-policer-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_policer_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ip4_punt_policer_handoff_error_strings),
+  .error_strings = ip4_punt_policer_handoff_error_strings,
+  /* NOTE(review): presumably the target for "congestion drop" - confirm. */
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+
 static char *ip4_punt_policer_error_strings[] = {
 #define _(sym,string) string,
   foreach_ip_punt_policer_error
@@ -77,6 +100,7 @@
 
   .next_nodes = {
     [IP_PUNT_POLICER_NEXT_DROP] = "ip4-drop",
+    [IP_PUNT_POLICER_NEXT_HANDOFF] = "ip4-punt-policer-handoff",
   },
 };
 
diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c
index 8d91c09..107703a 100644
--- a/src/vnet/ip/ip6_punt_drop.c
+++ b/src/vnet/ip/ip6_punt_drop.c
@@ -38,6 +38,29 @@
 ip_punt_policer_t ip6_punt_policer_cfg;
 #endif /* CLIB_MARCH_VARIANT */
 
+static char *ip6_punt_policer_handoff_error_strings[] = { "congestion drop" };
+/* Pass the frame to policer_handoff() using the ip6 punt policer's config. */
+VLIB_NODE_FN (ip6_punt_policer_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  return policer_handoff (vm, node, frame, ip6_punt_policer_cfg.fq_index,
+			  ip6_punt_policer_cfg.policer_index);
+}
+/* Handoff node registration: internal node whose only next is "error-drop". */
+VLIB_REGISTER_NODE (ip6_punt_policer_handoff_node) = {
+  .name = "ip6-punt-policer-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_policer_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN(ip6_punt_policer_handoff_error_strings),
+  .error_strings = ip6_punt_policer_handoff_error_strings,
+  /* NOTE(review): presumably the target for "congestion drop" - confirm. */
+  .n_next_nodes = 1,
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+
 static char *ip6_punt_policer_error_strings[] = {
 #define _(sym,string) string,
   foreach_ip_punt_policer_error
@@ -67,6 +90,7 @@
   /* edit / add dispositions here */
   .next_nodes = {
     [IP_PUNT_POLICER_NEXT_DROP] = "ip6-drop",
+    [IP_PUNT_POLICER_NEXT_HANDOFF] = "ip6-punt-policer-handoff",
   },
 };
 
diff --git a/src/vnet/ip/ip_punt_drop.h b/src/vnet/ip/ip_punt_drop.h
index 2fc1140..11affcb 100644
--- a/src/vnet/ip/ip_punt_drop.h
+++ b/src/vnet/ip/ip_punt_drop.h
@@ -33,6 +33,7 @@
 typedef enum ip_punt_policer_next_t_
 {
   IP_PUNT_POLICER_NEXT_DROP,
+  IP_PUNT_POLICER_NEXT_HANDOFF, /* to the ip[46]-punt-policer-handoff node */
   IP_PUNT_POLICER_N_NEXT,
 } ip_punt_policer_next_t;
 
@@ -71,6 +72,10 @@
   u64 time_in_policer_periods;
   vnet_feature_main_t *fm = &feature_main;
   vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
+  vnet_policer_main_t *pm = &vnet_policer_main;
+  policer_read_response_type_st *pol = &pm->policers[policer_index];
+  u32 pol_thread_index = pol->thread_index;
+  u32 this_thread_index = vm->thread_index;
 
   time_in_policer_periods =
     clib_cpu_time_now () >> POLICER_TICKS_PER_PERIOD_SHIFT;
@@ -79,6 +84,20 @@
   n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
 
+  if (PREDICT_FALSE (pol_thread_index == ~0))
+    {
+      /*
+       * First packet to use this policer: try to claim it for this
+       * thread. Packets seen on other threads are then handed off
+       * to the claiming thread.
+       *
+       * Another thread may claim it at the same time, so re-read the
+       * owner after the swap instead of assuming this thread won.
+       */
+      clib_atomic_cmp_and_swap (&pol->thread_index, ~0, this_thread_index);
+      pol_thread_index = pol->thread_index;
+    }
+
   while (n_left_from > 0)
     {
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -102,45 +121,52 @@
 	  b0 = vlib_get_buffer (vm, bi0);
 	  b1 = vlib_get_buffer (vm, bi1);
 
-	  vnet_get_config_data (&cm->config_main,
-				&b0->current_config_index, &next0, 0);
-	  vnet_get_config_data (&cm->config_main,
-				&b1->current_config_index, &next1, 0);
-
-	  act0 = vnet_policer_police (vm, b0,
-				      policer_index,
-				      time_in_policer_periods,
-				      POLICE_CONFORM);
-	  act1 = vnet_policer_police (vm, b1,
-				      policer_index,
-				      time_in_policer_periods,
-				      POLICE_CONFORM);
-
-	  if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+	  if (PREDICT_FALSE (this_thread_index != pol_thread_index))
 	    {
-	      next0 = IP_PUNT_POLICER_NEXT_DROP;
-	      b0->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+	      next0 = next1 = IP_PUNT_POLICER_NEXT_HANDOFF;
 	    }
-	  if (PREDICT_FALSE (act1 == SSE2_QOS_ACTION_DROP))
+	  else
 	    {
-	      next1 = IP_PUNT_POLICER_NEXT_DROP;
-	      b1->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+
+	      vnet_get_config_data (&cm->config_main,
+				    &b0->current_config_index, &next0, 0);
+	      vnet_get_config_data (&cm->config_main,
+				    &b1->current_config_index, &next1, 0);
+
+	      act0 =
+		vnet_policer_police (vm, b0, policer_index,
+				     time_in_policer_periods, POLICE_CONFORM);
+	      act1 =
+		vnet_policer_police (vm, b1, policer_index,
+				     time_in_policer_periods, POLICE_CONFORM);
+
+	      if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+		{
+		  next0 = IP_PUNT_POLICER_NEXT_DROP;
+		  b0->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+		}
+	      if (PREDICT_FALSE (act1 == SSE2_QOS_ACTION_DROP))
+		{
+		  next1 = IP_PUNT_POLICER_NEXT_DROP;
+		  b1->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+		}
+
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  ip_punt_policer_trace_t *t =
+		    vlib_add_trace (vm, node, b0, sizeof (*t));
+		  t->next = next0;
+		  t->policer_index = policer_index;
+		}
+	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  ip_punt_policer_trace_t *t =
+		    vlib_add_trace (vm, node, b1, sizeof (*t));
+		  t->next = next1;
+		  t->policer_index = policer_index;
+		}
 	    }
 
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      ip_punt_policer_trace_t *t =
-		vlib_add_trace (vm, node, b0, sizeof (*t));
-	      t->next = next0;
-	      t->policer_index = policer_index;
-	    }
-	  if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-	    {
-	      ip_punt_policer_trace_t *t =
-		vlib_add_trace (vm, node, b1, sizeof (*t));
-	      t->next = next1;
-	      t->policer_index = policer_index;
-	    }
 	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
 					   n_left_to_next,
 					   bi0, bi1, next0, next1);
@@ -162,27 +188,32 @@
 
 	  b0 = vlib_get_buffer (vm, bi0);
 
-	  vnet_get_config_data (&cm->config_main,
-				&b0->current_config_index, &next0, 0);
-
-	  act0 = vnet_policer_police (vm, b0,
-				      policer_index,
-				      time_in_policer_periods,
-				      POLICE_CONFORM);
-	  if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+	  if (PREDICT_FALSE (this_thread_index != pol_thread_index))
 	    {
-	      next0 = IP_PUNT_POLICER_NEXT_DROP;
-	      b0->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+	      next0 = IP_PUNT_POLICER_NEXT_HANDOFF;
 	    }
-
-	  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+	  else
 	    {
-	      ip_punt_policer_trace_t *t =
-		vlib_add_trace (vm, node, b0, sizeof (*t));
-	      t->next = next0;
-	      t->policer_index = policer_index;
-	    }
+	      vnet_get_config_data (&cm->config_main,
+				    &b0->current_config_index, &next0, 0);
 
+	      act0 =
+		vnet_policer_police (vm, b0, policer_index,
+				     time_in_policer_periods, POLICE_CONFORM);
+	      if (PREDICT_FALSE (act0 == SSE2_QOS_ACTION_DROP))
+		{
+		  next0 = IP_PUNT_POLICER_NEXT_DROP;
+		  b0->error = node->errors[IP_PUNT_POLICER_ERROR_DROP];
+		}
+
+	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+		{
+		  ip_punt_policer_trace_t *t =
+		    vlib_add_trace (vm, node, b0, sizeof (*t));
+		  t->next = next0;
+		  t->policer_index = policer_index;
+		}
+	    }
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
 					   n_left_to_next, bi0, next0);
 	}