L2 Flood: use the buffer clone API rather than buffer recycle

Change-Id: I40e4efd8061369efc535f0d49b2f63668b6d1d15
Signed-off-by: Neale Ranns <neale.ranns@cisco.com>
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
index ed9e5ac..97f1387 100644
--- a/src/vnet/l2/l2_flood.c
+++ b/src/vnet/l2/l2_flood.c
@@ -58,6 +58,10 @@
   /* convenience variables */
   vlib_main_t *vlib_main;
   vnet_main_t *vnet_main;
+
+  /* per-cpu vector of cloned packets */
+  u32 **clones;
+  l2_flood_member_t ***members;
 } l2flood_main_t;
 
 typedef struct
@@ -131,158 +135,6 @@
  * could be turned into an ICMP reply. If BVI processing is not performed
  * last, the modified packet would be replicated to the remaining members.
  */
-
-static_always_inline void
-l2flood_process (vlib_main_t * vm,
-		 vlib_node_runtime_t * node,
-		 l2flood_main_t * msm,
-		 u64 * counter_base,
-		 vlib_buffer_t * b0,
-		 u32 * sw_if_index0,
-		 l2fib_entry_key_t * key0,
-		 u32 * bucket0, l2fib_entry_result_t * result0, u32 * next0)
-{
-  u16 bd_index0;
-  l2_bridge_domain_t *bd_config;
-  l2_flood_member_t *members;
-  i32 current_member;		/* signed */
-  replication_context_t *ctx;
-  u8 in_shg = vnet_buffer (b0)->l2.shg;
-
-  if (!replication_is_recycled (b0))
-    {
-
-      /* Do flood "prep node" processing */
-
-      /* Get config for the bridge domain interface */
-      bd_index0 = vnet_buffer (b0)->l2.bd_index;
-      bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index0);
-      members = bd_config->members;
-
-      /* Find first member that passes the reflection and SHG checks */
-      current_member = bd_config->flood_count - 1;
-      while ((current_member >= 0) &&
-	     ((members[current_member].sw_if_index == *sw_if_index0) ||
-	      (in_shg && members[current_member].shg == in_shg)))
-	{
-	  current_member--;
-	}
-
-      if (current_member < 0)
-	{
-	  /* No members to flood to */
-	  *next0 = L2FLOOD_NEXT_DROP;
-	  b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
-	  return;
-	}
-
-      if ((current_member > 0) &&
-	  ((current_member > 1) ||
-	   ((members[0].sw_if_index != *sw_if_index0) &&
-	    (!in_shg || members[0].shg != in_shg))))
-	{
-	  /* If more than one member then initiate replication */
-	  ctx =
-	    replication_prep (vm, b0, l2flood_node.index, 1 /* l2_packet */ );
-	  ctx->feature_replicas = (uword) members;
-	  ctx->feature_counter = current_member;
-	}
-
-    }
-  else
-    {
-      vnet_buffer_opaque_t *vnet_buff_op;
-
-      /* Do flood "recycle node" processing */
-
-      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL))
-	{
-	  (void) replication_recycle (vm, b0, 1 /* is_last */ );
-	  *next0 = L2FLOOD_NEXT_DROP;
-	  b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL];
-	  return;
-	}
-
-      ctx = replication_get_ctx (b0);
-      replication_clear_recycled (b0);
-
-      members = (l2_flood_member_t *) (intptr_t) ctx->feature_replicas;
-      current_member = (i32) ctx->feature_counter - 1;
-
-      /* Need to update input index from saved packet context */
-      vnet_buff_op = (vnet_buffer_opaque_t *) ctx->vnet_buffer;
-      *sw_if_index0 = vnet_buff_op->sw_if_index[VLIB_RX];
-
-      /* Find next member that passes the reflection and SHG check */
-      while ((current_member >= 0) &&
-	     ((members[current_member].sw_if_index == *sw_if_index0) ||
-	      (in_shg && members[current_member].shg == in_shg)))
-	{
-	  current_member--;
-	}
-
-      if (current_member < 0)
-	{
-	  /*
-	   * No more members to flood to.
-	   * Terminate replication and drop packet.
-	   */
-
-	  replication_recycle (vm, b0, 1 /* is_last */ );
-
-	  *next0 = L2FLOOD_NEXT_DROP;
-	  /* Ideally we woudn't bump a counter here, just silently complete */
-	  b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
-	  return;
-	}
-
-      /* Restore packet and context and continue replication */
-      ctx->feature_counter = current_member;
-      replication_recycle (vm, b0, ((current_member == 0) ||	/*is_last */
-				    ((current_member == 1) &&
-				     ((members[0].sw_if_index ==
-				       *sw_if_index0) || (in_shg
-							  && members[0].shg ==
-							  in_shg)))));
-    }
-
-  /* Forward packet to the current member */
-  if (PREDICT_FALSE (members[current_member].flags & L2_FLOOD_MEMBER_BVI))
-    {
-      /* Do BVI processing */
-      u32 rc;
-      rc = l2_to_bvi (vm,
-		      msm->vnet_main,
-		      b0,
-		      members[current_member].sw_if_index,
-		      &msm->l3_next, next0);
-
-      if (PREDICT_FALSE (rc))
-	{
-	  if (rc == TO_BVI_ERR_BAD_MAC)
-	    {
-	      b0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC];
-	      *next0 = L2FLOOD_NEXT_DROP;
-	    }
-	  else if (rc == TO_BVI_ERR_ETHERTYPE)
-	    {
-	      b0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE];
-	      *next0 = L2FLOOD_NEXT_DROP;
-	    }
-	}
-    }
-  else
-    {
-      /* Do normal L2 forwarding */
-      vnet_buffer (b0)->sw_if_index[VLIB_TX] =
-	members[current_member].sw_if_index;
-      *next0 = L2FLOOD_NEXT_L2_OUTPUT;
-
-    }
-
-}
-
-
 static uword
 l2flood_node_fn (vlib_main_t * vm,
 		 vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -290,161 +142,214 @@
   u32 n_left_from, *from, *to_next;
   l2flood_next_t next_index;
   l2flood_main_t *msm = &l2flood_main;
-  vlib_node_t *n = vlib_get_node (vm, l2flood_node.index);
-  u32 node_counter_base_index = n->error_heap_index;
-  vlib_error_main_t *em = &vm->error_main;
+  u32 thread_index = vm->thread_index;
 
   from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;	/* number of packets to process */
+  n_left_from = frame->n_vectors;
   next_index = node->cached_next_index;
 
   while (n_left_from > 0)
     {
       u32 n_left_to_next;
 
-      /* get space to enqueue frame to graph node "next_index" */
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-      while (n_left_from >= 6 && n_left_to_next >= 2)
-	{
-	  u32 bi0, bi1;
-	  vlib_buffer_t *b0, *b1;
-	  u32 next0, next1;
-	  u32 sw_if_index0, sw_if_index1;
-	  l2fib_entry_key_t key0, key1;
-	  l2fib_entry_result_t result0, result1;
-	  u32 bucket0, bucket1;
-
-	  /* Prefetch next iteration. */
-	  {
-	    vlib_buffer_t *p2, *p3, *p4, *p5;
-
-	    p2 = vlib_get_buffer (vm, from[2]);
-	    p3 = vlib_get_buffer (vm, from[3]);
-	    p4 = vlib_get_buffer (vm, from[4]);
-	    p5 = vlib_get_buffer (vm, from[5]);
-
-	    /* Prefetch the buffer header for the N+2 loop iteration */
-	    vlib_prefetch_buffer_header (p4, LOAD);
-	    vlib_prefetch_buffer_header (p5, LOAD);
-
-	    /* Prefetch the replication context for the N+1 loop iteration */
-	    /* This depends on the buffer header above */
-	    replication_prefetch_ctx (p2);
-	    replication_prefetch_ctx (p3);
-
-	    /* Prefetch the packet for the N+1 loop iteration */
-	    CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-	    CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-	  }
-
-	  /* speculatively enqueue b0 and b1 to the current next frame */
-	  /* bi is "buffer index", b is pointer to the buffer */
-	  to_next[0] = bi0 = from[0];
-	  to_next[1] = bi1 = from[1];
-	  from += 2;
-	  to_next += 2;
-	  n_left_from -= 2;
-	  n_left_to_next -= 2;
-
-	  b0 = vlib_get_buffer (vm, bi0);
-	  b1 = vlib_get_buffer (vm, bi1);
-
-	  /* RX interface handles */
-	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-	  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
-	  /* process 2 pkts */
-	  em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 2;
-
-	  l2flood_process (vm, node, msm,
-			   &em->counters[node_counter_base_index], b0,
-			   &sw_if_index0, &key0, &bucket0, &result0, &next0);
-
-	  l2flood_process (vm, node, msm,
-			   &em->counters[node_counter_base_index], b1,
-			   &sw_if_index1, &key1, &bucket1, &result1, &next1);
-
-	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
-	    {
-	      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  l2flood_trace_t *t =
-		    vlib_add_trace (vm, node, b0, sizeof (*t));
-		  ethernet_header_t *h0 = vlib_buffer_get_current (b0);
-		  t->sw_if_index = sw_if_index0;
-		  t->bd_index = vnet_buffer (b0)->l2.bd_index;
-		  clib_memcpy (t->src, h0->src_address, 6);
-		  clib_memcpy (t->dst, h0->dst_address, 6);
-		}
-	      if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
-		{
-		  l2flood_trace_t *t =
-		    vlib_add_trace (vm, node, b1, sizeof (*t));
-		  ethernet_header_t *h1 = vlib_buffer_get_current (b1);
-		  t->sw_if_index = sw_if_index1;
-		  t->bd_index = vnet_buffer (b1)->l2.bd_index;
-		  clib_memcpy (t->src, h1->src_address, 6);
-		  clib_memcpy (t->dst, h1->dst_address, 6);
-		}
-	    }
-
-	  /* verify speculative enqueues, maybe switch current next frame */
-	  /* if next0==next1==next_index then nothing special needs to be done */
-	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-					   to_next, n_left_to_next,
-					   bi0, bi1, next0, next1);
-	}
-
       while (n_left_from > 0 && n_left_to_next > 0)
 	{
-	  u32 bi0;
-	  vlib_buffer_t *b0;
-	  u32 next0;
-	  u32 sw_if_index0;
-	  l2fib_entry_key_t key0;
-	  l2fib_entry_result_t result0;
-	  u32 bucket0;
+	  u32 next0, sw_if_index0, bi0, ci0;
+	  u16 n_clones, n_cloned, clone0;
+	  l2_bridge_domain_t *bd_config;
+	  l2_flood_member_t *member;
+	  vlib_buffer_t *b0, *c0;
+	  u8 in_shg;
+	  i32 mi;
 
 	  /* speculatively enqueue b0 to the current next frame */
 	  bi0 = from[0];
-	  to_next[0] = bi0;
 	  from += 1;
-	  to_next += 1;
 	  n_left_from -= 1;
-	  n_left_to_next -= 1;
+	  next0 = L2FLOOD_NEXT_DROP;
 
 	  b0 = vlib_get_buffer (vm, bi0);
 
+	  /* Get config for the bridge domain interface */
+	  bd_config = vec_elt_at_index (l2input_main.bd_configs,
+					vnet_buffer (b0)->l2.bd_index);
+	  in_shg = vnet_buffer (b0)->l2.shg;
 	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
 
-	  /* process 1 pkt */
-	  em->counters[node_counter_base_index + L2FLOOD_ERROR_L2FLOOD] += 1;
+	  vec_validate (msm->members[thread_index],
+			vec_len (bd_config->members));
 
-	  l2flood_process (vm, node, msm,
-			   &em->counters[node_counter_base_index], b0,
-			   &sw_if_index0, &key0, &bucket0, &result0, &next0);
+	  vec_reset_length (msm->members[thread_index]);
+
+	  /* Find the members that pass the reflection and SHG checks */
+	  for (mi = bd_config->flood_count - 1; mi >= 0; mi--)
+	    {
+	      member = &bd_config->members[mi];
+	      if ((member->sw_if_index != sw_if_index0) &&
+		  (!in_shg || (member->shg != in_shg)))
+		{
+		  vec_add1 (msm->members[thread_index], member);
+		}
+	    }
+
+	  n_clones = vec_len (msm->members[thread_index]);
+
+	  if (0 == n_clones)
+	    {
+	      /* No members to flood to */
+	      to_next[0] = bi0;
+	      to_next += 1;
+	      n_left_to_next -= 1;
+
+	      b0->error = node->errors[L2FLOOD_ERROR_NO_MEMBERS];
+	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					       to_next, n_left_to_next,
+					       bi0, L2FLOOD_NEXT_DROP);
+	      continue;
+	    }
+
+	  vec_validate (msm->clones[thread_index], n_clones);
+	  vec_reset_length (msm->clones[thread_index]);
+
+	  /*
+	   * the header offset needs to be large enough to incorporate
+	   * all the L3 headers that could be touched when doing BVI
+	   * processing. So take the current l2 length plus 2 * IPv6
+	   * headers (for tunnel encap)
+	   */
+	  n_cloned = vlib_buffer_clone (vm, bi0,
+					msm->clones[thread_index],
+					n_clones,
+					(vnet_buffer (b0)->l2.l2_len +
+					 sizeof (udp_header_t) +
+					 2 * sizeof (ip6_header_t)));
+
+	  if (PREDICT_FALSE (n_cloned != n_clones))
+	    {
+	      b0->error = node->errors[L2FLOOD_ERROR_REPL_FAIL];
+	    }
+
+	  /*
+	   * for all but the last clone, these are not BVI bound
+	   */
+	  for (clone0 = 0; clone0 < n_cloned - 1; clone0++)
+	    {
+	      member = msm->members[thread_index][clone0];
+	      ci0 = msm->clones[thread_index][clone0];
+	      c0 = vlib_get_buffer (vm, ci0);
+
+	      to_next[0] = ci0;
+	      to_next += 1;
+	      n_left_to_next -= 1;
+
+	      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+				 (b0->flags & VLIB_BUFFER_IS_TRACED)))
+		{
+		  ethernet_header_t *h0;
+		  l2flood_trace_t *t;
+
+		  if (c0 != b0)
+		    vlib_buffer_copy_trace_flag (vm, b0, ci0);
+
+		  t = vlib_add_trace (vm, node, c0, sizeof (*t));
+		  h0 = vlib_buffer_get_current (c0);
+		  t->sw_if_index = sw_if_index0;
+		  t->bd_index = vnet_buffer (c0)->l2.bd_index;
+		  clib_memcpy (t->src, h0->src_address, 6);
+		  clib_memcpy (t->dst, h0->dst_address, 6);
+		}
+
+	      /* Do normal L2 forwarding */
+	      vnet_buffer (c0)->sw_if_index[VLIB_TX] = member->sw_if_index;
+	      next0 = L2FLOOD_NEXT_L2_OUTPUT;
+
+	      vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					       to_next, n_left_to_next,
+					       ci0, next0);
+	      if (PREDICT_FALSE (0 == n_left_to_next))
+		{
+		  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+		  vlib_get_next_frame (vm, node, next_index,
+				       to_next, n_left_to_next);
+		}
+	    }
+
+	  /*
+	   * the last clone that might go to a BVI
+	   */
+	  member = msm->members[thread_index][clone0];
+	  ci0 = msm->clones[thread_index][clone0];
+	  c0 = vlib_get_buffer (vm, ci0);
+
+	  to_next[0] = ci0;
+	  to_next += 1;
+	  n_left_to_next -= 1;
 
 	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
 			     (b0->flags & VLIB_BUFFER_IS_TRACED)))
 	    {
-	      l2flood_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
-	      ethernet_header_t *h0 = vlib_buffer_get_current (b0);
+	      ethernet_header_t *h0;
+	      l2flood_trace_t *t;
+
+	      if (c0 != b0)
+		vlib_buffer_copy_trace_flag (vm, b0, ci0);
+
+	      t = vlib_add_trace (vm, node, c0, sizeof (*t));
+	      h0 = vlib_buffer_get_current (c0);
 	      t->sw_if_index = sw_if_index0;
-	      t->bd_index = vnet_buffer (b0)->l2.bd_index;
+	      t->bd_index = vnet_buffer (c0)->l2.bd_index;
 	      clib_memcpy (t->src, h0->src_address, 6);
 	      clib_memcpy (t->dst, h0->dst_address, 6);
 	    }
 
-	  /* verify speculative enqueue, maybe switch current next frame */
+
+	  /* Forward packet to the current member */
+	  if (PREDICT_FALSE (member->flags & L2_FLOOD_MEMBER_BVI))
+	    {
+	      /* Do BVI processing */
+	      u32 rc;
+	      rc = l2_to_bvi (vm,
+			      msm->vnet_main,
+			      c0, member->sw_if_index, &msm->l3_next, &next0);
+
+	      if (PREDICT_FALSE (rc))
+		{
+		  if (rc == TO_BVI_ERR_BAD_MAC)
+		    {
+		      c0->error = node->errors[L2FLOOD_ERROR_BVI_BAD_MAC];
+		    }
+		  else if (rc == TO_BVI_ERR_ETHERTYPE)
+		    {
+		      c0->error = node->errors[L2FLOOD_ERROR_BVI_ETHERTYPE];
+		    }
+		}
+	    }
+	  else
+	    {
+	      /* Do normal L2 forwarding */
+	      vnet_buffer (c0)->sw_if_index[VLIB_TX] = member->sw_if_index;
+	      next0 = L2FLOOD_NEXT_L2_OUTPUT;
+	    }
+
 	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
 					   to_next, n_left_to_next,
-					   bi0, next0);
+					   ci0, next0);
+	  if (PREDICT_FALSE (0 == n_left_to_next))
+	    {
+	      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+	      vlib_get_next_frame (vm, node, next_index,
+				   to_next, n_left_to_next);
+	    }
 	}
 
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
+  vlib_node_increment_counter (vm, node->node_index,
+			       L2FLOOD_ERROR_L2FLOOD, frame->n_vectors);
+
   return frame->n_vectors;
 }
 
@@ -478,6 +383,9 @@
   mp->vlib_main = vm;
   mp->vnet_main = vnet_get_main ();
 
+  vec_validate (mp->clones, vlib_num_workers ());
+  vec_validate (mp->members, vlib_num_workers ());
+
   /* Initialize the feature next-node indexes */
   feat_bitmap_init_next_nodes (vm,
 			       l2flood_node.index,
@@ -485,7 +393,7 @@
 			       l2input_get_feat_names (),
 			       mp->feat_next_node_index);
 
-  return 0;
+  return NULL;
 }
 
 VLIB_INIT_FUNCTION (l2flood_init);