VPP-1341: fix loopback interface graph arcs

Remove broken special case from l2_input.c:set_int_l2_mode(), which
turns out to confuse the graph dispatch engine.

The loopback TX function needs to push packets to either
ethernet-input or to l2-input, based on bridge / BVI
configuration. Rather than overloading a single graph arc - and making
vain attempts to reconfigure it - create both arcs and use the correct
one.

Rewrote the loopback tx function as an idosyncratic multi-arch
quad/single loop fn.

Change-Id: I15b56ce641d90a11e7b3c7d23859f40e168dd7b2
Signed-off-by: Dave Barach <dave@barachs.net>
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index e39ae7b..a358080 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -380,80 +380,196 @@
   return (u32) ~ 0;
 }
 
-/* Echo packets back to ethernet/l2-input. */
+/**
+ * Echo packets back to ethernet/l2-input.
+ *
+ * This node is "special." We know, for example, that
+ * all of the vnet_buffer (bX)->sw_if_index[VLIB_TX] values
+ * [had better!] match.
+ *
+ * Please do not copy the code first and ask questions later.
+ *
+ * "These are not the droids we're looking.
+ *  You can go about your business.
+ *  Move along..."
+ */
 static uword
 simulated_ethernet_interface_tx (vlib_main_t * vm,
-				 vlib_node_runtime_t * node,
-				 vlib_frame_t * frame)
+				 vlib_node_runtime_t *
+				 node, vlib_frame_t * frame)
 {
-  u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
-  u32 next_index = VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
-  u32 i, next_node_index, bvi_flag, sw_if_index;
-  u32 n_pkts = 0, n_bytes = 0;
+  u32 n_left_from, n_left_to_next, *from, *to_next;
+  u32 next_index;
+  u32 n_bytes = 0;
   u32 thread_index = vm->thread_index;
   vnet_main_t *vnm = vnet_get_main ();
   vnet_interface_main_t *im = &vnm->interface_main;
-  vlib_node_main_t *nm = &vm->node_main;
-  vlib_node_t *loop_node;
-  vlib_buffer_t *b;
-
-  // check tx node index, it is ethernet-input on loopback create
-  // but can be changed to l2-input if loopback is configured as
-  // BVI of a BD (Bridge Domain).
-  loop_node = vec_elt (nm->nodes, node->node_index);
-  next_node_index = loop_node->next_nodes[next_index];
-  bvi_flag = (next_node_index == l2input_node.index) ? 1 : 0;
+  u32 new_tx_sw_if_index, sw_if_index_all;
+  vlib_buffer_t *b0, *b1, *b2, *b3;
+  u32 bi0, bi1, bi2, bi3;
+  u32 next_all;
+  l2_input_config_t *config;
 
   n_left_from = frame->n_vectors;
   from = vlib_frame_args (frame);
 
+  /*
+   * Work out where all of the packets are going.
+   */
+
+  bi0 = from[0];
+  b0 = vlib_get_buffer (vm, bi0);
+  sw_if_index_all = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+  /*
+   * Look at the L2 config for the interface to decide which
+   * graph arc to use. If the interface is bridged, send pkts
+   * to l2-input. Otherwise, to ethernet-input
+   */
+  config = l2input_intf_config (sw_if_index_all);
+  next_all =
+    config->bridge ? VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT :
+    VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT;
+
+  /*
+   * If the interface is a BVI, set the tx sw_if_index to the
+   * L2 path's special value.
+   * Otherwise, set it to ~0, to be reset later by the L3 path
+   */
+  if (config->bvi)
+    new_tx_sw_if_index = L2INPUT_BVI;
+  else
+    new_tx_sw_if_index = ~0;
+
+  /* Get the right next frame... */
+  next_index = next_all;
+
+  /*
+   * Use a quad-single loop, in case we have to impedance-match a
+   * full frame into a non-empty next frame or some such.
+   */
+
   while (n_left_from > 0)
     {
       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
-      n_copy = clib_min (n_left_from, n_left_to_next);
-
-      clib_memcpy (to_next, from, n_copy * sizeof (from[0]));
-      n_left_to_next -= n_copy;
-      n_left_from -= n_copy;
-      i = 0;
-      b = vlib_get_buffer (vm, from[i]);
-      sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
-      while (1)
+      while (n_left_from >= 4 && n_left_to_next >= 4)
 	{
-	  // Set up RX and TX indices as if received from a real driver
-	  // unless loopback is used as a BVI. For BVI case, leave TX index
-	  // and update l2_len in packet as required for l2 forwarding path
-	  vnet_buffer (b)->sw_if_index[VLIB_RX] = sw_if_index;
-	  if (bvi_flag)
+	  /* Prefetch next iteration. */
+	  if (PREDICT_TRUE (n_left_from >= 8))
 	    {
-	      vnet_update_l2_len (b);
-	      vnet_buffer (b)->sw_if_index[VLIB_TX] = L2INPUT_BVI;
+	      vlib_buffer_t *p4, *p5, *p6, *p7;
+
+	      p4 = vlib_get_buffer (vm, from[4]);
+	      p5 = vlib_get_buffer (vm, from[5]);
+	      p6 = vlib_get_buffer (vm, from[6]);
+	      p7 = vlib_get_buffer (vm, from[7]);
+
+	      vlib_prefetch_buffer_header (p4, STORE);
+	      vlib_prefetch_buffer_header (p5, STORE);
+	      vlib_prefetch_buffer_header (p6, STORE);
+	      vlib_prefetch_buffer_header (p7, STORE);
 	    }
-	  else
-	    vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+	  to_next[0] = bi0 = from[0];
+	  to_next[1] = bi1 = from[1];
+	  to_next[2] = bi2 = from[2];
+	  to_next[3] = bi3 = from[3];
+	  from += 4;
+	  to_next += 4;
+	  n_left_from -= 4;
+	  n_left_to_next -= 4;
 
-	  i++;
-	  n_pkts++;
-	  n_bytes += vlib_buffer_length_in_chain (vm, b);
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+	  b2 = vlib_get_buffer (vm, bi2);
+	  b3 = vlib_get_buffer (vm, bi3);
 
-	  if (i < n_copy)
-	    b = vlib_get_buffer (vm, from[i]);
-	  else
-	    break;
+	  /* This "should never happen," of course... */
+	  if (CLIB_DEBUG > 0)
+	    {
+	      u32 cache_not_ok;
+	      u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
+	      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+	      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+	      sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_TX];
+	      sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_TX];
+
+	      cache_not_ok = (sw_if_index0 ^ sw_if_index1)
+		^ (sw_if_index2 ^ sw_if_index3);
+	      cache_not_ok += sw_if_index0 ^ sw_if_index_all;
+	      ASSERT (cache_not_ok == 0);
+	    }
+
+	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index_all;
+	  vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index_all;
+	  vnet_buffer (b2)->sw_if_index[VLIB_RX] = sw_if_index_all;
+	  vnet_buffer (b3)->sw_if_index[VLIB_RX] = sw_if_index_all;
+
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = new_tx_sw_if_index;
+	  vnet_buffer (b1)->sw_if_index[VLIB_TX] = new_tx_sw_if_index;
+	  vnet_buffer (b2)->sw_if_index[VLIB_TX] = new_tx_sw_if_index;
+	  vnet_buffer (b3)->sw_if_index[VLIB_TX] = new_tx_sw_if_index;
+
+	  /* Update l2 lengths if necessary */
+	  if (next_all == VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT)
+	    {
+	      vnet_update_l2_len (b0);
+	      vnet_update_l2_len (b1);
+	      vnet_update_l2_len (b2);
+	      vnet_update_l2_len (b3);
+	    }
+
+	  /* Byte accounting */
+	  n_bytes += vlib_buffer_length_in_chain (vm, b0);
+	  n_bytes += vlib_buffer_length_in_chain (vm, b1);
+	  n_bytes += vlib_buffer_length_in_chain (vm, b2);
+	  n_bytes += vlib_buffer_length_in_chain (vm, b3);
+
+	  /* This *should* be a noop every time... */
+	  vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, bi2, bi3,
+					   next_all, next_all,
+					   next_all, next_all);
 	}
-      from += n_copy;
 
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  if (CLIB_DEBUG > 0)
+	    {
+	      u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+	      ASSERT (sw_if_index0 == sw_if_index_all);
+	    }
+
+	  vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index_all;
+	  vnet_buffer (b0)->sw_if_index[VLIB_TX] = new_tx_sw_if_index;
+	  if (next_all == VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT)
+	    vnet_update_l2_len (b0);
+
+	  n_bytes += vlib_buffer_length_in_chain (vm, b0);
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next_all);
+	}
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-
-      /* increment TX interface stat */
-      vlib_increment_combined_counter (im->combined_sw_if_counters +
-				       VNET_INTERFACE_COUNTER_TX,
-				       thread_index, sw_if_index, n_pkts,
-				       n_bytes);
     }
 
-  return n_left_from;
+  /* increment TX interface stat */
+  vlib_increment_combined_counter (im->combined_sw_if_counters +
+				   VNET_INTERFACE_COUNTER_TX,
+				   thread_index, sw_if_index_all,
+				   frame->n_vectors, n_bytes);
+
+  return frame->n_vectors;
 }
 
 static u8 *
@@ -482,6 +598,8 @@
 };
 /* *INDENT-ON* */
 
+VLIB_DEVICE_TX_FUNCTION_MULTIARCH (ethernet_simulated_device_class,
+				   simulated_ethernet_interface_tx);
 
 /*
  * Maintain a bitmap of allocated loopback instance numbers.
@@ -617,6 +735,11 @@
      "ethernet-input", VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
   ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
 
+  slot = vlib_node_add_named_next_with_slot
+    (vm, hw_if->tx_node_index,
+     "l2-input", VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT);
+  ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT);
+
   {
     vnet_sw_interface_t *si = vnet_get_hw_sw_interface (vnm, hw_if_index);
     *sw_if_indexp = si->sw_if_index;
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 08af7f2..206bfbe 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -417,6 +417,7 @@
 } vnet_interface_tx_next_t;
 
 #define VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT VNET_INTERFACE_TX_N_NEXT
+#define VNET_SIMULATED_ETHERNET_TX_NEXT_L2_INPUT (VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT + 1)
 
 typedef enum
 {
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index 6e3851e..feab40c 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -564,7 +564,6 @@
   l2_input_config_t *config;
   l2_bridge_domain_t *bd_config;
   i32 l2_if_adjust = 0;
-  u32 slot;
   vnet_device_class_t *dev_class;
 
   hi = vnet_get_sup_hw_interface (vnet_main, sw_if_index);
@@ -589,13 +588,6 @@
 	  /* delete the l2fib entry for the bvi interface */
 	  l2fib_del_entry (hi->hw_address, config->bd_index, sw_if_index);
 
-	  /* Make loop output node send packet back to ethernet-input node */
-	  slot =
-	    vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
-						"ethernet-input",
-						VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
-	  ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
-
 	  /* since this is a no longer BVI interface do not to flood to it */
 	  si = vnet_get_sw_interface (vnm, sw_if_index);
 	  si->flood_class = VNET_FLOOD_CLASS_NO_FLOOD;
@@ -697,13 +689,6 @@
 	      /* Disable learning by default. no use since l2fib entry is static. */
 	      config->feature_bitmap &= ~L2INPUT_FEAT_LEARN;
 
-	      /* Make loop output node send packet to l2-input node */
-	      slot =
-		vlib_node_add_named_next_with_slot (vm, hi->tx_node_index,
-						    "l2-input",
-						    VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
-	      ASSERT (slot == VNET_SIMULATED_ETHERNET_TX_NEXT_ETHERNET_INPUT);
-
 	      /* since this is a BVI interface we want to flood to it */
 	      si = vnet_get_sw_interface (vnm, sw_if_index);
 	      si->flood_class = VNET_FLOOD_CLASS_BVI;
diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c
index 4a5628e..00f093e 100644
--- a/src/vnet/unix/tapcli.c
+++ b/src/vnet/unix/tapcli.c
@@ -275,7 +275,7 @@
   vnet_main_t *vnm;
   vnet_sw_interface_t *si;
   u8 admin_down;
-  u32 next = node->cached_next_index;
+  u32 next = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
   u32 n_left_to_next, next_index;
   u32 *to_next;