vxlan:ip4 decap:remove access to tunnel object

store local/remote addresses + vrf + vni in hash key
store complete decap info in hash value (sw_if_index + next_index +
error)

this removes the need to access the tunnel object when matching both
unicast and mcast.

however for mcast handling it requires 3 hash lookups:
* one failed unicast lookup (by src+dst addrs)
* lookup by mcast (dst) addr
* unicast lookup (tunnel local ip as dst + pkt's src addr)

where previously it needed 2:
* lookup by src to find unicast tunnel + compare dst to local addr
(failing for mcast)
* lookup by mcast to find the mcast tunnel

Change-Id: I7a3485d130a54194b8f7e2df0431258db36eceeb
Signed-off-by: Eyal Bari <ebari@cisco.com>
diff --git a/src/vnet/vxlan/decap.c b/src/vnet/vxlan/decap.c
index 3ae70c9..387539d 100644
--- a/src/vnet/vxlan/decap.c
+++ b/src/vnet/vxlan/decap.c
@@ -62,73 +62,90 @@
 
 typedef vxlan4_tunnel_key_t last_tunnel_cache4;
 
-always_inline vxlan_tunnel_t *
+static const vxlan_decap_info_t decap_not_found = {
+  .sw_if_index = ~0,
+  .next_index = VXLAN_INPUT_NEXT_DROP,
+  .error = VXLAN_ERROR_NO_SUCH_TUNNEL
+};
+
+static const vxlan_decap_info_t decap_bad_flags = {
+  .sw_if_index = ~0,
+  .next_index = VXLAN_INPUT_NEXT_DROP,
+  .error = VXLAN_ERROR_BAD_FLAGS
+};
+
+always_inline vxlan_decap_info_t
 vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache,
 		    u32 fib_index, ip4_header_t * ip4_0,
-		    vxlan_header_t * vxlan0, vxlan_tunnel_t ** stats_t0)
+		    vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
 {
-  /* Make sure VXLAN tunnel exist according to packet SIP and VNI */
-  vxlan4_tunnel_key_t key4;
-  key4.key[1] = ((u64) fib_index << 32) | vxlan0->vni_reserved;
+  if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+    return decap_bad_flags;
 
-  if (PREDICT_FALSE (key4.key[1] != cache->key[1] ||
-		     ip4_0->src_address.as_u32 != (u32) cache->key[0]))
+  /* Make sure VXLAN tunnel exist according to packet S/D IP, VRF, and VNI */
+  u32 dst = ip4_0->dst_address.as_u32;
+  u32 src = ip4_0->src_address.as_u32;
+  vxlan4_tunnel_key_t key4 = {
+    .key[0] = ((u64) dst << 32) | src,
+    .key[1] = ((u64) fib_index << 32) | vxlan0->vni_reserved,
+  };
+
+  if (PREDICT_TRUE /* cache hit only when BOTH key halves match */
+      (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1]))
     {
-      key4.key[0] = ip4_0->src_address.as_u32;
-      int rv =
-	clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
-      if (PREDICT_FALSE (rv != 0))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
+      /* cache hit */
+      vxlan_decap_info_t di = {.as_u64 = cache->value };
+      *stats_sw_if_index = di.sw_if_index;
+      return di;
+    }
 
+  int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+  if (PREDICT_TRUE (rv == 0))
+    {
       *cache = key4;
-    }
-  vxlan_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
-
-  /* Validate VXLAN tunnel SIP against packet DIP */
-  if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
-    *stats_t0 = t0;
-  else
-    {
-      /* try multicast */
-      if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
-
-      key4.key[0] = ip4_0->dst_address.as_u32;
-      /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */
-      int rv =
-	clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
-      if (PREDICT_FALSE (rv != 0))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
-
-      *stats_t0 = pool_elt_at_index (vxm->tunnels, key4.value);
+      vxlan_decap_info_t di = {.as_u64 = key4.value };
+      *stats_sw_if_index = di.sw_if_index;
+      return di;
     }
 
-  return t0;
+  /* try multicast */
+  if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
+    return decap_not_found;
+
+  /* search for mcast decap info by mcast address */
+  key4.key[0] = dst;
+  rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+  if (rv != 0)
+    return decap_not_found;
+
+  /* search for unicast tunnel using the mcast tunnel local(src) ip */
+  vxlan_decap_info_t mdi = {.as_u64 = key4.value };
+  key4.key[0] = ((u64) mdi.local_ip.as_u32 << 32) | src;
+  rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+  if (PREDICT_FALSE (rv != 0))
+    return decap_not_found;
+
+  /* mcast traffic does not update the cache */
+  *stats_sw_if_index = mdi.sw_if_index;
+  vxlan_decap_info_t di = {.as_u64 = key4.value };
+  return di;
 }
 
 typedef vxlan6_tunnel_key_t last_tunnel_cache6;
 
-always_inline vxlan_tunnel_t *
+always_inline vxlan_decap_info_t
 vxlan6_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache6 * cache,
 		    u32 fib_index, ip6_header_t * ip6_0,
-		    vxlan_header_t * vxlan0, vxlan_tunnel_t ** stats_t0)
+		    vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
 {
+  if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+    return decap_bad_flags;
+
   /* Make sure VXLAN tunnel exist according to packet SIP and VNI */
   vxlan6_tunnel_key_t key6 = {
-    .key = {
-	    [0] = ip6_0->src_address.as_u64[0],
-	    [1] = ip6_0->src_address.as_u64[1],
-	    [2] = (((u64) fib_index) << 32) | vxlan0->vni_reserved,
-	    }
+    .key[0] = ip6_0->src_address.as_u64[0],
+    .key[1] = ip6_0->src_address.as_u64[1],
+    .key[2] = (((u64) fib_index) << 32) | vxlan0->vni_reserved,
   };
 
   if (PREDICT_FALSE
@@ -137,10 +154,7 @@
       int rv =
 	clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
       if (PREDICT_FALSE (rv != 0))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
+	return decap_not_found;
 
       *cache = key6;
     }
@@ -148,15 +162,12 @@
 
   /* Validate VXLAN tunnel SIP against packet DIP */
   if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
-    *stats_t0 = t0;
+    *stats_sw_if_index = t0->sw_if_index;
   else
     {
       /* try multicast */
       if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
+	return decap_not_found;
 
       /* Make sure mcast VXLAN tunnel exist by packet DIP and VNI */
       key6.key[0] = ip6_0->dst_address.as_u64[0];
@@ -164,15 +175,17 @@
       int rv =
 	clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
       if (PREDICT_FALSE (rv != 0))
-	{
-	  *stats_t0 = 0;
-	  return 0;
-	}
+	return decap_not_found;
 
-      *stats_t0 = pool_elt_at_index (vxm->tunnels, key6.value);
+      vxlan_tunnel_t *mcast_t0 = pool_elt_at_index (vxm->tunnels, key6.value);
+      *stats_sw_if_index = mcast_t0->sw_if_index;
     }
 
-  return t0;
+  vxlan_decap_info_t di = {
+    .sw_if_index = t0->sw_if_index,
+    .next_index = t0->decap_next_index,
+  };
+  return di;
 }
 
 always_inline uword
@@ -185,8 +198,6 @@
   vnet_interface_main_t *im = &vnm->interface_main;
   vlib_combined_counter_main_t *rx_counter =
     im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
-  vlib_combined_counter_main_t *drop_counter =
-    im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
   last_tunnel_cache4 last4;
   last_tunnel_cache6 last6;
   u32 pkts_dropped = 0;
@@ -203,6 +214,7 @@
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
   vlib_get_buffers (vm, from, bufs, n_left_from);
 
+  u32 stats_if0 = ~0, stats_if1 = ~0;
   u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
   while (n_left_from >= 4)
     {
@@ -216,8 +228,6 @@
       vxlan_header_t *vxlan0 = cur0;
       vxlan_header_t *vxlan1 = cur1;
 
-      u8 error0 = vxlan0->flags != VXLAN_FLAGS_I ? VXLAN_ERROR_BAD_FLAGS : 0;
-      u8 error1 = vxlan1->flags != VXLAN_FLAGS_I ? VXLAN_ERROR_BAD_FLAGS : 0;
 
       ip4_header_t *ip4_0, *ip4_1;
       ip6_header_t *ip6_0, *ip6_1;
@@ -239,25 +249,12 @@
       u32 fi0 = buf_fib_index (b[0], is_ip4);
       u32 fi1 = buf_fib_index (b[1], is_ip4);
 
-      vxlan_tunnel_t *t0, *stats_t0;
-      vxlan_tunnel_t *t1, *stats_t1;
-      if (is_ip4)
-	{
-	  t0 =
-	    vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_t0);
-	  t1 =
-	    vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_t1);
-	}
-      else
-	{
-	  t0 =
-	    vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_t0);
-	  t1 =
-	    vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_t1);
-	}
-
-      error0 = t0 == 0 ? VXLAN_ERROR_NO_SUCH_TUNNEL : error0;
-      error1 = t1 == 0 ? VXLAN_ERROR_NO_SUCH_TUNNEL : error1;
+      vxlan_decap_info_t di0 = is_ip4 ?
+	vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
+	vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
+      vxlan_decap_info_t di1 = is_ip4 ?
+	vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_if1) :
+	vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_if1);
 
       /* Prefetch next iteration. */
       CLIB_PREFETCH (b[2]->data, CLIB_CACHE_LINE_BYTES, LOAD);
@@ -266,59 +263,50 @@
       u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
       u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
 
-      /* Validate VXLAN tunnel encap-fib index against packet */
-      if (PREDICT_FALSE (error0 != 0))
-	{
-	  next[0] = VXLAN_INPUT_NEXT_DROP;
+      next[0] = di0.next_index;
+      next[1] = di1.next_index;
 
-	  if (error0 == VXLAN_ERROR_BAD_FLAGS)
-	    {
-	      vlib_increment_combined_counter
-		(drop_counter, thread_index, stats_t0->sw_if_index, 1, len0);
-	    }
-	  b[0]->error = node->errors[error0];
-	  pkts_dropped++;
+      u8 any_error = di0.error | di1.error;
+      if (PREDICT_TRUE (any_error == 0))
+	{
+	  /* Required to make the l2 tag push / pop code work on l2 subifs */
+	  vnet_update_l2_len (b[0]);
+	  vnet_update_l2_len (b[1]);
+	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+	  vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
+	  vlib_increment_combined_counter (rx_counter, thread_index,
+					   stats_if0, 1, len0);
+	  vlib_increment_combined_counter (rx_counter, thread_index,
+					   stats_if1, 1, len1);
 	}
       else
 	{
-	  next[0] = t0->decap_next_index;
-
-	  /* Required to make the l2 tag push / pop code work on l2 subifs */
-	  if (PREDICT_TRUE (next[0] == VXLAN_INPUT_NEXT_L2_INPUT))
-	    vnet_update_l2_len (b[0]);
-
-	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
-	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = t0->sw_if_index;
-	  vlib_increment_combined_counter
-	    (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0);
-	}
-
-      /* Validate VXLAN tunnel encap-fib index against packet */
-      if (PREDICT_FALSE (error1 != 0))
-	{
-	  next[1] = VXLAN_INPUT_NEXT_DROP;
-
-	  if (error1 == VXLAN_ERROR_BAD_FLAGS)
+	  if (di0.error == 0)
 	    {
-	      vlib_increment_combined_counter
-		(drop_counter, thread_index, stats_t1->sw_if_index, 1, len1);
+	      vnet_update_l2_len (b[0]);
+	      vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+	      vlib_increment_combined_counter (rx_counter, thread_index,
+					       stats_if0, 1, len0);
 	    }
-	  b[1]->error = node->errors[error1];
-	  pkts_dropped++;
-	}
-      else
-	{
-	  next[1] = t1->decap_next_index;
+	  else
+	    {
+	      b[0]->error = node->errors[di0.error];
+	      pkts_dropped++;
+	    }
 
-	  /* Required to make the l2 tag push / pop code work on l2 subifs */
-	  if (PREDICT_TRUE (next[1] == VXLAN_INPUT_NEXT_L2_INPUT))
-	    vnet_update_l2_len (b[1]);
-
-	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
-	  vnet_buffer (b[1])->sw_if_index[VLIB_RX] = t1->sw_if_index;
-
-	  vlib_increment_combined_counter
-	    (rx_counter, thread_index, stats_t1->sw_if_index, 1, len1);
+	  if (di1.error == 0)
+	    {
+	      vnet_update_l2_len (b[1]);
+	      vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
+	      vlib_increment_combined_counter (rx_counter, thread_index,
+					       stats_if1, 1, len1);
+	    }
+	  else
+	    {
+	      b[1]->error = node->errors[di1.error];
+	      pkts_dropped++;
+	    }
 	}
 
       if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -326,8 +314,9 @@
 	  vxlan_rx_trace_t *tr =
 	    vlib_add_trace (vm, node, b[0], sizeof (*tr));
 	  tr->next_index = next[0];
-	  tr->error = error0;
-	  tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
+	  tr->error = di0.error;
+	  tr->tunnel_index = di0.sw_if_index == ~0 ?
+	    ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
 	  tr->vni = vnet_get_vni (vxlan0);
 	}
       if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
@@ -335,8 +324,9 @@
 	  vxlan_rx_trace_t *tr =
 	    vlib_add_trace (vm, node, b[1], sizeof (*tr));
 	  tr->next_index = next[1];
-	  tr->error = error1;
-	  tr->tunnel_index = t1 == 0 ? ~0 : t1 - vxm->tunnels;
+	  tr->error = di1.error;
+	  tr->tunnel_index = di1.sw_if_index == ~0 ?
+	    ~0 : vxm->tunnel_index_by_sw_if_index[di1.sw_if_index];
 	  tr->vni = vnet_get_vni (vxlan1);
 	}
       b += 2;
@@ -349,7 +339,6 @@
       /* udp leaves current_data pointing at the vxlan header */
       void *cur0 = vlib_buffer_get_current (b[0]);
       vxlan_header_t *vxlan0 = cur0;
-      u8 error0 = vxlan0->flags != VXLAN_FLAGS_I ? VXLAN_ERROR_BAD_FLAGS : 0;
       ip4_header_t *ip4_0;
       ip6_header_t *ip6_0;
       if (is_ip4)
@@ -362,41 +351,30 @@
 
       u32 fi0 = buf_fib_index (b[0], is_ip4);
 
-      vxlan_tunnel_t *t0, *stats_t0;
-      if (is_ip4)
-	t0 = vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_t0);
-      else
-	t0 = vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_t0);
+      vxlan_decap_info_t di0 = is_ip4 ?
+	vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
+	vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
 
-      error0 = t0 == 0 ? VXLAN_ERROR_NO_SUCH_TUNNEL : error0;
       uword len0 = vlib_buffer_length_in_chain (vm, b[0]);
 
-      /* Validate VXLAN tunnel encap-fib index against packet */
-      if (PREDICT_FALSE (error0 != 0))
-	{
-	  next[0] = VXLAN_INPUT_NEXT_DROP;
+      next[0] = di0.next_index;
 
-	  if (error0 == VXLAN_ERROR_BAD_FLAGS)
-	    {
-	      vlib_increment_combined_counter
-		(drop_counter, thread_index, stats_t0->sw_if_index, 1, len0);
-	    }
-	  b[0]->error = node->errors[error0];
-	  pkts_dropped++;
+      /* Validate VXLAN tunnel encap-fib index against packet */
+      if (di0.error == 0)
+	{
+	  /* Required to make the l2 tag push / pop code work on l2 subifs */
+	  vnet_update_l2_len (b[0]);
+
+	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+
+	  vlib_increment_combined_counter (rx_counter, thread_index,
+					   stats_if0, 1, len0);
 	}
       else
 	{
-	  next[0] = t0->decap_next_index;
-
-	  /* Required to make the l2 tag push / pop code work on l2 subifs */
-	  if (PREDICT_TRUE (next[0] == VXLAN_INPUT_NEXT_L2_INPUT))
-	    vnet_update_l2_len (b[0]);
-
-	  /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
-	  vnet_buffer (b[0])->sw_if_index[VLIB_RX] = t0->sw_if_index;
-
-	  vlib_increment_combined_counter
-	    (rx_counter, thread_index, stats_t0->sw_if_index, 1, len0);
+	  b[0]->error = node->errors[di0.error];
+	  pkts_dropped++;
 	}
 
       if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -404,8 +382,9 @@
 	  vxlan_rx_trace_t *tr
 	    = vlib_add_trace (vm, node, b[0], sizeof (*tr));
 	  tr->next_index = next[0];
-	  tr->error = error0;
-	  tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
+	  tr->error = di0.error;
+	  tr->tunnel_index = di0.sw_if_index == ~0 ?
+	    ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
 	  tr->vni = vnet_get_vni (vxlan0);
 	}
       b += 1;