hash: refactor crc32_5tuple

Type: improvement
Change-Id: I31cae2367e2ec7fc89991ca0df994a73da93aaed
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
index 27616db..3ed9c03 100644
--- a/src/vnet/bfd/bfd_main.c
+++ b/src/vnet/bfd/bfd_main.c
@@ -35,9 +35,9 @@
 {
   u64 checksum = 0;
 #if defined(clib_crc32c_uses_intrinsics) && !defined (__i386__)
-  checksum = crc32_u64 (0, discriminator);
-  checksum = crc32_u64 (checksum, expire_time);
-  checksum = crc32_u64 (checksum, secret);
+  checksum = clib_crc32c_u64 (0, discriminator);
+  checksum = clib_crc32c_u64 (checksum, expire_time);
+  checksum = clib_crc32c_u64 (checksum, secret);
 #else
   checksum = clib_xxhash (discriminator ^ expire_time ^ secret);
 #endif
diff --git a/src/vnet/hash/crc32_5tuple.c b/src/vnet/hash/crc32_5tuple.c
index 0a05b80..2cdb194 100644
--- a/src/vnet/hash/crc32_5tuple.c
+++ b/src/vnet/hash/crc32_5tuple.c
@@ -7,32 +7,11 @@
 #include <vnet/ethernet/ethernet.h>
 #include <vnet/ip/ip4_packet.h>
 #include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip46_address.h>
-#include <vnet/udp/udp_packet.h>
 #include <vnet/hash/hash.h>
 #include <vppinfra/crc32.h>
 
 #ifdef clib_crc32c_uses_intrinsics
 
-typedef union
-{
-  struct
-  {
-    ip46_address_t src_address;
-    ip46_address_t dst_address;
-    union
-    {
-      struct
-      {
-	u16 src_port;
-	u16 dst_port;
-      };
-      u32 l4_hdr;
-    };
-  };
-  u8 as_u8[36];
-} crc32c_5tuple_key_t;
-
 static const u8 l4_mask_bits[256] = {
   [IP_PROTOCOL_ICMP] = 16,	[IP_PROTOCOL_IGMP] = 8,
   [IP_PROTOCOL_TCP] = 32,	[IP_PROTOCOL_UDP] = 32,
@@ -40,39 +19,42 @@
   [IP_PROTOCOL_ICMP6] = 16,
 };
 
-static_always_inline void
-compute_ip6_key (ip6_header_t *ip, crc32c_5tuple_key_t *k)
+static_always_inline u32
+compute_ip6_key (ip6_header_t *ip)
 {
+  u32 hash = 0, l4hdr;
   u8 pr;
-
-  /* copy 32 bytes of ip6 src and dst addresses into hash_key_t */
-  clib_memcpy_fast ((u8 *) k, (u8 *) ip + 8, sizeof (ip6_address_t) * 2);
+  /* 32 bytes of src + dst ip, folded in as four unaligned u64 loads */
+  hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 8));
+  hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 16));
+  hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 24));
+  hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 32));
   pr = ip->protocol;
-  /* write l4 header */
-  k->l4_hdr = *(u32 *) ip6_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+  l4hdr = *(u32u *) ip6_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+  /* protocol + l4 hdr */
+  return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
 }
 
-static_always_inline void
-compute_ip4_key (ip4_header_t *ip, crc32c_5tuple_key_t *k)
+static_always_inline u32
+compute_ip4_key (ip4_header_t *ip)
 {
+  u32 hash = 0, l4hdr;
   u8 pr;
-  u64 *key = (u64 *) k;
-  /* copy 8 bytes of ip src and dst addresses into hash_key_t */
-  key[0] = 0;
-  key[1] = 0;
-  key[2] = 0;
-  key[3] = *(u64 *) ((u8 *) ip + 12);
+  /* src + dst ip as a single u64; offset 12 is not 8-byte aligned */
+  hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 12));
   pr = ip->protocol;
-  /* write l4 header */
-  k->l4_hdr = *(u32 *) ip4_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+  l4hdr = *(u32u *) ip4_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+  /* protocol + l4 hdr */
+  return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
 }
-static_always_inline void
-compute_ip_key (void *p, crc32c_5tuple_key_t *key)
+static_always_inline u32
+compute_ip_key (void *p)
 {
   if ((((u8 *) p)[0] & 0xf0) == 0x40)
-    compute_ip4_key (p, key);
+    return compute_ip4_key (p);
   else if ((((u8 *) p)[0] & 0xf0) == 0x60)
-    compute_ip6_key (p, key);
+    return compute_ip6_key (p);
+  return 0;
 }
 
 void
@@ -82,22 +64,15 @@
 
   while (n_left_from >= 8)
     {
-      crc32c_5tuple_key_t key[4] = {};
-
       clib_prefetch_load (p[4]);
       clib_prefetch_load (p[5]);
       clib_prefetch_load (p[6]);
       clib_prefetch_load (p[7]);
 
-      compute_ip_key (p[0], &key[0]);
-      compute_ip_key (p[1], &key[1]);
-      compute_ip_key (p[2], &key[2]);
-      compute_ip_key (p[3], &key[3]);
-
-      hash[0] = clib_crc32c (key[0].as_u8, sizeof (key[0]));
-      hash[1] = clib_crc32c (key[1].as_u8, sizeof (key[1]));
-      hash[2] = clib_crc32c (key[2].as_u8, sizeof (key[2]));
-      hash[3] = clib_crc32c (key[3].as_u8, sizeof (key[3]));
+      hash[0] = compute_ip_key (p[0]);
+      hash[1] = compute_ip_key (p[1]);
+      hash[2] = compute_ip_key (p[2]);
+      hash[3] = compute_ip_key (p[3]);
 
       hash += 4;
       n_left_from -= 4;
@@ -106,11 +81,7 @@
 
   while (n_left_from > 0)
     {
-      crc32c_5tuple_key_t key = {};
-
-      compute_ip_key (p[0], &key);
-
-      hash[0] = clib_crc32c (key.as_u8, sizeof (key));
+      hash[0] = compute_ip_key (p[0]);
 
       hash += 1;
       n_left_from -= 1;
@@ -118,8 +89,8 @@
     }
 }
 
-static_always_inline void
-compute_ethernet_key (void *p, crc32c_5tuple_key_t *key)
+static_always_inline u32
+compute_ethernet_key (void *p)
 {
   u16 ethertype = 0, l2hdr_sz = 0;
 
@@ -144,13 +115,14 @@
   if (ethertype == ETHERNET_TYPE_IP4)
     {
       ip4_header_t *ip4 = (ip4_header_t *) (p + l2hdr_sz);
-      compute_ip4_key (ip4, key);
+      return compute_ip4_key (ip4);
     }
   else if (ethertype == ETHERNET_TYPE_IP6)
     {
       ip6_header_t *ip6 = (ip6_header_t *) (p + l2hdr_sz);
-      compute_ip6_key (ip6, key);
+      return compute_ip6_key (ip6);
     }
+  return 0;
 }
 
 void
@@ -160,22 +132,15 @@
 
   while (n_left_from >= 8)
     {
-      crc32c_5tuple_key_t key[4] = {};
-
       clib_prefetch_load (p[4]);
       clib_prefetch_load (p[5]);
       clib_prefetch_load (p[6]);
       clib_prefetch_load (p[7]);
 
-      compute_ethernet_key (p[0], &key[0]);
-      compute_ethernet_key (p[1], &key[1]);
-      compute_ethernet_key (p[2], &key[2]);
-      compute_ethernet_key (p[3], &key[3]);
-
-      hash[0] = clib_crc32c (key[0].as_u8, sizeof (key[0]));
-      hash[1] = clib_crc32c (key[1].as_u8, sizeof (key[1]));
-      hash[2] = clib_crc32c (key[2].as_u8, sizeof (key[2]));
-      hash[3] = clib_crc32c (key[3].as_u8, sizeof (key[3]));
+      hash[0] = compute_ethernet_key (p[0]);
+      hash[1] = compute_ethernet_key (p[1]);
+      hash[2] = compute_ethernet_key (p[2]);
+      hash[3] = compute_ethernet_key (p[3]);
 
       hash += 4;
       n_left_from -= 4;
@@ -184,11 +149,7 @@
 
   while (n_left_from > 0)
     {
-      crc32c_5tuple_key_t key = {};
-
-      compute_ethernet_key (p[0], &key);
-
-      hash[0] = clib_crc32c (key.as_u8, sizeof (key));
+      hash[0] = compute_ethernet_key (p[0]);
 
       hash += 1;
       n_left_from -= 1;
diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h
index fec67cd..3b81daf 100644
--- a/src/vppinfra/crc32.h
+++ b/src/vppinfra/crc32.h
@@ -21,67 +21,81 @@
 #if __SSE4_2__
 #define clib_crc32c_uses_intrinsics
 #include <x86intrin.h>
-
-#define crc32_u64 _mm_crc32_u64
-#define crc32_u32 _mm_crc32_u32
-
 static_always_inline u32
-clib_crc32c (u8 * s, int len)
+clib_crc32c_u8 (u32 last, u8 data)
 {
-  u32 v = 0;
-
-#if defined(__x86_64__)
-  for (; len >= 8; len -= 8, s += 8)
-    v = _mm_crc32_u64 (v, *((u64 *) s));
-#else
-  /* workaround weird GCC bug when using _mm_crc32_u32
-     which happens with -O2 optimization */
-#if !defined (__i686__)
-  asm volatile ("":::"memory");
-#endif
-#endif
-
-  for (; len >= 4; len -= 4, s += 4)
-    v = _mm_crc32_u32 (v, *((u32 *) s));
-
-  for (; len >= 2; len -= 2, s += 2)
-    v = _mm_crc32_u16 (v, *((u16 *) s));
-
-  for (; len >= 1; len -= 1, s += 1)
-    v = _mm_crc32_u8 (v, *((u16 *) s));
-
-  return v;
+  return _mm_crc32_u8 (last, data);
 }
 
-#elif __ARM_FEATURE_CRC32
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+  return _mm_crc32_u16 (last, data);
+}
+
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+  return _mm_crc32_u32 (last, data);
+}
+
+static_always_inline u32
+clib_crc32c_u64 (u32 last, u64 data)
+{
+  return _mm_crc32_u64 (last, data);
+}
+#endif
+
+#if __ARM_FEATURE_CRC32
 #define clib_crc32c_uses_intrinsics
 #include <arm_acle.h>
+static_always_inline u32
+clib_crc32c_u8 (u32 last, u8 data)
+{
+  return __crc32cb (last, data); /* byte-wide step, like _mm_crc32_u8 */
+}
 
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+  return __crc32ch (last, data);
+}
 
-#define crc32_u64 __crc32cd
-#define crc32_u32 __crc32cw
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+  return __crc32cw (last, data);
+}
 
 static_always_inline u32
+clib_crc32c_u64 (u32 last, u64 data)
+{
+  return __crc32cd (last, data);
+}
+#endif
+
+#ifdef clib_crc32c_uses_intrinsics
+static_always_inline u32
 clib_crc32c (u8 * s, int len)
 {
   u32 v = 0;
 
   for (; len >= 8; len -= 8, s += 8)
-    v = __crc32cd (v, *((u64 *) s));
+    v = clib_crc32c_u64 (v, *((u64u *) s));
 
   for (; len >= 4; len -= 4, s += 4)
-    v = __crc32cw (v, *((u32 *) s));
+    v = clib_crc32c_u32 (v, *((u32u *) s));
 
   for (; len >= 2; len -= 2, s += 2)
-    v = __crc32ch (v, *((u16 *) s));
+    v = clib_crc32c_u16 (v, *((u16u *) s));
 
   for (; len >= 1; len -= 1, s += 1)
-    v = __crc32cb (v, *((u8 *) s));
+    v = clib_crc32c_u8 (v, *((u8 *) s));
 
   return v;
 }
-
 #endif
+
 #endif /* __included_crc32_h__ */
 
 /*
diff --git a/src/vppinfra/lb_hash_hash.h b/src/vppinfra/lb_hash_hash.h
index fb25159..f355515 100644
--- a/src/vppinfra/lb_hash_hash.h
+++ b/src/vppinfra/lb_hash_hash.h
@@ -24,11 +24,11 @@
 lb_hash_hash (u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
 {
   u64 val = 0;
-  val = crc32_u64 (val, k0);
-  val = crc32_u64 (val, k1);
-  val = crc32_u64 (val, k2);
-  val = crc32_u64 (val, k3);
-  val = crc32_u64 (val, k4);
+  val = clib_crc32c_u64 (val, k0);
+  val = clib_crc32c_u64 (val, k1);
+  val = clib_crc32c_u64 (val, k2);
+  val = clib_crc32c_u64 (val, k3);
+  val = clib_crc32c_u64 (val, k4);
   return (u32) val;
 }
 
@@ -37,8 +37,8 @@
 lb_hash_hash_2_tuples (u64 k0, u32 k1)
 {
   u64 val = 0;
-  val = crc32_u64 (val, k0);
-  val = crc32_u32 (val, k1);
+  val = clib_crc32c_u64 (val, k0);
+  val = clib_crc32c_u32 (val, k1);
   return (u32) val;
 }
 #else