vppinfra: bitops cleanup

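Rename reset_lowest_set_bit to clear_lowest_set_bit, make the
single-bit helpers operate on uword, move count_leading_zeros,
count_trailing_zeros and log2_first_set out of clib.h into bitops.h,
replace the old foreach_set_bit macro with foreach_set_bit_index, and
drop the <vppinfra/bitops.h> includes that are now redundant because
clib.h pulls bitops.h in.

Minimal usage sketch of the reworked helpers (illustration only, not
part of this patch; the mask value and the running sum are made up):

    uword mask = 0x30;                        /* bits 4 and 5 set */
    uword low = get_lowest_set_bit (mask);    /* 0x10 */
    u8 idx = get_lowest_set_bit_index (mask); /* 4 */
    mask = clear_lowest_set_bit (mask);       /* 0x20 */

    u32 i, n = 0;
    foreach_set_bit_index (i, mask)           /* visits i = 5 */
      n += i;

foreach_set_bit_index expands to a single for statement, so break and
continue behave as in an ordinary loop.
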
Type: refactor
Change-Id: I7fa113e924640f9d798c1eb6ae64b9c0a9e2104c
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vnet/interface/caps.c b/src/vnet/interface/caps.c
index 8b5fda1..54e8d90 100644
--- a/src/vnet/interface/caps.c
+++ b/src/vnet/interface/caps.c
@@ -54,7 +54,7 @@
       else
 	s = format (s, "unknown-%u", bit);
 
-      caps = reset_lowest_set_bit (caps);
+      caps = clear_lowest_set_bit (caps);
       if (caps)
 	vec_add1 (s, ' ');
     }
diff --git a/src/vnet/srp/packet.h b/src/vnet/srp/packet.h
index 96dab64..38296ac 100644
--- a/src/vnet/srp/packet.h
+++ b/src/vnet/srp/packet.h
@@ -40,8 +40,7 @@
 #ifndef included_srp_packet_h
 #define included_srp_packet_h
 
-#include <vppinfra/byte_order.h>
-#include <vppinfra/bitops.h>
+#include <vppinfra/clib.h>
 #include <vnet/ethernet/packet.h>
 
 /* SRP version 2. */
diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h
index d9bdd0f..459e6f2 100644
--- a/src/vppinfra/bitmap.h
+++ b/src/vppinfra/bitmap.h
@@ -45,7 +45,6 @@
 #include <vppinfra/vec.h>
 #include <vppinfra/random.h>
 #include <vppinfra/error.h>
-#include <vppinfra/bitops.h>	/* for count_set_bits */
 
 typedef uword clib_bitmap_t;
 
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index 0436569..15454ca 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -38,18 +38,38 @@
 #ifndef included_clib_bitops_h
 #define included_clib_bitops_h
 
-#include <vppinfra/clib.h>
+static_always_inline uword
+clear_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+  return uword_bits > 32 ? _blsr_u64 (x) : _blsr_u32 (x);
+#else
+  return x & (x - 1);
+#endif
+}
+
+static_always_inline uword
+get_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+  return uword_bits > 32 ? _blsi_u64 (x) : _blsi_u32 (x);
+#else
+  return x & -x;
+#endif
+}
+
+static_always_inline u8
+get_lowest_set_bit_index (uword x)
+{
+  return uword_bits > 32 ? __builtin_ctzll (x) : __builtin_ctz (x);
+}
 
 /* Population count from Hacker's Delight. */
 always_inline uword
 count_set_bits (uword x)
 {
 #ifdef __POPCNT__
-#if uword_bits == 64
-  return __builtin_popcountll (x);
-#else
-  return __builtin_popcount (x);
-#endif
+  return uword_bits > 32 ? __builtin_popcountll (x) : __builtin_popcount (x);
 #else
 #if uword_bits == 64
   const uword c1 = 0x5555555555555555;
@@ -81,6 +101,15 @@
 #endif
 }
 
+#if uword_bits == 64
+#define count_leading_zeros(x) __builtin_clzll (x)
+#else
+#define count_leading_zeros(x) __builtin_clzl (x)
+#endif
+
+#define count_trailing_zeros(x) get_lowest_set_bit_index (x)
+#define log2_first_set(x)	get_lowest_set_bit_index (x)
+
 /* Based on "Hacker's Delight" code from GLS. */
 typedef struct
 {
@@ -163,45 +192,13 @@
   return ripple | ones;
 }
 
-#define foreach_set_bit(var,mask,body)					\
-do {									\
-  uword _foreach_set_bit_m_##var = (mask);				\
-  uword _foreach_set_bit_f_##var;					\
-  while (_foreach_set_bit_m_##var != 0)					\
-    {									\
-      _foreach_set_bit_f_##var = first_set (_foreach_set_bit_m_##var);	\
-      _foreach_set_bit_m_##var ^= _foreach_set_bit_f_##var;		\
-      (var) = min_log2 (_foreach_set_bit_f_##var);			\
-      do { body; } while (0);						\
-    }									\
-} while (0)
+#define foreach_set_bit_index(i, v)                                           \
+  for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v));     \
+       _tmp;                                                                  \
+       i = get_lowest_set_bit_index (_tmp = clear_lowest_set_bit (_tmp)))
 
-static_always_inline u64
-reset_lowest_set_bit (u64 x)
-{
-#ifdef __BMI__
-  return _blsr_u64 (x);
 #else
-  return x & (x - 1);
-#endif
-}
-
-static_always_inline u64
-get_lowest_set_bit (u64 x)
-{
-#ifdef __BMI__
-  return _blsi_u64 (x);
-#else
-  return x & -x;
-#endif
-}
-
-static_always_inline u64
-get_lowest_set_bit_index (u64 x)
-{
-  return __builtin_ctzll (x);
-}
-
+#warning "already included"
 #endif /* included_clib_bitops_h */
 
 /*
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
index 1b6ab41..b3a2580 100644
--- a/src/vppinfra/clib.h
+++ b/src/vppinfra/clib.h
@@ -164,25 +164,7 @@
   decl __attribute ((destructor));		\
   decl
 
-/* Use __builtin_clz if available. */
-#if uword_bits == 64
-#define count_leading_zeros(x) __builtin_clzll (x)
-#define count_trailing_zeros(x) __builtin_ctzll (x)
-#else
-#define count_leading_zeros(x) __builtin_clzl (x)
-#define count_trailing_zeros(x) __builtin_ctzl (x)
-#endif
-
-#if defined (count_leading_zeros)
-always_inline uword
-clear_lowest_set_bit (uword x)
-{
-#ifdef __BMI2__
-  return _blsr_u64 (x);
-#else
-  return x ^ (1ULL << count_trailing_zeros (x));
-#endif
-}
+#include <vppinfra/bitops.h>
 
 always_inline uword
 min_log2 (uword x)
@@ -191,45 +173,6 @@
   n = count_leading_zeros (x);
   return BITS (uword) - n - 1;
 }
-#else
-always_inline uword
-min_log2 (uword x)
-{
-  uword a = x, b = BITS (uword) / 2, c = 0, r = 0;
-
-  /* Reduce x to 4 bit result. */
-#define _					\
-{						\
-  c = a >> b;					\
-  if (c) a = c;					\
-  if (c) r += b;				\
-  b /= 2;					\
-}
-
-  if (BITS (uword) > 32)
-    _;
-  _;
-  _;
-  _;
-#undef _
-
-  /* Do table lookup on 4 bit partial. */
-  if (BITS (uword) > 32)
-    {
-      const u64 table = 0x3333333322221104LL;
-      uword t = (table >> (4 * a)) & 0xf;
-      r = t < 4 ? r + t : ~0;
-    }
-  else
-    {
-      const u32 table = 0x22221104;
-      uword t = (a & 8) ? 3 : ((table >> (4 * a)) & 0xf);
-      r = t < 4 ? r + t : ~0;
-    }
-
-  return r;
-}
-#endif
 
 always_inline uword
 max_log2 (uword x)
@@ -308,18 +251,6 @@
   return x & -x;
 }
 
-always_inline uword
-log2_first_set (uword x)
-{
-  uword result;
-#ifdef count_trailing_zeros
-  result = count_trailing_zeros (x);
-#else
-  result = min_log2 (first_set (x));
-#endif
-  return result;
-}
-
 always_inline f64
 flt_round_down (f64 x)
 {
diff --git a/src/vppinfra/interrupt.c b/src/vppinfra/interrupt.c
index 20b7450..df242d9 100644
--- a/src/vppinfra/interrupt.c
+++ b/src/vppinfra/interrupt.c
@@ -15,7 +15,6 @@
  */
 
 #include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
 #include <vppinfra/vec.h>
 #include <vppinfra/interrupt.h>
 #include <vppinfra/format.h>
diff --git a/src/vppinfra/interrupt.h b/src/vppinfra/interrupt.h
index 60c01fa..393574b 100644
--- a/src/vppinfra/interrupt.h
+++ b/src/vppinfra/interrupt.h
@@ -17,7 +17,6 @@
 #define included_clib_interrupt_h
 
 #include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
 #include <vppinfra/vec.h>
 
 typedef struct
diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h
index 54a92ce..fc8b3cf 100644
--- a/src/vppinfra/sparse_vec.h
+++ b/src/vppinfra/sparse_vec.h
@@ -38,8 +38,8 @@
 #ifndef included_sparse_vec_h
 #define included_sparse_vec_h
 
+#include <vppinfra/clib.h>
 #include <vppinfra/vec.h>
-#include <vppinfra/bitops.h>
 
 /* Sparsely indexed vectors.  Basic idea taken from Hacker's delight.
    Eliot added ranges. */
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c
index fd11267..fb3a728 100644
--- a/src/vppinfra/unix-formats.c
+++ b/src/vppinfra/unix-formats.c
@@ -91,7 +91,6 @@
 # include <netinet/if_ether.h>
 #endif /* __KERNEL__ */
 
-#include <vppinfra/bitops.h> /* foreach_set_bit */
 #include <vppinfra/format.h>
 #include <vppinfra/error.h>
 
diff --git a/src/vppinfra/vector/compress.h b/src/vppinfra/vector/compress.h
index adb6503..d2ed716 100644
--- a/src/vppinfra/vector/compress.h
+++ b/src/vppinfra/vector/compress.h
@@ -27,12 +27,9 @@
       mask >>= 4;
     }
 #else
-  while (mask)
-    {
-      u16 bit = count_trailing_zeros (mask);
-      mask = clear_lowest_set_bit (mask);
-      dst++[0] = src[bit];
-    }
+  u32 i;
+  foreach_set_bit_index (i, mask)
+    dst++[0] = src[i];
 #endif
   return dst;
 }
@@ -93,12 +90,9 @@
       mask >>= 8;
     }
 #else
-  while (mask)
-    {
-      u16 bit = count_trailing_zeros (mask);
-      mask = clear_lowest_set_bit (mask);
-      dst++[0] = src[bit];
-    }
+  u32 i;
+  foreach_set_bit_index (i, mask)
+    dst++[0] = src[i];
 #endif
   return dst;
 }
@@ -150,12 +144,9 @@
       mask >>= 32;
     }
 #else
-  while (mask)
-    {
-      u16 bit = count_trailing_zeros (mask);
-      mask = clear_lowest_set_bit (mask);
-      dst++[0] = src[bit];
-    }
+  u32 i;
+  foreach_set_bit_index (i, mask)
+    dst++[0] = src[i];
 #endif
   return dst;
 }
@@ -203,12 +194,9 @@
   u8x64_compress_store (sv[0], mask, dst);
   dst += _popcnt64 (mask);
 #else
-  while (mask)
-    {
-      u16 bit = count_trailing_zeros (mask);
-      mask = clear_lowest_set_bit (mask);
-      dst++[0] = src[bit];
-    }
+  u32 i;
+  foreach_set_bit_index (i, mask)
+    dst++[0] = src[i];
 #endif
   return dst;
 }