vppinfra: add clib_count_equal_uXX and clib_memset_uXX functions
Change-Id: I56782652d8ef10304900cc293cfc0502689d800e
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 7873e46..c70996c 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -146,6 +146,340 @@
#endif
}
+static_always_inline void
+clib_memset_u64 (void *p, u64 val, uword count)
+{
+ u64 *ptr = p;
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 v512 = u64x8_splat (val);
+ while (count >= 8)
+ {
+ u64x8_store_unaligned (v512, ptr);
+ ptr += 8;
+ count -= 8;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ u64x4 v256 = u64x4_splat (val);
+ while (count >= 4)
+ {
+ u64x4_store_unaligned (v256, ptr);
+ ptr += 4;
+ count -= 4;
+ }
+ if (count == 0)
+ return;
+#else
+ while (count >= 4)
+ {
+ ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+ ptr += 4;
+ count -= 4;
+ }
+#endif
+ while (count--)
+ ptr++[0] = val;
+}
+
+static_always_inline void
+clib_memset_u32 (void *p, u32 val, uword count)
+{
+ u32 *ptr = p;
+#if defined(CLIB_HAVE_VEC512)
+ u32x16 v512 = u32x16_splat (val);
+ while (count >= 16)
+ {
+ u32x16_store_unaligned (v512, ptr);
+ ptr += 16;
+ count -= 16;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ u32x8 v256 = u32x8_splat (val);
+ while (count >= 8)
+ {
+ u32x8_store_unaligned (v256, ptr);
+ ptr += 8;
+ count -= 8;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ u32x4 v128 = u32x4_splat (val);
+ while (count >= 4)
+ {
+ u32x4_store_unaligned (v128, ptr);
+ ptr += 4;
+ count -= 4;
+ }
+#else
+ while (count >= 4)
+ {
+ ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+ ptr += 4;
+ count -= 4;
+ }
+#endif
+ while (count--)
+ ptr++[0] = val;
+}
+
+static_always_inline void
+clib_memset_u16 (void *p, u16 val, uword count)
+{
+ u16 *ptr = p;
+#if defined(CLIB_HAVE_VEC512)
+ u16x32 v512 = u16x32_splat (val);
+ while (count >= 32)
+ {
+ u16x32_store_unaligned (v512, ptr);
+ ptr += 32;
+ count -= 32;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ u16x16 v256 = u16x16_splat (val);
+ while (count >= 16)
+ {
+ u16x16_store_unaligned (v256, ptr);
+ ptr += 16;
+ count -= 16;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ u16x8 v128 = u16x8_splat (val);
+ while (count >= 8)
+ {
+ u16x8_store_unaligned (v128, ptr);
+ ptr += 8;
+ count -= 8;
+ }
+#else
+ while (count >= 4)
+ {
+ ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+ ptr += 4;
+ count -= 4;
+ }
+#endif
+ while (count--)
+ ptr++[0] = val;
+}
+
+static_always_inline void
+clib_memset_u8 (void *p, u8 val, uword count)
+{
+ u8 *ptr = p;
+#if defined(CLIB_HAVE_VEC512)
+ u8x64 v512 = u8x64_splat (val);
+ while (count >= 64)
+ {
+ u8x64_store_unaligned (v512, ptr);
+ ptr += 64;
+ count -= 64;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ u8x32 v256 = u8x32_splat (val);
+ while (count >= 32)
+ {
+ u8x32_store_unaligned (v256, ptr);
+ ptr += 32;
+ count -= 32;
+ }
+ if (count == 0)
+ return;
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ u8x16 v128 = u8x16_splat (val);
+ while (count >= 16)
+ {
+ u8x16_store_unaligned (v128, ptr);
+ ptr += 16;
+ count -= 16;
+ }
+#else
+ while (count >= 4)
+ {
+ ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+ ptr += 4;
+ count -= 4;
+ }
+#endif
+ while (count--)
+ ptr++[0] = val;
+}
+
+static_always_inline uword
+clib_count_equal_u64 (u64 * data, uword max_count)
+{
+ uword count = 0;
+ u64 first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ while (u64x8_is_all_equal (u64x8_load_unaligned (data), first))
+ {
+ data += 8;
+ count += 8;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ while (u64x4_is_all_equal (u64x4_load_unaligned (data), first))
+ {
+ data += 4;
+ count += 4;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ while (u64x2_is_all_equal (u64x2_load_unaligned (data), first))
+ {
+ data += 2;
+ count += 2;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u32 (u32 * data, uword max_count)
+{
+ uword count = 0;
+ u32 first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ while (u32x16_is_all_equal (u32x16_load_unaligned (data), first))
+ {
+ data += 16;
+ count += 16;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ while (u32x8_is_all_equal (u32x8_load_unaligned (data), first))
+ {
+ data += 8;
+ count += 8;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ while (u32x4_is_all_equal (u32x4_load_unaligned (data), first))
+ {
+ data += 4;
+ count += 4;
+ if (count >= max_count)
+ return max_count;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u16 (u16 * data, uword max_count)
+{
+ uword count = 0;
+ u16 first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ while (count + 32 <= max_count &&
+ u16x32_is_all_equal (u16x32_load_unaligned (data), first))
+ {
+ data += 32;
+ count += 32;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ while (count + 16 <= max_count &&
+ u16x16_is_all_equal (u16x16_load_unaligned (data), first))
+ {
+ data += 16;
+ count += 16;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ while (count + 8 <= max_count &&
+ u16x8_is_all_equal (u16x8_load_unaligned (data), first))
+ {
+ data += 8;
+ count += 8;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline u32
+clib_count_equal_u8 (u32 * data, uword max_count)
+{
+ uword count = 0;
+ u8 first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ while (count + 64 <= max_count &&
+ u8x64_is_all_equal (u8x64_load_unaligned (data), first))
+ {
+ data += 64;
+ count += 64;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC256)
+ while (count + 32 <= max_count &&
+ u8x32_is_all_equal (u8x32_load_unaligned (data), first))
+ {
+ data += 32;
+ count += 32;
+ }
+#endif
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ while (count + 16 <= max_count &&
+ u8x16_is_all_equal (u8x16_load_unaligned (data), first))
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+
#endif /* included_clib_string_h */
/*
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index 1fb41df..e2d5701 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -47,8 +47,8 @@
{ return _mm256_testz_si256 ((__m256i) x, (__m256i) x); } \
\
static_always_inline int \
-t##s##x##c##_is_equal (t##s##x##c x, t##s##x##c y) \
-{ return _mm256_testc_si256 ((__m256i) x, (__m256i) y); } \
+t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
+{ return t##s##x##c##_is_all_zero (a ^ b); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index c2903e2..4aee8d1 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -48,7 +48,7 @@
\
static_always_inline int \
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
-{ return t##s##x##c##_is_all_zero (a ^b); } \
+{ return t##s##x##c##_is_all_zero (a ^ b); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index 053826d..fa377cc 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -69,8 +69,8 @@
{ return _mm_testz_si128 ((__m128i) x, (__m128i) x); } \
\
static_always_inline int \
-t##s##x##c##_is_equal (t##s##x##c x, t##s##x##c y) \
-{ return _mm_testc_si128 ((__m128i) x, (__m128i) y); } \
+t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
+{ return t##s##x##c##_is_all_zero (a ^ b); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \