build: add scalar (no-simd) march variant

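Add a 'scalar' march variant, built with -march=core2 -mno-mmx -mno-sse
(SIMD disabled). It is intended for testing purposes and is disabled by
default.

Also make clib_memcpy16() available when CLIB_HAVE_VEC128 is not
defined, by falling back to two clib_memcpy8() calls.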

Type: improvement
Signed-off-by: Damjan Marion <damarion@cisco.com>
Change-Id: Id616e2b3b21ae0f0b44e2b55ecefd501afacc7f2
diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake
index c10158b..031a9bc 100644
--- a/src/cmake/cpu.cmake
+++ b/src/cmake/cpu.cmake
@@ -131,6 +131,11 @@
     OFF
   )
 
+  add_vpp_march_variant(scalar
+    FLAGS -march=core2 -mno-mmx -mno-sse
+    OFF
+  )
+
   if (GNU_ASSEMBLER_AVX512_BUG)
      message(WARNING "AVX-512 multiarch variant(s) disabled due to GNU Assembler bug")
   else()
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index a30401a..60439e0 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -21,6 +21,7 @@
 
 #if defined(__x86_64__)
 #define foreach_march_variant                                                 \
+  _ (scalar, "Generic (SIMD disabled)")                                       \
   _ (hsw, "Intel Haswell")                                                    \
   _ (trm, "Intel Tremont")                                                    \
   _ (skx, "Intel Skylake (server) / Cascade Lake")                            \
@@ -242,6 +243,12 @@
 }
 
 static inline int
+clib_cpu_march_priority_scalar ()
+{
+  return 1;
+}
+
+static inline int
 clib_cpu_march_priority_spr ()
 {
   if (clib_cpu_supports_enqcmd ())
diff --git a/src/vppinfra/memcpy_x86_64.h b/src/vppinfra/memcpy_x86_64.h
index e206c69..39258f1 100644
--- a/src/vppinfra/memcpy_x86_64.h
+++ b/src/vppinfra/memcpy_x86_64.h
@@ -38,13 +38,16 @@
   *(u64u *) d = *(u64u *) s;
 }
 
-#ifdef CLIB_HAVE_VEC128
 static_always_inline void
 clib_memcpy16 (void *d, void *s)
 {
+#ifdef CLIB_HAVE_VEC128
   *(u8x16u *) d = *(u8x16u *) s;
-}
+#else
+  clib_memcpy8 (d, s);
+  clib_memcpy8 (d + 8, s + 8);
 #endif
+}
 
 #ifdef CLIB_HAVE_VEC256
 static_always_inline void
diff --git a/src/vppinfra/test/memcpy_x86_64.c b/src/vppinfra/test/memcpy_x86_64.c
index 4d9525d..9b93bb1 100644
--- a/src/vppinfra/test/memcpy_x86_64.c
+++ b/src/vppinfra/test/memcpy_x86_64.c
@@ -6,7 +6,7 @@
 
 #include <vppinfra/format.h>
 #include <vppinfra/test/test.h>
-#include <vppinfra/vector/mask_compare.h>
+#include <vppinfra/memcpy_x86_64.h>
 
 __test_funct_fn void
 wrapper (u8 *dst, u8 *src, uword n)