/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_memcpy_h
#define included_memcpy_h

#include <vppinfra/clib.h>

#ifndef __COVERITY__
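
/* copy 4 u32 elements; a single 128-bit unaligned load/store when 128-bit
 * vectors are available, otherwise clib_memcpy_fast */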
static_always_inline void
clib_memcpy_u32_x4 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC128)
  u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
#else
  clib_memcpy_fast (dst, src, 4 * sizeof (u32));
#endif
}
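
/* copy 8 u32 elements; a single 256-bit unaligned load/store when available,
 * otherwise two 4-element copies */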
static_always_inline void
clib_memcpy_u32_x8 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC256)
  u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x4 (dst, src);
  clib_memcpy_u32_x4 (dst + 4, src + 4);
#endif
}
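
/* copy 16 u32 elements; a single 512-bit unaligned load/store when available,
 * otherwise two 8-element copies */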
static_always_inline void
clib_memcpy_u32_x16 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x8 (dst, src);
  clib_memcpy_u32_x8 (dst + 8, src + 8);
#endif
}
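
/* copy n_left u32 elements from src to dst using the widest available vector
 * loads/stores, falling back to masked or scalar copies for the tail */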
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
#if defined(CLIB_HAVE_VEC128)
  if (COMPILE_TIME_CONST (n_left))
    {
      /* when n_left is a compile-time constant, prevent the compiler from
       * using the more expensive mask load/store for common cases where a
       * smaller register load/store exists */
      switch (n_left)
        {
        case 4:
          clib_memcpy_u32_x4 (dst, src);
          return;
        case 8:
          clib_memcpy_u32_x8 (dst, src);
          return;
        case 12:
          clib_memcpy_u32_x8 (dst, src);
          clib_memcpy_u32_x4 (dst + 8, src + 8);
          return;
        case 16:
          clib_memcpy_u32_x16 (dst, src);
          return;
        case 32:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          return;
        case 64:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          clib_memcpy_u32_x16 (dst + 32, src + 32);
          clib_memcpy_u32_x16 (dst + 48, src + 48);
          return;
        default:
          break;
        }
    }
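
  /* runtime-sized copy: consume the largest vector-width chunks first, then
   * deal with the remainder below */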
#if defined(CLIB_HAVE_VEC512)
  while (n_left >= 64)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      clib_memcpy_u32_x16 (dst + 32, src + 32);
      clib_memcpy_u32_x16 (dst + 48, src + 48);
      dst += 64;
      src += 64;
      n_left -= 64;
    }
#endif

#if defined(CLIB_HAVE_VEC256)
  while (n_left >= 32)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      dst += 32;
      src += 32;
      n_left -= 32;
    }
#endif

  while (n_left >= 16)
    {
      clib_memcpy_u32_x16 (dst, src);
      dst += 16;
      src += 16;
      n_left -= 16;
    }
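
  /* where 512-bit mask load/store is supported, the remaining 0..15 elements
   * are copied with a single masked operation */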
#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  if (n_left)
    {
      u16 mask = pow2_mask (n_left);
      u32x16_mask_store (u32x16_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 8)
    {
      clib_memcpy_u32_x8 (dst, src);
      dst += 8;
      src += 8;
      n_left -= 8;
    }
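
  /* where 256-bit mask load/store is supported, the remaining 0..7 elements
   * are copied with a single masked operation */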
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  if (n_left)
    {
      u8 mask = pow2_mask (n_left);
      u32x8_mask_store (u32x8_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 4)
    {
      clib_memcpy_u32_x4 (dst, src);
      dst += 4;
      src += 4;
      n_left -= 4;
    }
#endif
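
  /* scalar tail; also the whole copy when no vector support is available */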
  while (n_left)
    {
      dst[0] = src[0];
      dst += 1;
      src += 1;
      n_left -= 1;
    }
}

#else /* __COVERITY__ */
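
/* simple fallback used when building for Coverity static analysis */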
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
  memcpy (dst, src, n_left * sizeof (u32));
}
#endif /* __COVERITY__ */

#endif /* included_memcpy_h */