/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_memcpy_h
#define included_memcpy_h

#include <vppinfra/clib.h>
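
/* Width-specialized copies of 32-bit words.  Each fixed-size helper below
 * uses the widest unaligned vector load/store the target provides (128, 256
 * or 512 bits) and falls back to narrower copies otherwise; clib_memcpy_u32
 * dispatches between them for arbitrary element counts. */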

#ifndef __COVERITY__

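/* Copy exactly 4 u32s with one unaligned 128-bit load/store when 128-bit
 * vectors are available, otherwise via clib_memcpy_fast. */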
static_always_inline void
clib_memcpy_u32_x4 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC128)
  u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
#else
  clib_memcpy_fast (dst, src, 4 * sizeof (u32));
#endif
}
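
/* Copy exactly 8 u32s: one unaligned 256-bit load/store when available,
 * otherwise two 4-element copies. */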
static_always_inline void
clib_memcpy_u32_x8 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC256)
  u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x4 (dst, src);
  clib_memcpy_u32_x4 (dst + 4, src + 4);
#endif
}

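/* Copy exactly 16 u32s: one unaligned 512-bit load/store when available,
 * otherwise two 8-element copies. */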
static_always_inline void
clib_memcpy_u32_x16 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x8 (dst, src);
  clib_memcpy_u32_x8 (dst + 8, src + 8);
#endif
}

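/* Copy n_left u32s from src to dst.  Compile-time-constant counts of
 * 4/8/12/16/32/64 map directly onto the fixed-size helpers above; other
 * counts are copied with the widest available vector registers and the tail
 * is finished with masked or scalar stores.
 *
 * Illustrative use (hypothetical buffers, not part of this header):
 *
 *   u32 to[13], from[13];
 *   ...
 *   clib_memcpy_u32 (to, from, 13);
 */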
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
#if defined(CLIB_HAVE_VEC128)
  if (COMPILE_TIME_CONST (n_left))
    {
      /* for n_left given as a compile-time constant, keep the compiler from
       * emitting the more expensive mask load/store in common cases where a
       * plain fixed-width register load/store exists */
      switch (n_left)
        {
        case 4:
          clib_memcpy_u32_x4 (dst, src);
          return;
        case 8:
          clib_memcpy_u32_x8 (dst, src);
          return;
        case 12:
          clib_memcpy_u32_x8 (dst, src);
          clib_memcpy_u32_x4 (dst + 8, src + 8);
          return;
        case 16:
          clib_memcpy_u32_x16 (dst, src);
          return;
        case 32:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          return;
        case 64:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          clib_memcpy_u32_x16 (dst + 32, src + 32);
          clib_memcpy_u32_x16 (dst + 48, src + 48);
          return;
        default:
          break;
        }
    }

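  /* bulk copy using the widest registers available: 64, 32 or 16 u32s per
   * iteration */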
#if defined(CLIB_HAVE_VEC512)
  while (n_left >= 64)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      clib_memcpy_u32_x16 (dst + 32, src + 32);
      clib_memcpy_u32_x16 (dst + 48, src + 48);
      dst += 64;
      src += 64;
      n_left -= 64;
    }
#endif

#if defined(CLIB_HAVE_VEC256)
  while (n_left >= 32)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      dst += 32;
      src += 32;
      n_left -= 32;
    }
#endif

  while (n_left >= 16)
    {
      clib_memcpy_u32_x16 (dst, src);
      dst += 16;
      src += 16;
      n_left -= 16;
    }

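  /* with 512-bit mask load/store the remaining 0-15 u32s are copied in a
   * single masked operation, so return unconditionally here */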
#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  if (n_left)
    {
      u16 mask = pow2_mask (n_left);
      u32x16_mask_store (u32x16_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 8)
    {
      clib_memcpy_u32_x8 (dst, src);
      dst += 8;
      src += 8;
      n_left -= 8;
    }

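  /* with 256-bit mask load/store the remaining 0-7 u32s are copied in a
   * single masked operation, so return unconditionally here */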
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  if (n_left)
    {
      u8 mask = pow2_mask (n_left);
      u32x8_mask_store (u32x8_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 4)
    {
      clib_memcpy_u32_x4 (dst, src);
      dst += 4;
      src += 4;
      n_left -= 4;
    }
#endif

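  /* scalar tail; this is also the whole copy when no 128-bit vector unit is
   * available */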
  while (n_left)
    {
      dst[0] = src[0];
      dst += 1;
      src += 1;
      n_left -= 1;
    }
}

#else /* __COVERITY__ */
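
/* Coverity build: fall back to a plain memcpy of n_left 32-bit words
 * (functionally equivalent, simpler for the analyzer). */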
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
  memcpy (dst, src, n_left * sizeof (u32));
}
#endif /* __COVERITY__ */

#endif /* included_memcpy_h */