/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#include <vppinfra/clib.h>
#ifndef included_memcpy_h
#define included_memcpy_h

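/* Copy n_bytes from src to dst using the widest available unaligned vector
 * loads and stores (u64 when no vector unit is available). The tail copy is
 * rounded up to a full vector, so up to one vector width minus one byte past
 * the end of both buffers may be read and written; callers must ensure that
 * slack space is accessible. */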
static_always_inline void
clib_memcpy_may_overrun (void *dst, void *src, u32 n_bytes)
{
  word n_left = n_bytes;
#if defined(CLIB_HAVE_VEC512)
  u8x64u *sv = (u8x64u *) src;
  u8x64u *dv = (u8x64u *) dst;
#elif defined(CLIB_HAVE_VEC256)
  u8x32u *sv = (u8x32u *) src;
  u8x32u *dv = (u8x32u *) dst;
#elif defined(CLIB_HAVE_VEC128)
  u8x16u *sv = (u8x16u *) src;
  u8x16u *dv = (u8x16u *) dst;
#else
  u64u *sv = (u64u *) src;
  u64u *dv = (u64u *) dst;
#endif

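  /* main loop: copy four full vectors per iteration */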
  while (n_left >= 4 * sizeof (sv[0]))
    {
      __typeof__ (*sv) v0, v1, v2, v3;
      v0 = sv[0];
      v1 = sv[1];
      v2 = sv[2];
      v3 = sv[3];
      sv += 4;
      n_left -= 4 * sizeof (sv[0]);
      dv[0] = v0;
      dv[1] = v1;
      dv[2] = v2;
      dv[3] = v3;
      dv += 4;
    }

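  /* tail: one vector per iteration, the last copy may run past n_bytes */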
  while (n_left > 0)
    {
      dv[0] = sv[0];
      sv += 1;
      dv += 1;
      n_left -= sizeof (sv[0]);
    }
}

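/* When building for Coverity static analysis, skip the intrinsic-based
 * implementation of clib_memcpy_u32 () below and use the plain memcpy ()
 * fallback at the end of this file instead. */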
#ifndef __COVERITY__

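/* Fixed-size copies of 4, 8 and 16 u32 elements: a single unaligned vector
 * load/store when a register of that width is available, otherwise two
 * copies of the next smaller size (or clib_memcpy_fast ()). */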
static_always_inline void
clib_memcpy_u32_x4 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC128)
  u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
#else
  clib_memcpy_fast (dst, src, 4 * sizeof (u32));
#endif
}

static_always_inline void
clib_memcpy_u32_x8 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC256)
  u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x4 (dst, src);
  clib_memcpy_u32_x4 (dst + 4, src + 4);
#endif
}

static_always_inline void
clib_memcpy_u32_x16 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x8 (dst, src);
  clib_memcpy_u32_x8 (dst + 8, src + 8);
#endif
}

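/* Copy n_left u32 elements from src to dst. Unlike memcpy (), the count is
 * in elements, not bytes, and the tail is handled with masked or scalar
 * operations instead of over-copying. */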
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
#if defined(CLIB_HAVE_VEC128)
  if (COMPILE_TIME_CONST (n_left))
    {
      /* when n_left is a compile-time constant, keep the compiler from
       * emitting the more expensive mask load/stores for common sizes that a
       * plain register-sized load/store can cover */
      switch (n_left)
        {
        case 4:
          clib_memcpy_u32_x4 (dst, src);
          return;
        case 8:
          clib_memcpy_u32_x8 (dst, src);
          return;
        case 12:
          clib_memcpy_u32_x8 (dst, src);
          clib_memcpy_u32_x4 (dst + 8, src + 8);
          return;
        case 16:
          clib_memcpy_u32_x16 (dst, src);
          return;
        case 32:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          return;
        case 64:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          clib_memcpy_u32_x16 (dst + 32, src + 32);
          clib_memcpy_u32_x16 (dst + 48, src + 48);
          return;
        default:
          break;
        }
    }

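  /* bulk copy loops, widest stride first */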
#if defined(CLIB_HAVE_VEC512)
  while (n_left >= 64)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      clib_memcpy_u32_x16 (dst + 32, src + 32);
      clib_memcpy_u32_x16 (dst + 48, src + 48);
      dst += 64;
      src += 64;
      n_left -= 64;
    }
#endif

#if defined(CLIB_HAVE_VEC256)
  while (n_left >= 32)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      dst += 32;
      src += 32;
      n_left -= 32;
    }
#endif

  while (n_left >= 16)
    {
      clib_memcpy_u32_x16 (dst, src);
      dst += 16;
      src += 16;
      n_left -= 16;
    }

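  /* with 512-bit mask load/store support, the remaining 0..15 elements are
   * copied in a single masked operation */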
#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  if (n_left)
    {
      u16 mask = pow2_mask (n_left);
      u32x16_mask_store (u32x16_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 8)
    {
      clib_memcpy_u32_x8 (dst, src);
      dst += 8;
      src += 8;
      n_left -= 8;
    }

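  /* with 256-bit mask load/store support, the remaining 0..7 elements are
   * copied in a single masked operation */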
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  if (n_left)
    {
      u8 mask = pow2_mask (n_left);
      u32x8_mask_store (u32x8_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 4)
    {
      clib_memcpy_u32_x4 (dst, src);
      dst += 4;
      src += 4;
      n_left -= 4;
    }
#endif

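  /* scalar tail, also the whole copy when no vector unit is available */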
  while (n_left)
    {
      dst[0] = src[0];
      dst += 1;
      src += 1;
      n_left -= 1;
    }
}

#else /* __COVERITY__ */
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
  memcpy (dst, src, n_left * sizeof (u32));
}
#endif /* __COVERITY__ */

#endif /* included_memcpy_h */