/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#include <vppinfra/clib.h>
#ifndef included_memcpy_h
#define included_memcpy_h
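
/* Copy n_bytes from src to dst using full-vector loads and stores.
 * The tail loop always moves a whole vector, so it may read and write
 * past the end of both buffers by up to one vector width; callers must
 * ensure that trailing memory is accessible. */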
static_always_inline void
clib_memcpy_may_overrun (void *dst, void *src, u32 n_bytes)
{
  word n_left = n_bytes;
#if defined(CLIB_HAVE_VEC512)
  u8x64u *sv = (u8x64u *) src;
  u8x64u *dv = (u8x64u *) dst;
#elif defined(CLIB_HAVE_VEC256)
  u8x32u *sv = (u8x32u *) src;
  u8x32u *dv = (u8x32u *) dst;
#elif defined(CLIB_HAVE_VEC128)
  u8x16u *sv = (u8x16u *) src;
  u8x16u *dv = (u8x16u *) dst;
#else
  u64u *sv = (u64u *) src;
  u64u *dv = (u64u *) dst;
#endif

  /* unrolled main loop: 4 full vectors per iteration */
  while (n_left >= 4 * sizeof (sv[0]))
    {
      __typeof__ (*sv) v0, v1, v2, v3;
      v0 = sv[0];
      v1 = sv[1];
      v2 = sv[2];
      v3 = sv[3];
      sv += 4;
      n_left -= 4 * sizeof (sv[0]);
      dv[0] = v0;
      dv[1] = v1;
      dv[2] = v2;
      dv[3] = v3;
      dv += 4;
    }

  /* tail: each iteration copies a full vector, so the last one may
   * run past n_bytes; this is the documented overrun */
  while (n_left > 0)
    {
      dv[0] = sv[0];
      sv += 1;
      dv += 1;
      n_left -= sizeof (sv[0]);
    }
}
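
/* Usage sketch (names hypothetical): safe only when both buffers have
 * at least one vector of spare room past n_bytes, e.g. a packet
 * buffer with tailroom:
 *
 *   clib_memcpy_may_overrun (pkt->data, hdr, hdr_len);
 */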

#ifndef __COVERITY__
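
/* Fixed-size copies of 4, 8 and 16 u32 elements: a single unaligned
 * vector load/store where the target supports it, otherwise two
 * copies of the next smaller size (or clib_memcpy_fast for x4). */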
static_always_inline void
clib_memcpy_u32_x4 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC128)
  u32x4_store_unaligned (u32x4_load_unaligned (src), dst);
#else
  clib_memcpy_fast (dst, src, 4 * sizeof (u32));
#endif
}

static_always_inline void
clib_memcpy_u32_x8 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC256)
  u32x8_store_unaligned (u32x8_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x4 (dst, src);
  clib_memcpy_u32_x4 (dst + 4, src + 4);
#endif
}

static_always_inline void
clib_memcpy_u32_x16 (u32 *dst, u32 *src)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16_store_unaligned (u32x16_load_unaligned (src), dst);
#else
  clib_memcpy_u32_x8 (dst, src);
  clib_memcpy_u32_x8 (dst + 8, src + 8);
#endif
}
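
/* Copy n_left u32 elements from src to dst. Unlike
 * clib_memcpy_may_overrun, this is exact: remainders are finished with
 * masked loads/stores where available, otherwise with smaller vector
 * copies and a final scalar loop. */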
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
#if defined(CLIB_HAVE_VEC128)
  if (COMPILE_TIME_CONST (n_left))
    {
      /* for n_left given as a compile-time constant, prevent the
       * compiler from using more expensive masked load/store in the
       * common cases where a smaller register load/store exists */
      switch (n_left)
        {
        case 4:
          clib_memcpy_u32_x4 (dst, src);
          return;
        case 8:
          clib_memcpy_u32_x8 (dst, src);
          return;
        case 12:
          clib_memcpy_u32_x8 (dst, src);
          clib_memcpy_u32_x4 (dst + 8, src + 8);
          return;
        case 16:
          clib_memcpy_u32_x16 (dst, src);
          return;
        case 32:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          return;
        case 64:
          clib_memcpy_u32_x16 (dst, src);
          clib_memcpy_u32_x16 (dst + 16, src + 16);
          clib_memcpy_u32_x16 (dst + 32, src + 32);
          clib_memcpy_u32_x16 (dst + 48, src + 48);
          return;
        default:
          break;
        }
    }
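
  /* for example, with a literal count,
   *   clib_memcpy_u32 (dst, src, 8);
   * reduces to clib_memcpy_u32_x8 (a single unaligned load/store on
   * targets with 256-bit vectors) */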

#if defined(CLIB_HAVE_VEC512)
  while (n_left >= 64)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      clib_memcpy_u32_x16 (dst + 32, src + 32);
      clib_memcpy_u32_x16 (dst + 48, src + 48);
      dst += 64;
      src += 64;
      n_left -= 64;
    }
#endif

#if defined(CLIB_HAVE_VEC256)
  while (n_left >= 32)
    {
      clib_memcpy_u32_x16 (dst, src);
      clib_memcpy_u32_x16 (dst + 16, src + 16);
      dst += 32;
      src += 32;
      n_left -= 32;
    }
#endif

  while (n_left >= 16)
    {
      clib_memcpy_u32_x16 (dst, src);
      dst += 16;
      src += 16;
      n_left -= 16;
    }

#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  /* masked load/store finishes the remaining 0-15 elements in one step */
  if (n_left)
    {
      u16 mask = pow2_mask (n_left);
      u32x16_mask_store (u32x16_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 8)
    {
      clib_memcpy_u32_x8 (dst, src);
      dst += 8;
      src += 8;
      n_left -= 8;
    }

#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  /* masked load/store finishes the remaining 0-7 elements in one step */
  if (n_left)
    {
      u8 mask = pow2_mask (n_left);
      u32x8_mask_store (u32x8_mask_load_zero (src, mask), dst, mask);
    }
  return;
#endif

  if (n_left >= 4)
    {
      clib_memcpy_u32_x4 (dst, src);
      dst += 4;
      src += 4;
      n_left -= 4;
    }
#endif

  /* scalar tail (also the whole copy when no vector unit is available) */
  while (n_left)
    {
      dst[0] = src[0];
      dst += 1;
      src += 1;
      n_left -= 1;
    }
}

#else /* __COVERITY__ */
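/* Coverity fallback: give the analyzer a plain memcpy, which it models
 * more reliably than the vector intrinsics above. */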
static_always_inline void
clib_memcpy_u32 (u32 *dst, u32 *src, u32 n_left)
{
  memcpy (dst, src, n_left * sizeof (u32));
}
#endif /* __COVERITY__ */

#endif /* included_memcpy_h */