/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_compress_h
#define included_vector_compress_h
#include <vppinfra/clib.h>
#include <vppinfra/memcpy.h>

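/* Each clib_compress_* function below copies the elements of `src' whose
   mask bit is set into consecutive slots of `dst'.  For example, with mask
   bits 0 and 2 set, the scalar fallback writes src[0] and src[2] to dst[0]
   and dst[1]; the vector paths do the same with one compress-store per
   lane group, advancing `dst' by the popcount of the mask bits consumed. */
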
static_always_inline u64 *
clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u64x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
    {
      u64x4_compress_store (sv[i], mask, dst);
      dst += _popcnt32 (((u8) mask) & 0x0f);
      mask >>= 4;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

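/* The *_x64_masked variants below are for partial blocks: where masked
   loads are available they zero-load only the selected lanes, so elements
   of `src' past the valid region are never read.  The caller must clear
   the mask bits above the last valid element. */
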
static_always_inline u64 *
clib_compress_u64_x64_masked (u64 *dst, u64 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u64x8u s = u64x8_mask_load_zero (&sv[i], mask);
      u64x8_compress_store (s, mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
  defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
    {
      u64x4u s = u64x4_mask_load_zero (&sv[i], mask);
      u64x4_compress_store (s, mask, dst);
      dst += _popcnt32 (((u8) mask) & 0x0f);
      mask >>= 4;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

/** \brief Compress array of 64-bit elements into destination array based on
 * mask

    @param dst destination array of u64 elements
    @param src source array of u64 elements
    @param mask array of u64 bitmaps; bit i of mask[j] selects src[64 * j + i]
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

static_always_inline u32
clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
{
  u64 *dst0 = dst;
  while (n_elts >= 64)
    {
      if (mask[0] == ~0ULL)
        {
          clib_memcpy_fast (dst, src, 64 * sizeof (u64));
          dst += 64;
        }
      else
        dst = clib_compress_u64_x64 (dst, src, mask[0]);

      mask++;
      src += 64;
      n_elts -= 64;
    }

  if (PREDICT_TRUE (n_elts == 0))
    return dst - dst0;

  return clib_compress_u64_x64_masked (dst, src,
                                       mask[0] & pow2_mask (n_elts)) -
         dst0;
}

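/* Usage sketch (illustrative helper, not part of this header): keep only
   the non-zero values of a 128-element array.  Bit i of mask[j] selects
   src[64 * j + i]; the return value is the number of elements kept. */
static_always_inline u32
compress_nonzero_u64 (u64 *dst, u64 *src)
{
  u64 mask[2] = { 0, 0 };
  for (u32 i = 0; i < 128; i++)
    mask[i >> 6] |= (u64) (src[i] != 0) << (i & 63);
  return clib_compress_u64 (dst, src, mask, 128);
}
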
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
    {
      u32x16_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u16) mask);
      mask >>= 16;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u32x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

static_always_inline u32 *
clib_compress_u32_x64_masked (u32 *dst, u32 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
    {
      u32x16u s = u32x16_mask_load_zero (&sv[i], mask);
      u32x16_compress_store (s, mask, dst);
      dst += _popcnt32 ((u16) mask);
      mask >>= 16;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
  defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u32x8u s = u32x8_mask_load_zero (&sv[i], mask);
      u32x8_compress_store (s, mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

/** \brief Compress array of 32-bit elements into destination array based on
 * mask

    @param dst destination array of u32 elements
    @param src source array of u32 elements
    @param mask array of u64 bitmaps; bit i of mask[j] selects src[64 * j + i]
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

static_always_inline u32
clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
{
  u32 *dst0 = dst;
  while (n_elts >= 64)
    {
      if (mask[0] == ~0ULL)
        {
          clib_memcpy_u32 (dst, src, 64);
          dst += 64;
        }
      else
        dst = clib_compress_u32_x64 (dst, src, mask[0]);

      mask++;
      src += 64;
      n_elts -= 64;
    }

  if (PREDICT_TRUE (n_elts == 0))
    return dst - dst0;

  return clib_compress_u32_x64_masked (dst, src,
                                       mask[0] & pow2_mask (n_elts)) -
         dst0;
}

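/* Usage sketch (illustrative, mirrors the u64 example above): a partial
   block is fine -- for n_elts = 40 only mask bits 0..39 are honored, via
   the `mask[0] & pow2_mask (n_elts)' tail handling above. */
static_always_inline u32
compress_even_u32 (u32 *dst, u32 *src)
{
  u64 mask = 0;
  for (u32 i = 0; i < 40; i++)
    mask |= (u64) ((src[i] & 1) == 0) << i;
  return clib_compress_u32 (dst, src, &mask, 40);
}
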
static_always_inline u16 *
clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
    {
      u16x32_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u32) mask);
      mask >>= 32;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

static_always_inline u16 *
clib_compress_u16_x64_masked (u16 *dst, u16 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
  defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
    {
      u16x32u s = u16x32_mask_load_zero (&sv[i], mask);
      u16x32_compress_store (s, mask, dst);
      dst += _popcnt32 ((u32) mask);
      mask >>= 32;
    }
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

/** \brief Compress array of 16-bit elements into destination array based on
 * mask

    @param dst destination array of u16 elements
    @param src source array of u16 elements
    @param mask array of u64 bitmaps; bit i of mask[j] selects src[64 * j + i]
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

static_always_inline u32
clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
{
  u16 *dst0 = dst;
  while (n_elts >= 64)
    {
      if (mask[0] == ~0ULL)
        {
          clib_memcpy_fast (dst, src, 64 * sizeof (u16));
          dst += 64;
        }
      else
        dst = clib_compress_u16_x64 (dst, src, mask[0]);

      mask++;
      src += 64;
      n_elts -= 64;
    }

  if (PREDICT_TRUE (n_elts == 0))
    return dst - dst0;

  return clib_compress_u16_x64_masked (dst, src,
                                       mask[0] & pow2_mask (n_elts)) -
         dst0;
}

static_always_inline u8 *
clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  u8x64u *sv = (u8x64u *) src;
  u8x64_compress_store (sv[0], mask, dst);
  dst += _popcnt64 (mask);
#else
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}

293
Dmitry Valter31502502024-03-06 22:43:27 +0000294static_always_inline u8 *
295clib_compress_u8_x64_masked (u8 *dst, u8 *src, u64 mask)
296{
297#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
298 defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
299 u8x64u *sv = (u8x64u *) src;
300 u8x64u s = u8x64_mask_load_zero (sv, mask);
301 u8x64_compress_store (s, mask, dst);
302 dst += _popcnt64 (mask);
303#else
304 u32 i;
305 foreach_set_bit_index (i, mask)
306 dst++[0] = src[i];
307#endif
308 return dst;
309}
310
/** \brief Compress array of 8-bit elements into destination array based on
 * mask

    @param dst destination array of u8 elements
    @param src source array of u8 elements
    @param mask array of u64 bitmaps; bit i of mask[j] selects src[64 * j + i]
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

static_always_inline u32
clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
{
  u8 *dst0 = dst;
  while (n_elts >= 64)
    {
      if (mask[0] == ~0ULL)
        {
          clib_memcpy_fast (dst, src, 64);
          dst += 64;
        }
      else
        dst = clib_compress_u8_x64 (dst, src, mask[0]);

      mask++;
      src += 64;
      n_elts -= 64;
    }

  if (PREDICT_TRUE (n_elts == 0))
    return dst - dst0;

  return clib_compress_u8_x64_masked (dst, src, mask[0] & pow2_mask (n_elts)) -
         dst0;
}

#endif