blob: adb6503f7115c0524d8a7bf7f2975077e587e9da [file] [log] [blame]
/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_compress_h
#define included_vector_compress_h
#include <vppinfra/clib.h>
#include <vppinfra/memcpy.h>

/* Compress one chunk of up to 64 u64 elements: store src[i] to consecutive
   dst slots for every bit i set in mask (bit i selects src[i], as shown by
   the scalar fallback).  Returns dst advanced past the last element stored.
   NOTE(review): the SIMD paths load all 64 source elements regardless of
   mask, so the full 64-element src chunk must be readable. */
static_always_inline u64 *
clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  /* 8 vectors of 8 lanes; each iteration consumes the low 8 mask bits */
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u64x8_compress_store (sv[i], mask, dst);
      /* advance dst by the number of elements stored from this vector */
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  /* 16 vectors of 4 lanes; each iteration consumes the low 4 mask bits */
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
    {
      u64x4_compress_store (sv[i], mask, dst);
      dst += _popcnt32 (((u8) mask) & 0x0f);
      mask >>= 4;
    }
#else
  /* scalar fallback: peel set bits off mask one at a time */
  while (mask)
    {
      u16 bit = count_trailing_zeros (mask);
      mask = clear_lowest_set_bit (mask);
      dst++[0] = src[bit];
    }
#endif
  return dst;
}

/** \brief Compress array of 64-bit elements into destination array based on
 * mask

    @param dst destination array of u64 elements
    @param src source array of u64 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

50static_always_inline u32
51clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
52{
53 u64 *dst0 = dst;
54 while (n_elts >= 64)
55 {
56 if (mask[0] == ~0ULL)
57 {
58 clib_memcpy_fast (dst, src, 64 * sizeof (u64));
59 dst += 64;
60 }
61 else
62 dst = clib_compress_u64_x64 (dst, src, mask[0]);
63
64 mask++;
65 src += 64;
66 n_elts -= 64;
67 }
68
69 if (PREDICT_TRUE (n_elts == 0))
70 return dst - dst0;
71
72 return clib_compress_u64_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
73}
/* Compress one chunk of up to 64 u32 elements: store src[i] to consecutive
   dst slots for every bit i set in mask.  Returns dst advanced past the
   last element stored.
   NOTE(review): the SIMD paths load all 64 source elements regardless of
   mask, so the full 64-element src chunk must be readable. */
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  /* 4 vectors of 16 lanes; each iteration consumes the low 16 mask bits */
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
    {
      u32x16_compress_store (sv[i], mask, dst);
      /* advance dst by the number of elements stored from this vector */
      dst += _popcnt32 ((u16) mask);
      mask >>= 16;
    }

#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  /* 8 vectors of 8 lanes; each iteration consumes the low 8 mask bits */
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      u32x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#else
  /* scalar fallback: peel set bits off mask one at a time */
  while (mask)
    {
      u16 bit = count_trailing_zeros (mask);
      mask = clear_lowest_set_bit (mask);
      dst++[0] = src[bit];
    }
#endif
  return dst;
}

/** \brief Compress array of 32-bit elements into destination array based on
 * mask

    @param dst destination array of u32 elements
    @param src source array of u32 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

116static_always_inline u32
117clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
118{
119 u32 *dst0 = dst;
120 while (n_elts >= 64)
121 {
122 if (mask[0] == ~0ULL)
123 {
124 clib_memcpy_u32 (dst, src, 64);
125 dst += 64;
126 }
127 else
128 dst = clib_compress_u32_x64 (dst, src, mask[0]);
129
130 mask++;
131 src += 64;
132 n_elts -= 64;
133 }
134
135 if (PREDICT_TRUE (n_elts == 0))
136 return dst - dst0;
137
138 return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
139}
/* Compress one chunk of up to 64 u16 elements: store src[i] to consecutive
   dst slots for every bit i set in mask.  Returns dst advanced past the
   last element stored.
   NOTE(review): the SIMD path loads all 64 source elements regardless of
   mask, so the full 64-element src chunk must be readable. */
static_always_inline u16 *
clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  /* 2 vectors of 32 lanes; each iteration consumes the low 32 mask bits */
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
    {
      u16x32_compress_store (sv[i], mask, dst);
      /* advance dst by the number of elements stored from this vector */
      dst += _popcnt32 ((u32) mask);
      mask >>= 32;
    }
#else
  /* scalar fallback: peel set bits off mask one at a time */
  while (mask)
    {
      u16 bit = count_trailing_zeros (mask);
      mask = clear_lowest_set_bit (mask);
      dst++[0] = src[bit];
    }
#endif
  return dst;
}

/** \brief Compress array of 16-bit elements into destination array based on
 * mask

    @param dst destination array of u16 elements
    @param src source array of u16 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

173static_always_inline u32
174clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
175{
176 u16 *dst0 = dst;
177 while (n_elts >= 64)
178 {
179 if (mask[0] == ~0ULL)
180 {
181 clib_memcpy_fast (dst, src, 64 * sizeof (u16));
182 dst += 64;
183 }
184 else
185 dst = clib_compress_u16_x64 (dst, src, mask[0]);
186
187 mask++;
188 src += 64;
189 n_elts -= 64;
190 }
191
192 if (PREDICT_TRUE (n_elts == 0))
193 return dst - dst0;
194
195 return clib_compress_u16_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
196}
/* Compress one chunk of up to 64 u8 elements: store src[i] to consecutive
   dst slots for every bit i set in mask.  Returns dst advanced past the
   last element stored.
   NOTE(review): the SIMD path loads all 64 source elements regardless of
   mask, so the full 64-element src chunk must be readable. */
static_always_inline u8 *
clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  /* single 64-lane vector covers the whole chunk in one compress-store */
  u8x64u *sv = (u8x64u *) src;
  u8x64_compress_store (sv[0], mask, dst);
  dst += _popcnt64 (mask);
#else
  /* scalar fallback: peel set bits off mask one at a time */
  while (mask)
    {
      u16 bit = count_trailing_zeros (mask);
      mask = clear_lowest_set_bit (mask);
      dst++[0] = src[bit];
    }
#endif
  return dst;
}

/** \brief Compress array of 8-bit elements into destination array based on
 * mask

    @param dst destination array of u8 elements
    @param src source array of u8 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/

226static_always_inline u32
227clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
228{
229 u8 *dst0 = dst;
230 while (n_elts >= 64)
231 {
232 if (mask[0] == ~0ULL)
233 {
234 clib_memcpy_fast (dst, src, 64);
235 dst += 64;
236 }
237 else
238 dst = clib_compress_u8_x64 (dst, src, mask[0]);
239
240 mask++;
241 src += 64;
242 n_elts -= 64;
243 }
244
245 if (PREDICT_TRUE (n_elts == 0))
246 return dst - dst0;
247
248 return clib_compress_u8_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
249}

#endif