blob: d2ed716ac8ea1c35612ec45f172de856e74deb21 [file] [log] [blame]
Damjan Mariond154a172021-07-13 21:12:41 +02001/* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
3 */
4
5#ifndef included_vector_compress_h
6#define included_vector_compress_h
7#include <vppinfra/clib.h>
8#include <vppinfra/memcpy.h>
9
/* Compress one 64-element chunk of u64 values: for every set bit i in
   mask, copy src[i] to the next consecutive dst slot.  Returns dst
   advanced by the number of elements stored (popcount of mask). */
static_always_inline u64 *
clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  u64x8u *sv = (u64x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      /* 8 lanes per vector; compress_store consumes the low 8 mask bits */
      u64x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  u64x4u *sv = (u64x4u *) src;
  for (int i = 0; i < 16; i++)
    {
      /* 4 lanes per vector; only the low 4 mask bits apply each round */
      u64x4_compress_store (sv[i], mask, dst);
      dst += _popcnt32 (((u8) mask) & 0x0f);
      mask >>= 4;
    }
#else
  /* scalar fallback: walk set bits of the mask */
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}
36
/** \brief Compress array of 64-bit elements into destination array based on
 * mask

    @param dst destination array of u64 elements
    @param src source array of u64 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/
46
47static_always_inline u32
48clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
49{
50 u64 *dst0 = dst;
51 while (n_elts >= 64)
52 {
53 if (mask[0] == ~0ULL)
54 {
55 clib_memcpy_fast (dst, src, 64 * sizeof (u64));
56 dst += 64;
57 }
58 else
59 dst = clib_compress_u64_x64 (dst, src, mask[0]);
60
61 mask++;
62 src += 64;
63 n_elts -= 64;
64 }
65
66 if (PREDICT_TRUE (n_elts == 0))
67 return dst - dst0;
68
69 return clib_compress_u64_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
70}
71
/* Compress one 64-element chunk of u32 values: for every set bit i in
   mask, copy src[i] to the next consecutive dst slot.  Returns dst
   advanced by the number of elements stored (popcount of mask). */
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS)
  u32x16u *sv = (u32x16u *) src;
  for (int i = 0; i < 4; i++)
    {
      /* 16 lanes per vector; compress_store consumes the low 16 mask bits */
      u32x16_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u16) mask);
      mask >>= 16;
    }

#elif defined(CLIB_HAVE_VEC256_COMPRESS)
  u32x8u *sv = (u32x8u *) src;
  for (int i = 0; i < 8; i++)
    {
      /* 8 lanes per vector; only the low 8 mask bits apply each round */
      u32x8_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u8) mask);
      mask >>= 8;
    }
#else
  /* scalar fallback: walk set bits of the mask */
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}
99
/** \brief Compress array of 32-bit elements into destination array based on
 * mask

    @param dst destination array of u32 elements
    @param src source array of u32 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/
109
110static_always_inline u32
111clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
112{
113 u32 *dst0 = dst;
114 while (n_elts >= 64)
115 {
116 if (mask[0] == ~0ULL)
117 {
118 clib_memcpy_u32 (dst, src, 64);
119 dst += 64;
120 }
121 else
122 dst = clib_compress_u32_x64 (dst, src, mask[0]);
123
124 mask++;
125 src += 64;
126 n_elts -= 64;
127 }
128
129 if (PREDICT_TRUE (n_elts == 0))
130 return dst - dst0;
131
132 return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
133}
134
/* Compress one 64-element chunk of u16 values: for every set bit i in
   mask, copy src[i] to the next consecutive dst slot.  Returns dst
   advanced by the number of elements stored (popcount of mask). */
static_always_inline u16 *
clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  u16x32u *sv = (u16x32u *) src;
  for (int i = 0; i < 2; i++)
    {
      /* 32 lanes per vector; compress_store consumes the low 32 mask bits */
      u16x32_compress_store (sv[i], mask, dst);
      dst += _popcnt32 ((u32) mask);
      mask >>= 32;
    }
#else
  /* scalar fallback: walk set bits of the mask */
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}
153
/** \brief Compress array of 16-bit elements into destination array based on
 * mask

    @param dst destination array of u16 elements
    @param src source array of u16 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/
163
164static_always_inline u32
165clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
166{
167 u16 *dst0 = dst;
168 while (n_elts >= 64)
169 {
170 if (mask[0] == ~0ULL)
171 {
172 clib_memcpy_fast (dst, src, 64 * sizeof (u16));
173 dst += 64;
174 }
175 else
176 dst = clib_compress_u16_x64 (dst, src, mask[0]);
177
178 mask++;
179 src += 64;
180 n_elts -= 64;
181 }
182
183 if (PREDICT_TRUE (n_elts == 0))
184 return dst - dst0;
185
186 return clib_compress_u16_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
187}
188
/* Compress one 64-element chunk of u8 values: for every set bit i in
   mask, copy src[i] to the next consecutive dst slot.  Returns dst
   advanced by the number of elements stored (popcount of mask). */
static_always_inline u8 *
clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
{
#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
  /* a single 64-lane vector covers the whole chunk in one store */
  u8x64u *sv = (u8x64u *) src;
  u8x64_compress_store (sv[0], mask, dst);
  dst += _popcnt64 (mask);
#else
  /* scalar fallback: walk set bits of the mask */
  u32 i;
  foreach_set_bit_index (i, mask)
    dst++[0] = src[i];
#endif
  return dst;
}
203
/** \brief Compress array of 8-bit elements into destination array based on
 * mask

    @param dst destination array of u8 elements
    @param src source array of u8 elements
    @param mask array of u64 values representing compress mask
    @param n_elts number of elements in the source array
    @return number of elements stored in destination array
*/
213
214static_always_inline u32
215clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
216{
217 u8 *dst0 = dst;
218 while (n_elts >= 64)
219 {
220 if (mask[0] == ~0ULL)
221 {
222 clib_memcpy_fast (dst, src, 64);
223 dst += 64;
224 }
225 else
226 dst = clib_compress_u8_x64 (dst, src, mask[0]);
227
228 mask++;
229 src += 64;
230 n_elts -= 64;
231 }
232
233 if (PREDICT_TRUE (n_elts == 0))
234 return dst - dst0;
235
236 return clib_compress_u8_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
237}
238
Damjan Mariond154a172021-07-13 21:12:41 +0200239#endif