blob: a2aeecd9ba033daa851cda9184a8977e2caadf5a [file] [log] [blame]
/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */
4
5#ifndef included_vector_count_equal_h
6#define included_vector_count_equal_h
7#include <vppinfra/clib.h>
8
9static_always_inline uword
10clib_count_equal_u64 (u64 *data, uword max_count)
11{
12 uword count;
13 u64 first;
14
15 if (max_count <= 1)
16 return max_count;
17 if (data[0] != data[1])
18 return 1;
19
20 count = 0;
21 first = data[0];
22
23#if defined(CLIB_HAVE_VEC256)
24 u64x4 splat = u64x4_splat (first);
25 while (count + 3 < max_count)
26 {
27 u64 bmp;
28 bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
29 if (bmp != 0xffffffff)
30 {
31 count += count_trailing_zeros (~bmp) / 8;
32 return count;
33 }
34
35 data += 4;
36 count += 4;
37 }
38#else
39 count += 2;
40 data += 2;
41 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
42 (data[2] ^ first) | (data[3] ^ first)) == 0)
43 {
44 data += 4;
45 count += 4;
46 }
47#endif
48 while (count < max_count && (data[0] == first))
49 {
50 data += 1;
51 count += 1;
52 }
53 return count;
54}
55
56static_always_inline uword
57clib_count_equal_u32 (u32 *data, uword max_count)
58{
59 uword count;
60 u32 first;
61
62 if (max_count <= 1)
63 return max_count;
64 if (data[0] != data[1])
65 return 1;
66
67 count = 0;
68 first = data[0];
69
Damjan Marion7459be12021-11-05 20:44:09 +010070#if defined(CLIB_HAVE_VEC512)
71 u32x16 splat = u32x16_splat (first);
72 while (count + 15 < max_count)
73 {
74 u32 bmp;
75 bmp = u32x16_is_equal_mask (u32x16_load_unaligned (data), splat);
76 if (bmp != pow2_mask (16))
77 return count + count_trailing_zeros (~bmp);
78
79 data += 16;
80 count += 16;
81 }
82 if (count == max_count)
83 return count;
84 else
85 {
86 u32 mask = pow2_mask (max_count - count);
87 u32 bmp =
88 u32x16_is_equal_mask (u32x16_mask_load_zero (data, mask), splat);
89 return count + count_trailing_zeros (~bmp);
90 }
91#elif defined(CLIB_HAVE_VEC256)
Damjan Marion4c276f02021-11-06 13:17:31 +010092 u32x8 splat = u32x8_splat (first);
93 while (count + 7 < max_count)
94 {
Damjan Marion7459be12021-11-05 20:44:09 +010095 u32 bmp;
96#ifdef __AVX512F__
97 bmp = u32x8_is_equal_mask (u32x8_load_unaligned (data), splat);
98 if (bmp != pow2_mask (8))
99 return count + count_trailing_zeros (~bmp);
100#else
Damjan Marion4c276f02021-11-06 13:17:31 +0100101 bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
102 if (bmp != 0xffffffff)
Damjan Marion7459be12021-11-05 20:44:09 +0100103 return count + count_trailing_zeros (~bmp) / 4;
104#endif
Damjan Marion4c276f02021-11-06 13:17:31 +0100105
106 data += 8;
107 count += 8;
108 }
Damjan Marion7459be12021-11-05 20:44:09 +0100109 if (count == max_count)
110 return count;
111#if defined(CxLIB_HAVE_VEC256_MASK_LOAD_STORE)
112 else
113 {
114 u32 mask = pow2_mask (max_count - count);
115 u32 bmp = u32x8_is_equal_mask (u32x8_mask_load_zero (data, mask), splat);
116 return count + count_trailing_zeros (~bmp);
117 }
118#endif
Damjan Marion4c276f02021-11-06 13:17:31 +0100119#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
120 u32x4 splat = u32x4_splat (first);
121 while (count + 3 < max_count)
122 {
123 u64 bmp;
124 bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
Damjan Marion7459be12021-11-05 20:44:09 +0100125 if (bmp != pow2_mask (4 * 4))
Damjan Marion4c276f02021-11-06 13:17:31 +0100126 {
127 count += count_trailing_zeros (~bmp) / 4;
128 return count;
129 }
130
131 data += 4;
132 count += 4;
133 }
134#else
135 count += 2;
136 data += 2;
137 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
138 (data[2] ^ first) | (data[3] ^ first)) == 0)
139 {
140 data += 4;
141 count += 4;
142 }
143#endif
144 while (count < max_count && (data[0] == first))
145 {
146 data += 1;
147 count += 1;
148 }
149 return count;
150}
151
152static_always_inline uword
153clib_count_equal_u16 (u16 *data, uword max_count)
154{
155 uword count;
156 u16 first;
157
158 if (max_count <= 1)
159 return max_count;
160 if (data[0] != data[1])
161 return 1;
162
163 count = 0;
164 first = data[0];
165
166#if defined(CLIB_HAVE_VEC256)
167 u16x16 splat = u16x16_splat (first);
168 while (count + 15 < max_count)
169 {
170 u64 bmp;
171 bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
172 if (bmp != 0xffffffff)
173 {
174 count += count_trailing_zeros (~bmp) / 2;
175 return count;
176 }
177
178 data += 16;
179 count += 16;
180 }
181#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
182 u16x8 splat = u16x8_splat (first);
183 while (count + 7 < max_count)
184 {
185 u64 bmp;
186 bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
187 if (bmp != 0xffff)
188 {
189 count += count_trailing_zeros (~bmp) / 2;
190 return count;
191 }
192
193 data += 8;
194 count += 8;
195 }
196#else
197 count += 2;
198 data += 2;
199 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
200 (data[2] ^ first) | (data[3] ^ first)) == 0)
201 {
202 data += 4;
203 count += 4;
204 }
205#endif
206 while (count < max_count && (data[0] == first))
207 {
208 data += 1;
209 count += 1;
210 }
211 return count;
212}
213
214static_always_inline uword
215clib_count_equal_u8 (u8 *data, uword max_count)
216{
217 uword count;
218 u8 first;
219
220 if (max_count <= 1)
221 return max_count;
222 if (data[0] != data[1])
223 return 1;
224
225 count = 0;
226 first = data[0];
227
Damjan Marion7459be12021-11-05 20:44:09 +0100228#if defined(CLIB_HAVE_VEC512)
229 u8x64 splat = u8x64_splat (first);
230 while (count + 63 < max_count)
231 {
232 u64 bmp;
233 bmp = u8x64_is_equal_mask (u8x64_load_unaligned (data), splat);
234 if (bmp != -1)
235 return count + count_trailing_zeros (~bmp);
236
237 data += 64;
238 count += 64;
239 }
240 if (count == max_count)
241 return count;
242#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
243 else
244 {
245 u64 mask = pow2_mask (max_count - count);
246 u64 bmp = u8x64_is_equal_mask (u8x64_mask_load_zero (data, mask), splat);
247 return count + count_trailing_zeros (~bmp);
248 }
249#endif
250#elif defined(CLIB_HAVE_VEC256)
Damjan Marion4c276f02021-11-06 13:17:31 +0100251 u8x32 splat = u8x32_splat (first);
252 while (count + 31 < max_count)
253 {
254 u64 bmp;
255 bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
256 if (bmp != 0xffffffff)
Damjan Marion7459be12021-11-05 20:44:09 +0100257 return count + count_trailing_zeros (~bmp);
Damjan Marion4c276f02021-11-06 13:17:31 +0100258
259 data += 32;
260 count += 32;
261 }
Damjan Marion7459be12021-11-05 20:44:09 +0100262 if (count == max_count)
263 return count;
264#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
265 else
266 {
267 u32 mask = pow2_mask (max_count - count);
268 u64 bmp = u8x32_msb_mask (u8x32_mask_load_zero (data, mask) == splat);
269 return count + count_trailing_zeros (~bmp);
270 }
271#endif
Damjan Marion4c276f02021-11-06 13:17:31 +0100272#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
273 u8x16 splat = u8x16_splat (first);
274 while (count + 15 < max_count)
275 {
276 u64 bmp;
277 bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
278 if (bmp != 0xffff)
Damjan Marion7459be12021-11-05 20:44:09 +0100279 return count + count_trailing_zeros (~bmp);
Damjan Marion4c276f02021-11-06 13:17:31 +0100280
281 data += 16;
282 count += 16;
283 }
284#else
285 count += 2;
286 data += 2;
287 while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
288 (data[2] ^ first) | (data[3] ^ first)) == 0)
289 {
290 data += 4;
291 count += 4;
292 }
293#endif
294 while (count < max_count && (data[0] == first))
295 {
296 data += 1;
297 count += 1;
298 }
299 return count;
300}
Damjan Marion7459be12021-11-05 20:44:09 +0100301
Damjan Marion4c276f02021-11-06 13:17:31 +0100302#endif