/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/** \file

    Optimized string handling code, including C11-compliant
    "safe C library" variants.
*/

#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>

#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#if __x86_64__
#include <x86intrin.h>
#endif

/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);
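
/*
 * Usage sketch (hypothetical caller, not part of this header):
 * swap two equal-sized records in place, no caller-side temporary.
 *
 *   u8 a[48], b[48];
 *   ...
 *   clib_memswap (a, b, sizeof (a));
 */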

/*
 * The vector unit memcpy variants confuse Coverity,
 * so don't let it anywhere near them.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
#define _clib_memcpy(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
#define _clib_memcpy(a,b,c) memcpy(a,b,c)
#endif

/* C11 string manipulation variants */

#ifndef EOK
#define EOK 0
#endif
#ifndef EINVAL
#define EINVAL 22
#endif

typedef int errno_t;
typedef uword rsize_t;

void clib_c11_violation (const char *s);
errno_t memcpy_s (void *__restrict__ dest, rsize_t dmax,
		  const void *__restrict__ src, rsize_t n);

always_inline errno_t
memcpy_s_inline (void *__restrict__ dest, rsize_t dmax,
		 const void *__restrict__ src, rsize_t n)
{
  uword low, hi;
  u8 bad;

  /*
   * The call is bogus if: src or dest is NULL, we're asked to copy
   * more data than we have space for in dest, or src == dest.
   * n == 0 isn't really "bad", so check for it first in the
   * "wall-of-shame" department...
   */
  bad = (dest == 0) + (src == 0) + (n > dmax) + (dest == src) + (n == 0);
  if (PREDICT_FALSE (bad != 0))
    {
      /* Not actually trying to copy anything is OK */
      if (n == 0)
	return EOK;
      if (dest == NULL)
	clib_c11_violation ("dest NULL");
      if (src == NULL)
	clib_c11_violation ("src NULL");
      if (n > dmax)
	clib_c11_violation ("n > dmax");
      if (dest == src)
	clib_c11_violation ("dest == src");
      return EINVAL;
    }

  /* Check for src/dest overlap, which is not allowed */
  low = (uword) (src < dest ? src : dest);
  hi = (uword) (src < dest ? dest : src);

  if (PREDICT_FALSE (low + (n - 1) >= hi))
    {
      clib_c11_violation ("src/dest overlap");
      return EINVAL;
    }

  _clib_memcpy (dest, src, n);
  return EOK;
}
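
/*
 * A minimal usage sketch (assumed caller code, not from this file).
 * Unlike the clib_memcpy() macro below, a direct call lets the caller
 * pass the true destination size as dmax, so overruns are detected:
 *
 *   u8 dst[64];
 *   if (memcpy_s_inline (dst, sizeof (dst), src, n) != EOK)
 *     return -1;   // rejected: NULL pointer, n > sizeof (dst), or overlap
 */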

/*
 * Note: $$$ This macro is a crutch. Folks need to manually
 * inspect every extant clib_memcpy(...) call and
 * attempt to provide a real destination buffer size
 * argument...
 */
#define clib_memcpy(d,s,n) memcpy_s_inline(d,n,s,n)

errno_t memset_s (void *s, rsize_t smax, int c, rsize_t n);

always_inline errno_t
memset_s_inline (void *s, rsize_t smax, int c, rsize_t n)
{
  u8 bad;

  bad = (s == 0) + (n > smax);

  if (PREDICT_FALSE (bad != 0))
    {
      if (s == 0)
	clib_c11_violation ("s NULL");
      if (n > smax)
	clib_c11_violation ("n > smax");
      return (EINVAL);
    }
  memset (s, c, n);
  return (EOK);
}

/*
 * This macro is not [so much of] a crutch.
 * It's super-typical to write:
 *
 *   ep = pool_get (<pool>);
 *   clib_memset (ep, 0, sizeof (*ep));
 *
 * The compiler should delete the not-so-useful
 * (n > smax) test. TBH the NULL pointer check isn't
 * so useful in this case, but so be it.
 */
#define clib_memset(s,c,n) memset_s_inline(s,n,c,n)

/*
 * Copy 64 bytes of data to 4 destinations.
 * This function is typically used in the quad-loop case, when a whole
 * cacheline needs to be copied to 4 different places. It first reads the
 * whole cacheline into 1/2/4 SIMD registers, then writes the data to the
 * 4 destinations.
 */

static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);

#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);

#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  clib_memcpy (d0, s, 64);
  clib_memcpy (d1, s, 64);
  clib_memcpy (d2, s, 64);
  clib_memcpy (d3, s, 64);
#endif
}
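
/*
 * Typical quad-loop use (illustrative sketch; b0..b3, 'rewrite' and
 * vlib_buffer_get_current() are assumptions from VPP node code, not
 * defined in this header): replicate one 64-byte rewrite string into
 * four packet buffers per iteration.
 *
 *   clib_memcpy64_x4 (vlib_buffer_get_current (b0),
 *                     vlib_buffer_get_current (b1),
 *                     vlib_buffer_get_current (b2),
 *                     vlib_buffer_get_current (b3), rewrite);
 */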

static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  while (count >= 8)
    {
      u64x8_store_unaligned (v512, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  while (count >= 4)
    {
      u64x4_store_unaligned (v256, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}
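
/*
 * Usage sketch (hypothetical, not from this file). Note that all of the
 * clib_memset_u* variants take an element count, not a byte count:
 *
 *   u64 counters[256];
 *   clib_memset_u64 (counters, ~0ULL, ARRAY_LEN (counters));
 *
 * The same pattern applies to the u32/u16/u8 variants below.
 */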

static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  while (count >= 16)
    {
      u32x16_store_unaligned (v512, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  while (count >= 8)
    {
      u32x8_store_unaligned (v256, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  while (count >= 4)
    {
      u32x4_store_unaligned (v128, ptr);
      ptr += 4;
      count -= 4;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  while (count >= 32)
    {
      u16x32_store_unaligned (v512, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  while (count >= 16)
    {
      u16x16_store_unaligned (v256, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  while (count >= 8)
    {
      u16x8_store_unaligned (v128, ptr);
      ptr += 8;
      count -= 8;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  while (count >= 64)
    {
      u8x64_store_unaligned (v512, ptr);
      ptr += 64;
      count -= 64;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  while (count >= 32)
    {
      u8x32_store_unaligned (v256, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  while (count >= 16)
    {
      u8x16_store_unaligned (v128, ptr);
      ptr += 16;
      count -= 16;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
  uword count;
  u64 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u64x4 splat = u64x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 8;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}
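
/*
 * Hypothetical run-length scan built on this primitive (names are
 * illustrative, not from this header): walk an array and process each
 * run of identical consecutive values as a batch.
 *
 *   uword i = 0;
 *   while (i < n_elts)
 *     {
 *       uword run = clib_count_equal_u64 (data + i, n_elts - i);
 *       process_run (data[i], run);
 *       i += run;
 *     }
 */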

static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
  uword count;
  u32 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u32x8 splat = u32x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u32x4 splat = u32x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
  uword count;
  u16 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u16x16 splat = u16x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u16x8 splat = u16x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

static_always_inline uword
clib_count_equal_u8 (u8 * data, uword max_count)
{
  uword count;
  u8 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u8x32 splat = u8x32_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 32;
      count += 32;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u8x16 splat = u8x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

#endif /* included_clib_string_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */