/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

/** \file

    Optimized string handling code, including C11-compliant
    "safe C library" variants.
*/

#ifndef included_clib_string_h
#define included_clib_string_h

#include <vppinfra/clib.h>	/* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>

#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
#endif

#ifdef CLIB_UNIX
#include <string.h>
#endif

#ifdef CLIB_STANDALONE
#include <vppinfra/standalone_string.h>
#endif

#if defined(__x86_64__)
#include <x86intrin.h>
#endif

/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);

/*
 * The vector unit memcpy variants confuse Coverity,
 * so don't let it anywhere near them.
 */
#ifndef __COVERITY__
#if __AVX512F__
#include <vppinfra/memcpy_avx512.h>
#elif __AVX2__
#include <vppinfra/memcpy_avx2.h>
#elif __SSSE3__
#include <vppinfra/memcpy_sse3.h>
#else
#define clib_memcpy_fast(a,b,c) memcpy(a,b,c)
#endif
#else /* __COVERITY__ */
#define clib_memcpy_fast(a,b,c) memcpy(a,b,c)
#endif

/* C11 string manipulation variants */

#ifndef EOK
#define EOK 0
#endif
#ifndef EINVAL
#define EINVAL 22
#endif

typedef int errno_t;
typedef uword rsize_t;

void clib_c11_violation (const char *s);
errno_t memcpy_s (void *__restrict__ dest, rsize_t dmax,
		  const void *__restrict__ src, rsize_t n);

always_inline errno_t
memcpy_s_inline (void *__restrict__ dest, rsize_t dmax,
		 const void *__restrict__ src, rsize_t n)
{
  uword low, hi;
  u8 bad;

  /*
   * Optimize constant-number-of-bytes calls without asking
   * "too many questions for someone from New Jersey."
   */
  if (__builtin_constant_p (n))
    {
      clib_memcpy_fast (dest, src, n);
      return EOK;
    }

  /*
   * The call is bogus if: src or dest is NULL, we're trying to copy
   * more data than dest can hold, or src == dest.
   * n == 0 isn't really "bad", so check for it first in the
   * "wall-of-shame" department...
   */
  bad = (dest == 0) + (src == 0) + (n > dmax) + (dest == src) + (n == 0);
  if (PREDICT_FALSE (bad != 0))
    {
      /* Not actually trying to copy anything is OK */
      if (n == 0)
	return EOK;
      if (dest == NULL)
	clib_c11_violation ("dest NULL");
      if (src == NULL)
	clib_c11_violation ("src NULL");
      if (n > dmax)
	clib_c11_violation ("n > dmax");
      if (dest == src)
	clib_c11_violation ("dest == src");
      return EINVAL;
    }

  /* Check for src/dest overlap, which is not allowed */
  low = (uword) (src < dest ? src : dest);
  hi = (uword) (src < dest ? dest : src);

  if (PREDICT_FALSE (low + (n - 1) >= hi))
    {
      clib_c11_violation ("src/dest overlap");
      return EINVAL;
    }

  clib_memcpy_fast (dest, src, n);
  return EOK;
}

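/*
 * Usage sketch (illustrative only; the buffer names are hypothetical):
 *
 *   u8 dst[64], src[32];
 *
 *   if (memcpy_s_inline (dst, sizeof (dst), src, sizeof (src)) != EOK)
 *     return;   (a clib_c11_violation message has already been emitted)
 *
 * Passing sizeof (dst) as dmax is what allows the n > dmax check to
 * catch overruns at run time instead of corrupting memory.
 */
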
/*
 * Note: $$$ This macro is a crutch. Folks need to manually
 * inspect every extant clib_memcpy(...) call and
 * attempt to provide a real destination buffer size
 * argument...
 */
#define clib_memcpy(d,s,n) memcpy_s_inline(d,n,s,n)

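/*
 * Worked expansion (hypothetical names): clib_memcpy (dst, src, len)
 * becomes memcpy_s_inline (dst, len, src, len). Since the copy length
 * doubles as dmax, the n > dmax check can never fire; only the NULL,
 * n == 0, and overlap checks add value until callers supply a real
 * destination buffer size.
 */
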
errno_t memset_s (void *s, rsize_t smax, int c, rsize_t n);

always_inline errno_t
memset_s_inline (void *s, rsize_t smax, int c, rsize_t n)
{
  u8 bad;

  bad = (s == 0) + (n > smax);

  if (PREDICT_FALSE (bad != 0))
    {
      if (s == 0)
	clib_c11_violation ("s NULL");
      if (n > smax)
	clib_c11_violation ("n > smax");
      return (EINVAL);
    }
  memset (s, c, n);
  return (EOK);
}

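/*
 * Usage sketch (illustrative; the buffer name is hypothetical):
 *
 *   u8 scratch[128];
 *
 *   if (memset_s_inline (scratch, sizeof (scratch), 0xfe,
 *                        sizeof (scratch)) != EOK)
 *     return;
 */
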
/*
 * This macro is not [so much of] a crutch.
 * It's super-typical to write:
 *
 *   ep = pool_get (<pool>);
 *   clib_memset (ep, 0, sizeof (*ep));
 *
 * The compiler should delete the not-so-useful
 * (n > smax) test. TBH the NULL pointer check isn't
 * so useful in this case, but so be it.
 */
#define clib_memset(s,c,n) memset_s_inline(s,n,c,n)

/*
 * Copy 64 bytes of data to 4 destinations.
 * This function is typically used in the quad-loop case, when a whole
 * cacheline needs to be copied to 4 different places. First it reads
 * the whole cacheline into 1/2/4 SIMD registers, then it writes the
 * data to all 4 destinations.
 */

static_always_inline void
clib_memcpy64_x4 (void *d0, void *d1, void *d2, void *d3, void *s)
{
#if defined (__AVX512F__)
  __m512i r0 = _mm512_loadu_si512 (s);

  _mm512_storeu_si512 (d0, r0);
  _mm512_storeu_si512 (d1, r0);
  _mm512_storeu_si512 (d2, r0);
  _mm512_storeu_si512 (d3, r0);

#elif defined (__AVX2__)
  __m256i r0 = _mm256_loadu_si256 ((__m256i *) (s + 0 * 32));
  __m256i r1 = _mm256_loadu_si256 ((__m256i *) (s + 1 * 32));

  _mm256_storeu_si256 ((__m256i *) (d0 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d0 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d1 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d1 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d2 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d2 + 1 * 32), r1);

  _mm256_storeu_si256 ((__m256i *) (d3 + 0 * 32), r0);
  _mm256_storeu_si256 ((__m256i *) (d3 + 1 * 32), r1);

#elif defined (__SSSE3__)
  __m128i r0 = _mm_loadu_si128 ((__m128i *) (s + 0 * 16));
  __m128i r1 = _mm_loadu_si128 ((__m128i *) (s + 1 * 16));
  __m128i r2 = _mm_loadu_si128 ((__m128i *) (s + 2 * 16));
  __m128i r3 = _mm_loadu_si128 ((__m128i *) (s + 3 * 16));

  _mm_storeu_si128 ((__m128i *) (d0 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d0 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d0 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d0 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d1 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d1 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d1 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d1 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d2 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d2 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d2 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d2 + 3 * 16), r3);

  _mm_storeu_si128 ((__m128i *) (d3 + 0 * 16), r0);
  _mm_storeu_si128 ((__m128i *) (d3 + 1 * 16), r1);
  _mm_storeu_si128 ((__m128i *) (d3 + 2 * 16), r2);
  _mm_storeu_si128 ((__m128i *) (d3 + 3 * 16), r3);

#else
  clib_memcpy_fast (d0, s, 64);
  clib_memcpy_fast (d1, s, 64);
  clib_memcpy_fast (d2, s, 64);
  clib_memcpy_fast (d3, s, 64);
#endif
}

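/*
 * Usage sketch (illustrative; all names are hypothetical): in a quad
 * loop, replicate one cacheline-sized rewrite string into four packet
 * buffers at once.
 *
 *   clib_memcpy64_x4 (p0, p1, p2, p3, rewrite);
 *
 * All four destinations and the source must hold at least 64 valid
 * bytes; unlike the *_s variants above, no bounds checking is done.
 */
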
static_always_inline void
clib_memset_u64 (void *p, u64 val, uword count)
{
  u64 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u64x8 v512 = u64x8_splat (val);
  while (count >= 8)
    {
      u64x8_store_unaligned (v512, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u64x4 v256 = u64x4_splat (val);
  while (count >= 4)
    {
      u64x4_store_unaligned (v256, ptr);
      ptr += 4;
      count -= 4;
    }
  if (count == 0)
    return;
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

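/*
 * Usage sketch for clib_memset_u64 (the narrower u32/u16/u8 variants
 * below follow the same pattern; names here are hypothetical): fill a
 * table of 64-bit entries with a sentinel value.
 *
 *   u64 entries[1024];
 *   clib_memset_u64 (entries, ~0ULL, ARRAY_LEN (entries));
 *
 * The widest available vector store runs first; the final scalar loop
 * mops up whatever elements remain.
 */
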
static_always_inline void
clib_memset_u32 (void *p, u32 val, uword count)
{
  u32 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u32x16 v512 = u32x16_splat (val);
  while (count >= 16)
    {
      u32x16_store_unaligned (v512, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u32x8 v256 = u32x8_splat (val);
  while (count >= 8)
    {
      u32x8_store_unaligned (v256, ptr);
      ptr += 8;
      count -= 8;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u32x4 v128 = u32x4_splat (val);
  while (count >= 4)
    {
      u32x4_store_unaligned (v128, ptr);
      ptr += 4;
      count -= 4;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u16 (void *p, u16 val, uword count)
{
  u16 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u16x32 v512 = u16x32_splat (val);
  while (count >= 32)
    {
      u16x32_store_unaligned (v512, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u16x16 v256 = u16x16_splat (val);
  while (count >= 16)
    {
      u16x16_store_unaligned (v256, ptr);
      ptr += 16;
      count -= 16;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u16x8 v128 = u16x8_splat (val);
  while (count >= 8)
    {
      u16x8_store_unaligned (v128, ptr);
      ptr += 8;
      count -= 8;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

static_always_inline void
clib_memset_u8 (void *p, u8 val, uword count)
{
  u8 *ptr = p;
#if defined(CLIB_HAVE_VEC512)
  u8x64 v512 = u8x64_splat (val);
  while (count >= 64)
    {
      u8x64_store_unaligned (v512, ptr);
      ptr += 64;
      count -= 64;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC256)
  u8x32 v256 = u8x32_splat (val);
  while (count >= 32)
    {
      u8x32_store_unaligned (v256, ptr);
      ptr += 32;
      count -= 32;
    }
  if (count == 0)
    return;
#endif
#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
  u8x16 v128 = u8x16_splat (val);
  while (count >= 16)
    {
      u8x16_store_unaligned (v128, ptr);
      ptr += 16;
      count -= 16;
    }
#else
  while (count >= 4)
    {
      ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
      ptr += 4;
      count -= 4;
    }
#endif
  while (count--)
    ptr++[0] = val;
}

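/*
 * Worked trace (assuming only CLIB_HAVE_VEC256 is available):
 * clib_memset_u8 with count = 70 issues two 32-byte unaligned vector
 * stores, leaving count = 6 for the scalar tail code to finish.
 */
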
static_always_inline uword
clib_count_equal_u64 (u64 * data, uword max_count)
{
  uword count;
  u64 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u64x4 splat = u64x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 8;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

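/*
 * Usage sketch (illustrative; the array name is hypothetical): measure
 * the run of identical values at the head of an array, so a caller can
 * batch-process that run.
 *
 *   u64 nexts[256];
 *   uword n_same = clib_count_equal_u64 (nexts, ARRAY_LEN (nexts));
 *
 * Note the vector paths may read up to one full vector past the point
 * where they stop counting, so all max_count elements must be readable.
 */
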
static_always_inline uword
clib_count_equal_u32 (u32 * data, uword max_count)
{
  uword count;
  u32 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u32x8 splat = u32x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u32x4 splat = u32x4_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 4;
	  return clib_min (count, max_count);
	}

      data += 4;
      count += 4;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

static_always_inline uword
clib_count_equal_u16 (u16 * data, uword max_count)
{
  uword count;
  u16 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u16x16 splat = u16x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u16x8 splat = u16x8_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp) / 2;
	  return clib_min (count, max_count);
	}

      data += 8;
      count += 8;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

static_always_inline uword
clib_count_equal_u8 (u8 * data, uword max_count)
{
  uword count;
  u8 first;

  if (max_count == 1)
    return 1;
  if (data[0] != data[1])
    return 1;

  count = 0;
  first = data[0];

#if defined(CLIB_HAVE_VEC256)
  u8x32 splat = u8x32_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
      if (bmp != 0xffffffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 32;
      count += 32;

      if (count >= max_count)
	return max_count;
    }
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
  u8x16 splat = u8x16_splat (first);
  while (1)
    {
      u64 bmp;
      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
      if (bmp != 0xffff)
	{
	  count += count_trailing_zeros (~bmp);
	  return clib_min (count, max_count);
	}

      data += 16;
      count += 16;

      if (count >= max_count)
	return max_count;
    }
#endif
  count += 2;
  data += 2;
  while (count + 3 < max_count &&
	 ((data[0] ^ first) | (data[1] ^ first) |
	  (data[2] ^ first) | (data[3] ^ first)) == 0)
    {
      data += 4;
      count += 4;
    }
  while (count < max_count && (data[0] == first))
    {
      data += 1;
      count += 1;
    }
  return count;
}

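/*
 * A note on the scalar fallback shared by all four count-equal
 * variants: the expression
 *
 *   ((data[0] ^ first) | (data[1] ^ first) |
 *    (data[2] ^ first) | (data[3] ^ first)) == 0
 *
 * tests four elements per branch. Each XOR is zero only when that
 * element equals first, so the OR of the four is zero only when all
 * four match.
 */
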
#endif /* included_clib_string_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */