blob: fa427a6f1a9f273c77bb47006b672282b1688c83 [file] [log] [blame]
/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */
4
5#ifndef included_vector_array_mask_h
6#define included_vector_array_mask_h
7#include <vppinfra/clib.h>
8
9/** \brief Mask array of 32-bit elemments
10
11 @param src source array of u32 elements
12 @param mask use to mask the values of source array
13 @param n_elts number of elements in the source array
14 @return masked values are return in source array
15*/
16
17static_always_inline void
18clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
19{
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000020#if defined(CLIB_HAVE_VEC512)
21 u32x16 mask16 = u32x16_splat (mask);
Damjan Marionf62ed3f2021-10-27 17:28:26 +020022 if (n_elts <= 16)
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000023 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020024 u32 m = pow2_mask (n_elts);
25 u32x16 r = u32x16_mask_load_zero (src, m);
26 u32x16_mask_store (r & mask16, src, m);
27 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000028 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020029 for (int i = 0; i < n_elts; i += 16)
30 *((u32x16u *) (src + i)) &= mask16;
31 *((u32x16u *) (src + n_elts - 16)) &= mask16;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000032#elif defined(CLIB_HAVE_VEC256)
33 u32x8 mask8 = u32x8_splat (mask);
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000034#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
Damjan Marionf62ed3f2021-10-27 17:28:26 +020035 if (n_elts <= 8)
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000036 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020037 u32 m = pow2_mask (n_elts);
38 u32x8 r = u32x8_mask_load_zero (src, m);
39 u32x8_mask_store (r & mask8, src, m);
40 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000041 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020042#else
43 if (PREDICT_FALSE (n_elts < 4))
44 {
45 if (n_elts & 2)
46 {
47 src[0] &= mask;
48 src[1] &= mask;
49 src += 2;
50 }
51 if (n_elts & 1)
52 src[0] &= mask;
53 return;
54 }
55 if (n_elts <= 8)
56 {
57 u32x4 mask4 = u32x4_splat (mask);
58 *(u32x4u *) src &= mask4;
59 *(u32x4u *) (src + n_elts - 4) &= mask4;
60 }
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000061#endif
Damjan Marionf62ed3f2021-10-27 17:28:26 +020062
63 for (int i = 0; i < n_elts; i += 8)
64 *((u32x8u *) (src + i)) &= mask8;
65 *((u32x8u *) (src + n_elts - 8)) &= mask8;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000066#elif defined(CLIB_HAVE_VEC128)
67 u32x4 mask4 = u32x4_splat (mask);
68
Damjan Marionf62ed3f2021-10-27 17:28:26 +020069 if (PREDICT_FALSE (n_elts < 4))
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000070 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020071 if (n_elts & 2)
72 {
73 src[0] &= mask;
74 src[1] &= mask;
75 src += 2;
76 }
77 if (n_elts & 1)
78 src[0] &= mask;
79 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000080 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020081
82 for (int i = 0; i < n_elts; i += 4)
83 *((u32x4u *) (src + i)) &= mask4;
84 *((u32x4u *) (src + n_elts - 4)) &= mask4;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000085 return;
Damjan Marionf62ed3f2021-10-27 17:28:26 +020086#else
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000087 while (n_elts > 0)
88 {
89 src[0] &= mask;
90 src++;
91 n_elts--;
92 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020093#endif
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000094}
95
96#endif