blob: ba22d79560f581ec8c5e93085ff18d21c29fe879 [file] [log] [blame]
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +00001/* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
3 */
4
5#ifndef included_vector_array_mask_h
6#define included_vector_array_mask_h
7#include <vppinfra/clib.h>
8
9/** \brief Mask array of 32-bit elements
10
11 @param src source array of u32 elements
12 @param mask use to mask the values of source array
13 @param n_elts number of elements in the source array
14    @return masked values are returned in the source array
15*/
16
17static_always_inline void
18clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
19{
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000020#if defined(CLIB_HAVE_VEC512)
21 u32x16 mask16 = u32x16_splat (mask);
Damjan Marionf62ed3f2021-10-27 17:28:26 +020022 if (n_elts <= 16)
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000023 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020024 u32 m = pow2_mask (n_elts);
25 u32x16 r = u32x16_mask_load_zero (src, m);
26 u32x16_mask_store (r & mask16, src, m);
27 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000028 }
Dmitry Valtera2188912024-01-18 09:09:12 +000029 for (; n_elts >= 16; n_elts -= 16, src += 16)
30 *((u32x16u *) src) &= mask16;
Damjan Marionf62ed3f2021-10-27 17:28:26 +020031 *((u32x16u *) (src + n_elts - 16)) &= mask16;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000032#elif defined(CLIB_HAVE_VEC256)
33 u32x8 mask8 = u32x8_splat (mask);
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000034#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
Damjan Marionf62ed3f2021-10-27 17:28:26 +020035 if (n_elts <= 8)
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000036 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020037 u32 m = pow2_mask (n_elts);
38 u32x8 r = u32x8_mask_load_zero (src, m);
39 u32x8_mask_store (r & mask8, src, m);
40 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000041 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020042#else
43 if (PREDICT_FALSE (n_elts < 4))
44 {
45 if (n_elts & 2)
46 {
47 src[0] &= mask;
48 src[1] &= mask;
49 src += 2;
50 }
51 if (n_elts & 1)
52 src[0] &= mask;
53 return;
54 }
55 if (n_elts <= 8)
56 {
57 u32x4 mask4 = u32x4_splat (mask);
58 *(u32x4u *) src &= mask4;
59 *(u32x4u *) (src + n_elts - 4) &= mask4;
60 }
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000061#endif
Damjan Marionf62ed3f2021-10-27 17:28:26 +020062
Dmitry Valtera2188912024-01-18 09:09:12 +000063 for (; n_elts >= 8; n_elts -= 8, src += 8)
64 *((u32x8u *) src) &= mask8;
Damjan Marionf62ed3f2021-10-27 17:28:26 +020065 *((u32x8u *) (src + n_elts - 8)) &= mask8;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000066#elif defined(CLIB_HAVE_VEC128)
67 u32x4 mask4 = u32x4_splat (mask);
68
Damjan Marionf62ed3f2021-10-27 17:28:26 +020069 if (PREDICT_FALSE (n_elts < 4))
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000070 {
Damjan Marionf62ed3f2021-10-27 17:28:26 +020071 if (n_elts & 2)
72 {
73 src[0] &= mask;
74 src[1] &= mask;
75 src += 2;
76 }
77 if (n_elts & 1)
78 src[0] &= mask;
79 return;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000080 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020081
Dmitry Valtera2188912024-01-18 09:09:12 +000082 for (; n_elts >= 4; n_elts -= 4, src += 4)
83 *((u32x4u *) src) &= mask4;
Damjan Marionf62ed3f2021-10-27 17:28:26 +020084 *((u32x4u *) (src + n_elts - 4)) &= mask4;
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000085 return;
Damjan Marionf62ed3f2021-10-27 17:28:26 +020086#else
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000087 while (n_elts > 0)
88 {
89 src[0] &= mask;
90 src++;
91 n_elts--;
92 }
Damjan Marionf62ed3f2021-10-27 17:28:26 +020093#endif
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +000094}
95
Damjan Marion10672be2022-09-08 19:00:06 +020096static_always_inline void
97clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
98{
99#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
100 u32x16 r = u32x16_splat (v);
101 for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
102 u32x16_mask_store (r, a, bmp);
103#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
104 u32x8 r = u32x8_splat (v);
105 for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
106 u32x8_mask_store (r, a, bmp);
107#else
108 while (bmp)
109 {
110 a[get_lowest_set_bit_index (bmp)] = v;
111 bmp = clear_lowest_set_bit (bmp);
112 }
113#endif
114}
115
116static_always_inline void
117clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
118{
119 while (n_elts >= uword_bits)
120 {
121 clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
122 a += uword_bits;
123 n_elts -= uword_bits;
124 }
125
126 clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
127}
128
Mohsin Kazmi0ec7dad2021-07-15 10:34:36 +0000129#endif