/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_array_mask_h
#define included_vector_array_mask_h
#include <vppinfra/clib.h>

/** \brief Mask array of 32-bit elements

    @param src source array of u32 elements
    @param mask mask applied to each value of the source array
    @param n_elts number of elements in the source array
    @return masked values are returned in the source array
*/

static_always_inline void
clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
{
#if defined(CLIB_HAVE_VEC512)
  u32x16 mask16 = u32x16_splat (mask);
  if (n_elts <= 16)
    {
      u32 m = pow2_mask (n_elts);
      u32x16 r = u32x16_mask_load_zero (src, m);
      u32x16_mask_store (r & mask16, src, m);
      return;
    }
  for (; n_elts >= 16; n_elts -= 16, src += 16)
    *((u32x16u *) src) &= mask16;
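  /* the 0..15 element tail is masked by one overlapping, possibly unaligned
     store over the last 16 elements; re-masking already-masked elements is
     harmless since AND with the same mask is idempotent */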
  *((u32x16u *) (src + n_elts - 16)) &= mask16;
#elif defined(CLIB_HAVE_VEC256)
  u32x8 mask8 = u32x8_splat (mask);
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  if (n_elts <= 8)
    {
      u32 m = pow2_mask (n_elts);
      u32x8 r = u32x8_mask_load_zero (src, m);
      u32x8_mask_store (r & mask8, src, m);
      return;
    }
#else
  if (PREDICT_FALSE (n_elts < 4))
    {
      if (n_elts & 2)
        {
          src[0] &= mask;
          src[1] &= mask;
          src += 2;
        }
      if (n_elts & 1)
        src[0] &= mask;
      return;
    }
  if (n_elts <= 8)
    {
      u32x4 mask4 = u32x4_splat (mask);
      *(u32x4u *) src &= mask4;
      *(u32x4u *) (src + n_elts - 4) &= mask4;
      return;
    }
#endif

  for (; n_elts >= 8; n_elts -= 8, src += 8)
    *((u32x8u *) src) &= mask8;
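  /* overlapping store covers the remaining 0..7 tail elements */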
  *((u32x8u *) (src + n_elts - 8)) &= mask8;
#elif defined(CLIB_HAVE_VEC128)
  u32x4 mask4 = u32x4_splat (mask);

  if (PREDICT_FALSE (n_elts < 4))
    {
      if (n_elts & 2)
        {
          src[0] &= mask;
          src[1] &= mask;
          src += 2;
        }
      if (n_elts & 1)
        src[0] &= mask;
      return;
    }

  for (; n_elts >= 4; n_elts -= 4, src += 4)
    *((u32x4u *) src) &= mask4;
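  /* overlapping store covers the remaining 0..3 tail elements */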
  *((u32x4u *) (src + n_elts - 4)) &= mask4;
  return;
#else
  while (n_elts > 0)
    {
      src[0] &= mask;
      src++;
      n_elts--;
    }
#endif
}

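/* Illustrative usage sketch (not part of the original header): a common
   pattern is wrapping indices into a power-of-two sized ring.  The array
   name, RING_SIZE macro and element count below are assumptions for the
   example only.

     u32 next_indices[71];
     ...
     clib_array_mask_u32 (next_indices, RING_SIZE - 1, 71);

   Each element becomes next_indices[i] & (RING_SIZE - 1); the function
   handles the non-multiple-of-vector-width tail internally. */
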
/** \brief Set selected elements of a u32 array to a value (single bitmap word).

    @param a array of u32 elements
    @param v value to store
    @param bmp bitmap with one bit per element; set bits select elements to write
    @param n_elts number of elements covered by the bitmap (at most uword_bits)
*/
static_always_inline void
clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
{
#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
  u32x16 r = u32x16_splat (v);
  for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
    u32x16_mask_store (r, a, bmp);
#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
  u32x8 r = u32x8_splat (v);
  for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
    u32x8_mask_store (r, a, bmp);
#else
  while (bmp)
    {
      a[get_lowest_set_bit_index (bmp)] = v;
      bmp = clear_lowest_set_bit (bmp);
    }
#endif
}

/** \brief Set elements of a u32 array selected by a bitmap to a value.

    @param a array of u32 elements
    @param v value to store into each selected element
    @param bmp bitmap with one bit per array element; set bits select elements
    @param n_elts number of elements in the array
*/
static_always_inline void
clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
{
  while (n_elts >= uword_bits)
    {
      clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
      a += uword_bits;
      n_elts -= uword_bits;
    }

  clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
}

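/* Illustrative usage sketch (not part of the original header): names and
   sizes below are assumptions for the example only.  Resetting every entry
   selected by a bitmap to a sentinel value:

     u32 sessions[256];
     uword *freed_bitmap;     // one bit per element of sessions[]
     ...
     clib_array_mask_set_u32 (sessions, ~0, freed_bitmap, 256);

   Elements whose bit is set in freed_bitmap become ~0; the rest are left
   untouched. */
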
#endif