/*
 * Copyright (c) 2018 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef included_vector_avx2_h
#define included_vector_avx2_h

#include <vppinfra/clib.h>
#include <x86intrin.h>

/* *INDENT-OFF* */
#define foreach_avx2_vec256i \
  _(i,8,32,epi8) _(i,16,16,epi16) _(i,32,8,epi32) _(i,64,4,epi64x)
#define foreach_avx2_vec256u \
  _(u,8,32,epi8) _(u,16,16,epi16) _(u,32,8,epi32) _(u,64,4,epi64x)
#define foreach_avx2_vec256f \
  _(f,32,8,ps) _(f,64,4,pd)

/* splat, load_unaligned, store_unaligned, is_all_zero, is_equal,
   is_all_equal */
#define _(t, s, c, i) \
static_always_inline t##s##x##c \
t##s##x##c##_splat (t##s x) \
{ return (t##s##x##c) _mm256_set1_##i (x); } \
\
static_always_inline t##s##x##c \
t##s##x##c##_load_unaligned (void *p) \
{ return (t##s##x##c) _mm256_loadu_si256 (p); } \
\
static_always_inline void \
t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
{ _mm256_storeu_si256 ((__m256i *) p, (__m256i) v); } \
\
static_always_inline int \
t##s##x##c##_is_all_zero (t##s##x##c x) \
{ return _mm256_testz_si256 ((__m256i) x, (__m256i) x); } \
\
static_always_inline int \
t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
{ return t##s##x##c##_is_all_zero (a ^ b); } \
\
static_always_inline int \
t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
{ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); }

foreach_avx2_vec256i foreach_avx2_vec256u
#undef _
/* *INDENT-ON* */

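/* Illustrative use of the helpers generated above (a sketch, not part of
   the original header; `src' and `dst' are hypothetical u32 arrays holding
   at least 8 elements):

     u32x8 v = u32x8_load_unaligned (src);
     if (!u32x8_is_all_equal (v, 0))
       u32x8_store_unaligned (v ^ u32x8_splat (1), dst);
*/
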
always_inline u32x8
u32x8_permute (u32x8 v, u32x8 idx)
{
  return (u32x8) _mm256_permutevar8x32_epi32 ((__m256i) v, (__m256i) idx);
}

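/* Example (a sketch, not part of the original header): reverse the eight
   32-bit lanes of a vector with u32x8_permute.

     u32x8 rev_idx = { 7, 6, 5, 4, 3, 2, 1, 0 };
     u32x8 reversed = u32x8_permute (v, rev_idx);
*/
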
/* _extract_lo, _extract_hi, _insert_lo, _insert_hi */
/* *INDENT-OFF* */
#define _(t1,t2) \
always_inline t1 \
t2##_extract_lo (t2 v) \
{ return (t1) _mm256_extracti128_si256 ((__m256i) v, 0); } \
\
always_inline t1 \
t2##_extract_hi (t2 v) \
{ return (t1) _mm256_extracti128_si256 ((__m256i) v, 1); } \
\
always_inline t2 \
t2##_insert_lo (t2 v1, t1 v2) \
{ return (t2) _mm256_inserti128_si256 ((__m256i) v1, (__m128i) v2, 0); } \
\
always_inline t2 \
t2##_insert_hi (t2 v1, t1 v2) \
{ return (t2) _mm256_inserti128_si256 ((__m256i) v1, (__m128i) v2, 1); }

_(u8x16, u8x32)
_(u16x8, u16x16)
_(u32x4, u32x8)
_(u64x2, u64x4)
#undef _
/* *INDENT-ON* */

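/* Example (a sketch, not part of the original header): swap the two 128-bit
   halves of a 256-bit vector using the extract/insert helpers above.

     u32x4 lo = u32x8_extract_lo (v);
     u32x4 hi = u32x8_extract_hi (v);
     v = u32x8_insert_hi (u32x8_insert_lo (v, hi), lo);
*/
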
static_always_inline u32
u8x32_msb_mask (u8x32 v)
{
  return _mm256_movemask_epi8 ((__m256i) v);
}

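/* u8x32_msb_mask collects the most significant bit of each byte lane into a
   32-bit scalar (bit i corresponds to byte i).  Example (a sketch, not part
   of the original header): count the zero bytes in a vector by comparing
   per byte and counting the resulting mask bits.

     u32 mask = u8x32_msb_mask ((u8x32) (v == u8x32_splat (0)));
     int n_zero_bytes = __builtin_popcount (mask);
*/
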
/* _extend_to_ */
/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
f##_extend_to_##t (f x) \
{ return (t) _mm256_cvt##i ((__m128i) x); }

_(u16x8, u32x8, epu16_epi32)
_(u16x8, u64x4, epu16_epi64)
_(u32x4, u64x4, epu32_epi64)
_(u8x16, u16x16, epu8_epi16)
_(u8x16, u32x8, epu8_epi32)
_(u8x16, u64x4, epu8_epi64)
_(i16x8, i32x8, epi16_epi32)
_(i16x8, i64x4, epi16_epi64)
_(i32x4, i64x4, epi32_epi64)
_(i8x16, i16x16, epi8_epi16)
_(i8x16, i32x8, epi8_epi32)
_(i8x16, i64x4, epi8_epi64)
#undef _
/* *INDENT-ON* */

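/* Example (a sketch, not part of the original header): zero-extend sixteen
   bytes to sixteen 16-bit lanes before arithmetic that could overflow in
   8 bits.

     u16x16 wide = u8x16_extend_to_u16x16 (narrow);
     wide += u16x16_splat (100);
*/
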
static_always_inline u16x16
u16x16_byte_swap (u16x16 v)
{
  u8x32 swap = {
    1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
    1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14
  };
  return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap);
}

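/* u16x16_byte_swap swaps the two bytes inside every 16-bit lane, e.g. to
   convert sixteen u16 values between host and network byte order in one
   shot.  Example (a sketch, not part of the original header; `p' is a
   hypothetical pointer to 16 host-order u16 values):

     u16x16 be = u16x16_byte_swap (u16x16_load_unaligned (p));
*/
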
static_always_inline u32x8
u32x8_hadd (u32x8 v1, u32x8 v2)
{
  return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2);
}

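/* u32x8_hadd adds adjacent pairs of 32-bit lanes.  Note the AVX2 VPHADDD
   semantics: results from v1 and v2 are interleaved per 128-bit lane, i.e.
   for inputs a = v1 and b = v2 the result is
   { a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7 }.
   Example (a sketch, not part of the original header): sum all eight lanes
   of a single vector.

     u32x8 t = u32x8_hadd (v, v);
     t = u32x8_hadd (t, t);
     u32 sum = t[0] + t[4];
*/
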
static_always_inline u16x16
u16x16_mask_last (u16x16 v, u8 n_last)
{
  const u16x16 masks[17] = {
    {0},
    {-1},
    {-1, -1},
    {-1, -1, -1},
    {-1, -1, -1, -1},
    {-1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
  };

  ASSERT (n_last < 17);

  return v & masks[16 - n_last];
}

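/* u16x16_mask_last zeroes the last n_last lanes of v and keeps the first
   (16 - n_last) lanes.  Example (a sketch, not part of the original header;
   `n_valid' is a hypothetical count of meaningful leading u16 values): clear
   the unused tail so it does not pollute later accumulation.

     u16x16 v = u16x16_load_unaligned (p);
     v = u16x16_mask_last (v, 16 - n_valid);
*/
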
#endif /* included_vector_avx2_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */