/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
  Copyright (c) 2008 Eliot Dresselhaus

  Permission is hereby granted, free of charge, to any person obtaining
  a copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
37
38#ifndef included_vector_funcs_h
39#define included_vector_funcs_h
40
41#include <vppinfra/byte_order.h>
42
/* Addition/subtraction.

   Width-agnostic aliases: each <lane type>x_add / <lane type>x_sub name
   expands to the fixed-lane-count operation whose total width equals
   CLIB_VECTOR_WORD_BITS.  When CLIB_VECTOR_WORD_BITS is neither 128 nor
   64, none of these aliases are defined.  The 64 bit branch provides no
   u64x or i64x aliases. */
#if CLIB_VECTOR_WORD_BITS == 128

#define u8x_add u8x16_add
#define u16x_add u16x8_add
#define u32x_add u32x4_add
#define u64x_add u64x2_add
#define i8x_add i8x16_add
#define i16x_add i16x8_add
#define i32x_add i32x4_add
#define i64x_add i64x2_add
#define u8x_sub u8x16_sub
#define u16x_sub u16x8_sub
#define u32x_sub u32x4_sub
#define u64x_sub u64x2_sub
#define i8x_sub i8x16_sub
#define i16x_sub i16x8_sub
#define i32x_sub i32x4_sub
#define i64x_sub i64x2_sub

#elif CLIB_VECTOR_WORD_BITS == 64

#define u8x_add u8x8_add
#define u16x_add u16x4_add
#define u32x_add u32x2_add
#define i8x_add i8x8_add
#define i16x_add i16x4_add
#define i32x_add i32x2_add
#define u8x_sub u8x8_sub
#define u16x_sub u16x4_sub
#define u32x_sub u32x2_sub
#define i8x_sub i8x8_sub
#define i16x_sub i16x4_sub
#define i32x_sub i32x2_sub

#endif
77
/* Saturating addition/subtraction.

   Width-agnostic aliases for the 8 and 16 bit lane saturating operations,
   selected by CLIB_VECTOR_WORD_BITS.  Only 8 and 16 bit lanes are aliased
   here; nothing is defined when CLIB_VECTOR_WORD_BITS is neither 128
   nor 64. */
#if CLIB_VECTOR_WORD_BITS == 128

#define u8x_add_saturate u8x16_add_saturate
#define u16x_add_saturate u16x8_add_saturate
#define i8x_add_saturate i8x16_add_saturate
#define i16x_add_saturate i16x8_add_saturate
#define u8x_sub_saturate u8x16_sub_saturate
#define u16x_sub_saturate u16x8_sub_saturate
#define i8x_sub_saturate i8x16_sub_saturate
#define i16x_sub_saturate i16x8_sub_saturate

#elif CLIB_VECTOR_WORD_BITS == 64

#define u8x_add_saturate u8x8_add_saturate
#define u16x_add_saturate u16x4_add_saturate
#define i8x_add_saturate i8x8_add_saturate
#define i16x_add_saturate i16x4_add_saturate
#define u8x_sub_saturate u8x8_sub_saturate
#define u16x_sub_saturate u16x4_sub_saturate
#define i8x_sub_saturate i8x8_sub_saturate
#define i16x_sub_saturate i16x4_sub_saturate

#endif
100
/* In-place interleave of two vectors of type t.

   Computes t##_interleave_lo (a, b) and t##_interleave_hi (a, b) from the
   original values of a and b, then stores the results back into a and b.
   On little endian targets a receives the "lo" result and b the "hi"
   result; on big endian targets the two are swapped.

   a and b must be assignable lvalues of type t.  Each is expanded more
   than once, so neither argument may have side effects. */
#define _vector_interleave(a,b,t)                       \
do {                                                    \
  t _tmp_lo = t##_interleave_lo (a, b);                 \
  t _tmp_hi = t##_interleave_hi (a, b);                 \
  if (CLIB_ARCH_IS_LITTLE_ENDIAN)                       \
    {                                                   \
      (a) = _tmp_lo;                                    \
      (b) = _tmp_hi;                                    \
    }                                                   \
  else                                                  \
    {                                                   \
      (a) = _tmp_hi;                                    \
      (b) = _tmp_lo;                                    \
    }                                                   \
} while (0)
110
/* 128 bit interleaves.
   Each wrapper interleaves a and b in place via _vector_interleave using
   the named 128 bit vector type; a and b must be lvalues of that type. */
#define u8x16_interleave(a,b) _vector_interleave(a,b,u8x16)
#define i8x16_interleave(a,b) _vector_interleave(a,b,i8x16)
#define u16x8_interleave(a,b) _vector_interleave(a,b,u16x8)
#define i16x8_interleave(a,b) _vector_interleave(a,b,i16x8)
#define u32x4_interleave(a,b) _vector_interleave(a,b,u32x4)
#define i32x4_interleave(a,b) _vector_interleave(a,b,i32x4)
#define u64x2_interleave(a,b) _vector_interleave(a,b,u64x2)
#define i64x2_interleave(a,b) _vector_interleave(a,b,i64x2)
120
/* 64 bit interleaves.
   Each wrapper interleaves a and b in place via _vector_interleave using
   the named 64 bit vector type; a and b must be lvalues of that type. */
#define u8x8_interleave(a,b) _vector_interleave(a,b,u8x8)
#define i8x8_interleave(a,b) _vector_interleave(a,b,i8x8)
#define u16x4_interleave(a,b) _vector_interleave(a,b,u16x4)
#define i16x4_interleave(a,b) _vector_interleave(a,b,i16x4)
#define u32x2_interleave(a,b) _vector_interleave(a,b,u32x2)
#define i32x2_interleave(a,b) _vector_interleave(a,b,i32x2)
128
/* Word sized interleaves.
   Width-agnostic aliases for the unsigned interleave macros, selected by
   CLIB_VECTOR_WORD_BITS.  With 64 bit vector words u64x_interleave
   expands to nothing. */
#if CLIB_VECTOR_WORD_BITS == 128

#define u8x_interleave u8x16_interleave
#define u16x_interleave u16x8_interleave
#define u32x_interleave u32x4_interleave
#define u64x_interleave u64x2_interleave

#elif CLIB_VECTOR_WORD_BITS == 64

#define u8x_interleave u8x8_interleave
#define u16x_interleave u16x4_interleave
#define u32x_interleave u32x2_interleave
#define u64x_interleave(a,b)	/* do nothing */

#endif
143
/* Vector word sized shifts.

   Width-agnostic aliases for the shift and rotate operations (plain and
   "i"-prefixed variants, left and right), selected by
   CLIB_VECTOR_WORD_BITS.  The 64 bit branch provides no u64x or i64x
   aliases; nothing is defined when CLIB_VECTOR_WORD_BITS is neither 128
   nor 64. */
#if CLIB_VECTOR_WORD_BITS == 128

#define u8x_shift_left u8x16_shift_left
#define i8x_shift_left i8x16_shift_left
#define u16x_shift_left u16x8_shift_left
#define i16x_shift_left i16x8_shift_left
#define u32x_shift_left u32x4_shift_left
#define i32x_shift_left i32x4_shift_left
#define u64x_shift_left u64x2_shift_left
#define i64x_shift_left i64x2_shift_left
#define u8x_shift_right u8x16_shift_right
#define i8x_shift_right i8x16_shift_right
#define u16x_shift_right u16x8_shift_right
#define i16x_shift_right i16x8_shift_right
#define u32x_shift_right u32x4_shift_right
#define i32x_shift_right i32x4_shift_right
#define u64x_shift_right u64x2_shift_right
#define i64x_shift_right i64x2_shift_right
#define u8x_rotate_left u8x16_rotate_left
#define i8x_rotate_left i8x16_rotate_left
#define u16x_rotate_left u16x8_rotate_left
#define i16x_rotate_left i16x8_rotate_left
#define u32x_rotate_left u32x4_rotate_left
#define i32x_rotate_left i32x4_rotate_left
#define u64x_rotate_left u64x2_rotate_left
#define i64x_rotate_left i64x2_rotate_left
#define u8x_rotate_right u8x16_rotate_right
#define i8x_rotate_right i8x16_rotate_right
#define u16x_rotate_right u16x8_rotate_right
#define i16x_rotate_right i16x8_rotate_right
#define u32x_rotate_right u32x4_rotate_right
#define i32x_rotate_right i32x4_rotate_right
#define u64x_rotate_right u64x2_rotate_right
#define i64x_rotate_right i64x2_rotate_right
#define u8x_ishift_left u8x16_ishift_left
#define i8x_ishift_left i8x16_ishift_left
#define u16x_ishift_left u16x8_ishift_left
#define i16x_ishift_left i16x8_ishift_left
#define u32x_ishift_left u32x4_ishift_left
#define i32x_ishift_left i32x4_ishift_left
#define u64x_ishift_left u64x2_ishift_left
#define i64x_ishift_left i64x2_ishift_left
#define u8x_ishift_right u8x16_ishift_right
#define i8x_ishift_right i8x16_ishift_right
#define u16x_ishift_right u16x8_ishift_right
#define i16x_ishift_right i16x8_ishift_right
#define u32x_ishift_right u32x4_ishift_right
#define i32x_ishift_right i32x4_ishift_right
#define u64x_ishift_right u64x2_ishift_right
#define i64x_ishift_right i64x2_ishift_right
#define u8x_irotate_left u8x16_irotate_left
#define i8x_irotate_left i8x16_irotate_left
#define u16x_irotate_left u16x8_irotate_left
#define i16x_irotate_left i16x8_irotate_left
#define u32x_irotate_left u32x4_irotate_left
#define i32x_irotate_left i32x4_irotate_left
#define u64x_irotate_left u64x2_irotate_left
#define i64x_irotate_left i64x2_irotate_left
#define u8x_irotate_right u8x16_irotate_right
#define i8x_irotate_right i8x16_irotate_right
#define u16x_irotate_right u16x8_irotate_right
#define i16x_irotate_right i16x8_irotate_right
#define u32x_irotate_right u32x4_irotate_right
#define i32x_irotate_right i32x4_irotate_right
#define u64x_irotate_right u64x2_irotate_right
#define i64x_irotate_right i64x2_irotate_right

#elif CLIB_VECTOR_WORD_BITS == 64

#define u8x_shift_left u8x8_shift_left
#define i8x_shift_left i8x8_shift_left
#define u16x_shift_left u16x4_shift_left
#define i16x_shift_left i16x4_shift_left
#define u32x_shift_left u32x2_shift_left
#define i32x_shift_left i32x2_shift_left
#define u8x_shift_right u8x8_shift_right
#define i8x_shift_right i8x8_shift_right
#define u16x_shift_right u16x4_shift_right
#define i16x_shift_right i16x4_shift_right
#define u32x_shift_right u32x2_shift_right
#define i32x_shift_right i32x2_shift_right
#define u8x_rotate_left u8x8_rotate_left
#define i8x_rotate_left i8x8_rotate_left
#define u16x_rotate_left u16x4_rotate_left
#define i16x_rotate_left i16x4_rotate_left
#define u32x_rotate_left u32x2_rotate_left
#define i32x_rotate_left i32x2_rotate_left
#define u8x_rotate_right u8x8_rotate_right
#define i8x_rotate_right i8x8_rotate_right
#define u16x_rotate_right u16x4_rotate_right
#define i16x_rotate_right i16x4_rotate_right
#define u32x_rotate_right u32x2_rotate_right
#define i32x_rotate_right i32x2_rotate_right
#define u8x_ishift_left u8x8_ishift_left
#define i8x_ishift_left i8x8_ishift_left
#define u16x_ishift_left u16x4_ishift_left
#define i16x_ishift_left i16x4_ishift_left
#define u32x_ishift_left u32x2_ishift_left
#define i32x_ishift_left i32x2_ishift_left
#define u8x_ishift_right u8x8_ishift_right
#define i8x_ishift_right i8x8_ishift_right
#define u16x_ishift_right u16x4_ishift_right
#define i16x_ishift_right i16x4_ishift_right
#define u32x_ishift_right u32x2_ishift_right
#define i32x_ishift_right i32x2_ishift_right
#define u8x_irotate_left u8x8_irotate_left
#define i8x_irotate_left i8x8_irotate_left
#define u16x_irotate_left u16x4_irotate_left
#define i16x_irotate_left i16x4_irotate_left
#define u32x_irotate_left u32x2_irotate_left
#define i32x_irotate_left i32x2_irotate_left
#define u8x_irotate_right u8x8_irotate_right
#define i8x_irotate_right i8x8_irotate_right
#define u16x_irotate_right u16x4_irotate_right
#define i16x_irotate_right i16x4_irotate_right
#define u32x_irotate_right u32x2_irotate_right
#define i32x_irotate_right i32x2_irotate_right

#endif
262
/* Word sized splats.
   Width-agnostic aliases for the splat operations, selected by
   CLIB_VECTOR_WORD_BITS.  The 64 bit branch provides no u64x or i64x
   aliases; nothing is defined when CLIB_VECTOR_WORD_BITS is neither 128
   nor 64. */
#if CLIB_VECTOR_WORD_BITS == 128

#define u8x_splat u8x16_splat
#define i8x_splat i8x16_splat
#define u16x_splat u16x8_splat
#define i16x_splat i16x8_splat
#define u32x_splat u32x4_splat
#define i32x_splat i32x4_splat
#define u64x_splat u64x2_splat
#define i64x_splat i64x2_splat

#elif CLIB_VECTOR_WORD_BITS == 64

#define u8x_splat u8x8_splat
#define i8x_splat i8x8_splat
#define u16x_splat u16x4_splat
#define i16x_splat i16x4_splat
#define u32x_splat u32x2_splat
#define i32x_splat i32x2_splat

#endif
282
/* One step of the 4x4 transpose: replace x with
   u32x4_interleave_lo (x, y) and y with u32x4_interleave_hi (x, y).

   Both inputs are copied into locals first, so the "hi" result is
   computed from the original x rather than from the freshly written one.
   x and y must be assignable u32x4 lvalues; each is read once and
   written once. */
#define u32x4_transpose_step(x,y)                       \
do {                                                    \
  u32x4 _x = (x);                                       \
  u32x4 _y = (y);                                       \
  (x) = u32x4_interleave_lo (_x, _y);                   \
  (y) = u32x4_interleave_hi (_x, _y);                   \
} while (0)
290
/* 4x4 transpose: x_ij -> x_ji

   Treats x0..x3 as the four rows of a 4x4 matrix of 32 bit elements and
   transposes it in place using four u32x4_transpose_step passes:
   (x0,x2), (x1,x3), then (x0,x1), (x2,x3).

   Each argument is read once into a u32x4 local and written once at the
   end, so the arguments must be assignable lvalues that can be cast to
   and from u32x4.

   NOTE(review): the row/column result assumes u32x4_interleave_lo (a, b)
   yields a0 b0 a1 b1 and u32x4_interleave_hi (a, b) yields a2 b2 a3 b3;
   those helpers are defined outside this header. */
#define u32x4_transpose(x0,x1,x2,x3)                    \
do {                                                    \
  u32x4 _x0 = (u32x4) (x0);                             \
  u32x4 _x1 = (u32x4) (x1);                             \
  u32x4 _x2 = (u32x4) (x2);                             \
  u32x4 _x3 = (u32x4) (x3);                             \
  u32x4_transpose_step (_x0, _x2);                      \
  u32x4_transpose_step (_x1, _x3);                      \
  u32x4_transpose_step (_x0, _x1);                      \
  u32x4_transpose_step (_x2, _x3);                      \
  (x0) = (u32x4) _x0;                                   \
  (x1) = (u32x4) _x1;                                   \
  (x2) = (u32x4) _x2;                                   \
  (x3) = (u32x4) _x3;                                   \
} while (0)
307
/* Signed 4x4 transpose: x_ij -> x_ji

   Same sequence of u32x4_transpose_step passes as the unsigned transpose:
   the i32x4 arguments are cast to u32x4 locals, shuffled, and cast back
   to i32x4 when stored.

   Each argument is read once and written once, so the arguments must be
   assignable lvalues that can be cast to u32x4 and assigned from i32x4.

   NOTE(review): the row/column result assumes u32x4_interleave_lo (a, b)
   yields a0 b0 a1 b1 and u32x4_interleave_hi (a, b) yields a2 b2 a3 b3;
   those helpers are defined outside this header. */
#define i32x4_transpose(x0,x1,x2,x3)                    \
do {                                                    \
  u32x4 _x0 = (u32x4) (x0);                             \
  u32x4 _x1 = (u32x4) (x1);                             \
  u32x4 _x2 = (u32x4) (x2);                             \
  u32x4 _x3 = (u32x4) (x3);                             \
  u32x4_transpose_step (_x0, _x2);                      \
  u32x4_transpose_step (_x1, _x3);                      \
  u32x4_transpose_step (_x0, _x1);                      \
  u32x4_transpose_step (_x2, _x3);                      \
  (x0) = (i32x4) _x0;                                   \
  (x1) = (i32x4) _x1;                                   \
  (x2) = (i32x4) _x2;                                   \
  (x3) = (i32x4) _x3;                                   \
} while (0)
323
/* Remove any macro named `_` so it does not leak past this header.
   NOTE(review): nothing in the visible part of this header defines `_`;
   confirm whether this is a leftover or intentionally clears an
   includer's definition. */
#undef _
325
326#endif /* included_vector_funcs_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */