/* SPDX-License-Identifier: Apache-2.0
 * Copyright(c) 2021 Cisco Systems, Inc.
 */

#ifndef included_vector_index_to_ptr_h
#define included_vector_index_to_ptr_h
#include <vppinfra/clib.h>

#ifdef CLIB_HAVE_VEC128
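/* Expand 4 consecutive u32 indices starting at offset i into 4 pointers:
   ptrs[i + k] = (indices[i + k] << shift) + ov.  The low and high pairs of
   the 128-bit index vector are widened to u64x2 separately; on aarch64 the
   high half is widened directly, elsewhere it is first shifted down by 8
   bytes. */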
static_always_inline void
clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
{
  u32x4 iv4 = u32x4_load_unaligned (indices + i);
  u64x2 pv2;
  pv2 = u64x2_from_u32x4 (iv4);
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
#ifdef __aarch64__
  pv2 = u64x2_from_u32x4_high (iv4);
#else
  pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
#endif
  u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
}
#endif

/** \brief Convert an array of indices to pointers using base and shift

    @param indices source array of u32 indices
    @param base base pointer
    @param shift number of bits each index is shifted left before being added
	   to base
    @param ptrs destination array of pointers
    @param n_elts number of elements in the source array
*/

static_always_inline void
clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
		       u32 n_elts)
{
#if defined CLIB_HAVE_VEC512
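  /* 512-bit path: widen 8 indices per u64x8, unrolled to 64/32/16/8 elements
     per step.  A remainder of 1-7 elements is written with a final
     overlapping store; fewer than 8 elements in total take the masked
     load/store branch below. */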
  if (n_elts >= 8)
    {
      u64x8 off = u64x8_splat ((u64) base);
      u64x8 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 64)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
	  b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
	  b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
	  b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
	  b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
	  b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
	  u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
	  u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
	  u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
	  u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
	  u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
	  ptrs += 64;
	  indices += 64;
	  n_elts -= 64;
	}

      if (n_elts == 0)
	return;

      if (n_elts >= 32)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
	  b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
	  u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
	  ptrs += 32;
	  indices += 32;
	  n_elts -= 32;
	}
      if (n_elts >= 16)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
	  ptrs += 16;
	  indices += 16;
	  n_elts -= 16;
	}
      if (n_elts >= 8)
	{
	  b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
	  u64x8_store_unaligned ((b0 << shift) + off, ptrs);
	  ptrs += 8;
	  indices += 8;
	  n_elts -= 8;
	}

      if (n_elts == 0)
	return;

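      /* 1-7 elements remain: reload the last 8 indices of the array and
	 rewrite the tail with an overlapping store.  This is safe because at
	 least 8 pointers were already written above. */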
      b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
      u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
    }
  else
    {
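      /* fewer than 8 elements: masked load of the valid indices (invalid
	 lanes zeroed) and masked store of the resulting pointers */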
      u32 mask = pow2_mask (n_elts);
      u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
      u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
      return;
    }
#elif defined CLIB_HAVE_VEC256
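  /* 256-bit path: widen 4 indices per u64x4, unrolled to 32/16/8/4 elements
     per step, finishing with an overlapping store of the last 4 pointers.
     Fewer than 4 elements in total use the masked branch when available. */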
  if (n_elts >= 4)
    {
      u64x4 off = u64x4_splat ((u64) base);
      u64x4 b0, b1, b2, b3, b4, b5, b6, b7;

      while (n_elts >= 32)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
	  b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
	  b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
	  b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
	  b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
	  b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
	  u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
	  u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
	  u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
	  u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
	  u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
	  ptrs += 32;
	  indices += 32;
	  n_elts -= 32;
	}

      if (n_elts == 0)
	return;

      if (n_elts >= 16)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
	  b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
	  u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
	  ptrs += 16;
	  indices += 16;
	  n_elts -= 16;
	}
      if (n_elts >= 8)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
	  ptrs += 8;
	  indices += 8;
	  n_elts -= 8;
	}
      if (n_elts > 4)
	{
	  b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
	  u64x4_store_unaligned ((b0 << shift) + off, ptrs);
	  ptrs += 4;
	  indices += 4;
	  n_elts -= 4;
	}

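      /* write the last 1-4 elements with an overlapping store of the final
	 four pointers; any overlap was already written correctly above */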
      b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
      u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
      return;
    }
#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
  else
    {
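      /* fewer than 4 elements: masked load of the valid indices and masked
	 store of the resulting pointers */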
      u32 mask = pow2_mask (n_elts);
      u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
      u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
      return;
    }
#endif
#elif defined(CLIB_HAVE_VEC128)
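  /* 128-bit path: clib_index_to_ptr_u32x4() converts 4 indices per call,
     unrolled to 32/16/8 elements per step.  The original base pointers are
     saved in i/p so the last 4 elements can be rewritten at the end. */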
  if (n_elts >= 4)
    {
      u64x2 ov = u64x2_splat ((u64) base);
      u32 *i = (u32 *) indices;
      void **p = (void **) ptrs;
      u32 n = n_elts;

      while (n >= 32)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
	  indices += 32;
	  ptrs += 32;
	  n -= 32;
	}

      if (n == 0)
	return;

      if (n >= 16)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
	  indices += 16;
	  ptrs += 16;
	  n -= 16;
	}

      if (n >= 8)
	{
	  clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
	  clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
	  indices += 8;
	  ptrs += 8;
	  n -= 8;
	}

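      /* 1-7 elements remain: if more than 4 are left, convert 4 at the
	 current position, then always rewrite the last 4 elements of the
	 original array (overlapping stores) via the saved i/p pointers */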
      if (n > 4)
	clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);

      clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
      return;
    }
#endif
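  /* scalar fallback; also handles short arrays on paths without masked
     vector load/store support */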
  while (n_elts)
    {
      ptrs[0] = base + ((u64) indices[0] << shift);
      ptrs += 1;
      indices += 1;
      n_elts -= 1;
    }
}
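/*
 * Usage sketch (illustrative only, names are hypothetical): expanding
 * indices into an array of 64-byte elements, i.e. base = elts, shift = 6:
 *
 *   u32 indices[3] = { 0, 2, 7 };
 *   void *ptrs[3];
 *   clib_index_to_ptr_u32 (indices, elts, 6, ptrs, 3);
 *   // now ptrs[k] == (void *) elts + ((u64) indices[k] << 6)
 */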

#endif