blob: 34ce78684565f9ee9a5ae858e6506b8f41cbb346 [file] [log] [blame]
Chenmin Sun7f837382020-03-28 00:34:19 +08001From d338aa7cb45638b3a14177a8d83ef01c4ec20d1b Mon Sep 17 00:00:00 2001
2From: Leyi Rong <leyi.rong@intel.com>
3Date: Wed, 8 Apr 2020 14:22:09 +0800
4Subject: [DPDK 14/17] net/iavf: add RSS hash parsing in AVX path
5
6Support RSS hash parsing from Flex Rx
7descriptor in AVX data path.
8
9Signed-off-by: Leyi Rong <leyi.rong@intel.com>
10---
11 drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++-
12 1 file changed, 90 insertions(+), 2 deletions(-)
13
14diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
15index 3bf5833fa..22f1b7887 100644
16--- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c
17+++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c
18@@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
19 _mm256_set_epi8
20 (/* first descriptor */
21 0xFF, 0xFF,
22- 0xFF, 0xFF, /* rss not supported */
23+ 0xFF, 0xFF, /* rss hash parsed separately */
24 11, 10, /* octet 10~11, 16 bits vlan_macip */
25 5, 4, /* octet 4~5, 16 bits data_len */
26 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
27@@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
28 0xFF, 0xFF, /*pkt_type set as unknown */
29 /* second descriptor */
30 0xFF, 0xFF,
31- 0xFF, 0xFF, /* rss not supported */
32+ 0xFF, 0xFF, /* rss hash parsed separately */
33 11, 10, /* octet 10~11, 16 bits vlan_macip */
34 5, 4, /* octet 4~5, 16 bits data_len */
35 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */
36@@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq,
37 _mm256_extract_epi32(fdir_id0_7, 4);
38 } /* if() on fdir_enabled */
39
40+ /**
41+ * needs to load 2nd 16B of each desc for RSS hash parsing,
42+ * will cause performance drop to get into this context.
43+ */
44+ if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
45+ DEV_RX_OFFLOAD_RSS_HASH) {
46+ /* load bottom half of every 32B desc */
47+ const __m128i raw_desc_bh7 =
48+ _mm_load_si128
49+ ((void *)(&rxdp[7].wb.status_error1));
50+ rte_compiler_barrier();
51+ const __m128i raw_desc_bh6 =
52+ _mm_load_si128
53+ ((void *)(&rxdp[6].wb.status_error1));
54+ rte_compiler_barrier();
55+ const __m128i raw_desc_bh5 =
56+ _mm_load_si128
57+ ((void *)(&rxdp[5].wb.status_error1));
58+ rte_compiler_barrier();
59+ const __m128i raw_desc_bh4 =
60+ _mm_load_si128
61+ ((void *)(&rxdp[4].wb.status_error1));
62+ rte_compiler_barrier();
63+ const __m128i raw_desc_bh3 =
64+ _mm_load_si128
65+ ((void *)(&rxdp[3].wb.status_error1));
66+ rte_compiler_barrier();
67+ const __m128i raw_desc_bh2 =
68+ _mm_load_si128
69+ ((void *)(&rxdp[2].wb.status_error1));
70+ rte_compiler_barrier();
71+ const __m128i raw_desc_bh1 =
72+ _mm_load_si128
73+ ((void *)(&rxdp[1].wb.status_error1));
74+ rte_compiler_barrier();
75+ const __m128i raw_desc_bh0 =
76+ _mm_load_si128
77+ ((void *)(&rxdp[0].wb.status_error1));
78+
79+ __m256i raw_desc_bh6_7 =
80+ _mm256_inserti128_si256
81+ (_mm256_castsi128_si256(raw_desc_bh6),
82+ raw_desc_bh7, 1);
83+ __m256i raw_desc_bh4_5 =
84+ _mm256_inserti128_si256
85+ (_mm256_castsi128_si256(raw_desc_bh4),
86+ raw_desc_bh5, 1);
87+ __m256i raw_desc_bh2_3 =
88+ _mm256_inserti128_si256
89+ (_mm256_castsi128_si256(raw_desc_bh2),
90+ raw_desc_bh3, 1);
91+ __m256i raw_desc_bh0_1 =
92+ _mm256_inserti128_si256
93+ (_mm256_castsi128_si256(raw_desc_bh0),
94+ raw_desc_bh1, 1);
95+
96+ /**
97+ * to shift the 32b RSS hash value to the
98+ * highest 32b of each 128b before mask
99+ */
100+ __m256i rss_hash6_7 =
101+ _mm256_slli_epi64(raw_desc_bh6_7, 32);
102+ __m256i rss_hash4_5 =
103+ _mm256_slli_epi64(raw_desc_bh4_5, 32);
104+ __m256i rss_hash2_3 =
105+ _mm256_slli_epi64(raw_desc_bh2_3, 32);
106+ __m256i rss_hash0_1 =
107+ _mm256_slli_epi64(raw_desc_bh0_1, 32);
108+
109+ __m256i rss_hash_msk =
110+ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
111+ 0xFFFFFFFF, 0, 0, 0);
112+
113+ rss_hash6_7 = _mm256_and_si256
114+ (rss_hash6_7, rss_hash_msk);
115+ rss_hash4_5 = _mm256_and_si256
116+ (rss_hash4_5, rss_hash_msk);
117+ rss_hash2_3 = _mm256_and_si256
118+ (rss_hash2_3, rss_hash_msk);
119+ rss_hash0_1 = _mm256_and_si256
120+ (rss_hash0_1, rss_hash_msk);
121+
122+ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
123+ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
124+ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
125+ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
126+ } /* if() on RSS hash parsing */
127+
128 /**
129 * At this point, we have the 8 sets of flags in the low 16-bits
130 * of each 32-bit value in vlan0.
131--
1322.17.1
133