Chenmin Sun | 7f83738 | 2020-03-28 00:34:19 +0800 | [diff] [blame] | 1 | From d338aa7cb45638b3a14177a8d83ef01c4ec20d1b Mon Sep 17 00:00:00 2001 |
| 2 | From: Leyi Rong <leyi.rong@intel.com> |
| 3 | Date: Wed, 8 Apr 2020 14:22:09 +0800 |
| 4 | Subject: [DPDK 14/17] net/iavf: add RSS hash parsing in AVX path |
| 5 | |
| 6 | Support RSS hash parsing from Flex Rx |
| 7 | descriptor in AVX data path. |
| 8 | |
| 9 | Signed-off-by: Leyi Rong <leyi.rong@intel.com> |
| 10 | --- |
| 11 | drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++- |
| 12 | 1 file changed, 90 insertions(+), 2 deletions(-) |
| 13 | |
| 14 | diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c |
| 15 | index 3bf5833fa..22f1b7887 100644 |
| 16 | --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c |
| 17 | +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c |
| 18 | @@ -698,7 +698,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, |
| 19 | _mm256_set_epi8 |
| 20 | (/* first descriptor */ |
| 21 | 0xFF, 0xFF, |
| 22 | - 0xFF, 0xFF, /* rss not supported */ |
| 23 | + 0xFF, 0xFF, /* rss hash parsed separately */ |
| 24 | 11, 10, /* octet 10~11, 16 bits vlan_macip */ |
| 25 | 5, 4, /* octet 4~5, 16 bits data_len */ |
| 26 | 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ |
| 27 | @@ -707,7 +707,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, |
| 28 | 0xFF, 0xFF, /*pkt_type set as unknown */ |
| 29 | /* second descriptor */ |
| 30 | 0xFF, 0xFF, |
| 31 | - 0xFF, 0xFF, /* rss not supported */ |
| 32 | + 0xFF, 0xFF, /* rss hash parsed separately */ |
| 33 | 11, 10, /* octet 10~11, 16 bits vlan_macip */ |
| 34 | 5, 4, /* octet 4~5, 16 bits data_len */ |
| 35 | 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ |
| 36 | @@ -994,6 +994,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, |
| 37 | _mm256_extract_epi32(fdir_id0_7, 4); |
| 38 | } /* if() on fdir_enabled */ |
| 39 | |
| 40 | + /** |
| 41 | + * needs to load 2nd 16B of each desc for RSS hash parsing, |
| 42 | + * will cause performance drop to get into this context. |
| 43 | + */ |
| 44 | + if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & |
| 45 | + DEV_RX_OFFLOAD_RSS_HASH) { |
| 46 | + /* load bottom half of every 32B desc */ |
| 47 | + const __m128i raw_desc_bh7 = |
| 48 | + _mm_load_si128 |
| 49 | + ((void *)(&rxdp[7].wb.status_error1)); |
| 50 | + rte_compiler_barrier(); |
| 51 | + const __m128i raw_desc_bh6 = |
| 52 | + _mm_load_si128 |
| 53 | + ((void *)(&rxdp[6].wb.status_error1)); |
| 54 | + rte_compiler_barrier(); |
| 55 | + const __m128i raw_desc_bh5 = |
| 56 | + _mm_load_si128 |
| 57 | + ((void *)(&rxdp[5].wb.status_error1)); |
| 58 | + rte_compiler_barrier(); |
| 59 | + const __m128i raw_desc_bh4 = |
| 60 | + _mm_load_si128 |
| 61 | + ((void *)(&rxdp[4].wb.status_error1)); |
| 62 | + rte_compiler_barrier(); |
| 63 | + const __m128i raw_desc_bh3 = |
| 64 | + _mm_load_si128 |
| 65 | + ((void *)(&rxdp[3].wb.status_error1)); |
| 66 | + rte_compiler_barrier(); |
| 67 | + const __m128i raw_desc_bh2 = |
| 68 | + _mm_load_si128 |
| 69 | + ((void *)(&rxdp[2].wb.status_error1)); |
| 70 | + rte_compiler_barrier(); |
| 71 | + const __m128i raw_desc_bh1 = |
| 72 | + _mm_load_si128 |
| 73 | + ((void *)(&rxdp[1].wb.status_error1)); |
| 74 | + rte_compiler_barrier(); |
| 75 | + const __m128i raw_desc_bh0 = |
| 76 | + _mm_load_si128 |
| 77 | + ((void *)(&rxdp[0].wb.status_error1)); |
| 78 | + |
| 79 | + __m256i raw_desc_bh6_7 = |
| 80 | + _mm256_inserti128_si256 |
| 81 | + (_mm256_castsi128_si256(raw_desc_bh6), |
| 82 | + raw_desc_bh7, 1); |
| 83 | + __m256i raw_desc_bh4_5 = |
| 84 | + _mm256_inserti128_si256 |
| 85 | + (_mm256_castsi128_si256(raw_desc_bh4), |
| 86 | + raw_desc_bh5, 1); |
| 87 | + __m256i raw_desc_bh2_3 = |
| 88 | + _mm256_inserti128_si256 |
| 89 | + (_mm256_castsi128_si256(raw_desc_bh2), |
| 90 | + raw_desc_bh3, 1); |
| 91 | + __m256i raw_desc_bh0_1 = |
| 92 | + _mm256_inserti128_si256 |
| 93 | + (_mm256_castsi128_si256(raw_desc_bh0), |
| 94 | + raw_desc_bh1, 1); |
| 95 | + |
| 96 | + /** |
| 97 | + * to shift the 32b RSS hash value to the |
| 98 | + * highest 32b of each 128b before mask |
| 99 | + */ |
| 100 | + __m256i rss_hash6_7 = |
| 101 | + _mm256_slli_epi64(raw_desc_bh6_7, 32); |
| 102 | + __m256i rss_hash4_5 = |
| 103 | + _mm256_slli_epi64(raw_desc_bh4_5, 32); |
| 104 | + __m256i rss_hash2_3 = |
| 105 | + _mm256_slli_epi64(raw_desc_bh2_3, 32); |
| 106 | + __m256i rss_hash0_1 = |
| 107 | + _mm256_slli_epi64(raw_desc_bh0_1, 32); |
| 108 | + |
| 109 | + __m256i rss_hash_msk = |
| 110 | + _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0, |
| 111 | + 0xFFFFFFFF, 0, 0, 0); |
| 112 | + |
| 113 | + rss_hash6_7 = _mm256_and_si256 |
| 114 | + (rss_hash6_7, rss_hash_msk); |
| 115 | + rss_hash4_5 = _mm256_and_si256 |
| 116 | + (rss_hash4_5, rss_hash_msk); |
| 117 | + rss_hash2_3 = _mm256_and_si256 |
| 118 | + (rss_hash2_3, rss_hash_msk); |
| 119 | + rss_hash0_1 = _mm256_and_si256 |
| 120 | + (rss_hash0_1, rss_hash_msk); |
| 121 | + |
| 122 | + mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7); |
| 123 | + mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5); |
| 124 | + mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3); |
| 125 | + mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1); |
| 126 | + } /* if() on RSS hash parsing */ |
| 127 | + |
| 128 | /** |
| 129 | * At this point, we have the 8 sets of flags in the low 16-bits |
| 130 | * of each 32-bit value in vlan0. |
| 131 | -- |
| 132 | 2.17.1 |
| 133 | |