Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 1 | /* |
| 2 | *------------------------------------------------------------------ |
| 3 | * Copyright (c) 2019 Cisco and/or its affiliates. |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at: |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *------------------------------------------------------------------ |
| 16 | */ |
| 17 | |
| 18 | /* |
| 19 | *------------------------------------------------------------------ |
| 20 | * Copyright(c) 2018, Intel Corporation All rights reserved. |
| 21 | * |
| 22 | * Redistribution and use in source and binary forms, with or without |
| 23 | * modification, are permitted provided that the following conditions |
| 24 | * are met: |
| 25 | * * Redistributions of source code must retain the above copyright |
| 26 | * notice, this list of conditions and the following disclaimer. |
| 27 | * * Redistributions in binary form must reproduce the above copyright |
| 28 | * notice, this list of conditions and the following disclaimer in |
| 29 | * the documentation and/or other materials provided with the |
| 30 | * distribution. |
| 31 | * * Neither the name of Intel Corporation nor the names of its |
| 32 | * contributors may be used to endorse or promote products derived |
| 33 | * from this software without specific prior written permission. |
| 34 | * |
| 35 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 36 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 37 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 38 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 39 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 40 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 41 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 42 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 43 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 44 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 45 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 46 | *------------------------------------------------------------------ |
| 47 | */ |
| 48 | |
| 49 | /* |
| 50 | * Based on work by: Shay Gueron, Michael E. Kounavis, Erdinc Ozturk, |
| 51 | * Vinodh Gopal, James Guilford, Tomasz Kantecki |
| 52 | * |
| 53 | * References: |
| 54 | * [1] Vinodh Gopal et al. Optimized Galois-Counter-Mode Implementation on |
| 55 | * Intel Architecture Processors. August, 2010 |
| 56 | * [2] Erdinc Ozturk et al. Enabling High-Performance Galois-Counter-Mode on |
| 57 | * Intel Architecture Processors. October, 2012. |
| 58 | * [3] intel-ipsec-mb library, https://github.com/01org/intel-ipsec-mb.git |
| 59 | * |
| 60 | * Definitions: |
| 61 | * GF Galois Extension Field GF(2^128) - finite field where elements are |
| 62 | * represented as polynomials with coefficients in GF(2) with the |
| 63 | * highest degree of 127. Polynomials are represented as 128-bit binary |
| 64 | * numbers where each bit represents one coefficient. |
| 65 | * e.g. polynomial x^5 + x^3 + x + 1 is represented in binary 101011. |
| 66 | * H hash key (128 bit) |
| 67 | * POLY irreducible polynomial x^128 + x^7 + x^2 + x + 1 |
| 68 | * RPOLY irreducible polynomial x^128 + x^127 + x^126 + x^121 + 1 |
| 69 | * + addition in GF, which equals to XOR operation |
| 70 | * * multiplication in GF |
| 71 | * |
| 72 | * GF multiplication consists of 2 steps: |
| 73 | * - carry-less multiplication of two 128-bit operands into 256-bit result |
| 74 | * - reduction of 256-bit result into 128-bit with modulo POLY |
| 75 | * |
| 76 | * GHash is calculated on 128-bit blocks of data according to the following |
| 77 | * formula: |
| 78 | * GH = (GH + data) * hash_key |
| 79 | * |
| 80 | * To avoid bit-reflection of data, this code uses GF multiplication |
| 81 | * with reversed polynomial: |
| 82 | * a * b * x^-127 mod RPOLY |
| 83 | * |
| 84 | * To improve computation speed table Hi is precomputed with powers of H', |
| 85 | * where H' is calculated as H<<1 mod RPOLY. |
| 86 | * This allows us to improve performance by deferring reduction. For example |
| 87 | * to calculate ghash of 4 128-bit blocks of data (b0, b1, b2, b3), we can do: |
| 88 | * |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 89 | * u8x16 Hi[4]; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 90 | * ghash_precompute (H, Hi, 4); |
| 91 | * |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 92 | * ghash_ctx_t _gd, *gd = &_gd; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 93 | * ghash_mul_first (gd, GH ^ b0, Hi[0]); |
| 94 | * ghash_mul_next (gd, b1, Hi[1]); |
| 95 | * ghash_mul_next (gd, b2, Hi[2]); |
| 96 | * ghash_mul_next (gd, b3, Hi[3]); |
| 97 | * ghash_reduce (gd); |
| 98 | * ghash_reduce2 (gd); |
| 99 | * GH = ghash_final (gd); |
| 100 | * |
| 101 | * Reduction step is split into 3 functions so it can be better interleaved |
| 102 | * with other code, (i.e. with AES computation). |
| 103 | */ |
| 104 | |
| 105 | #ifndef __ghash_h__ |
| 106 | #define __ghash_h__ |
| 107 | |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 108 | static_always_inline u8x16 |
| 109 | gmul_lo_lo (u8x16 a, u8x16 b) |
| 110 | { |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 111 | #if defined (__PCLMUL__) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 112 | return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 113 | #elif defined (__ARM_FEATURE_CRYPTO) |
| 114 | return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a), |
| 115 | (poly64_t) vget_low_p64 ((poly64x2_t) b)); |
| 116 | #endif |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 117 | } |
| 118 | |
| 119 | static_always_inline u8x16 |
Damjan Marion | 8727acd | 2020-02-12 18:30:17 +0100 | [diff] [blame] | 120 | gmul_hi_lo (u8x16 a, u8x16 b) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 121 | { |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 122 | #if defined (__PCLMUL__) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 123 | return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 124 | #elif defined (__ARM_FEATURE_CRYPTO) |
| 125 | return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a), |
| 126 | (poly64_t) vget_low_p64 ((poly64x2_t) b)); |
| 127 | #endif |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | static_always_inline u8x16 |
Damjan Marion | 8727acd | 2020-02-12 18:30:17 +0100 | [diff] [blame] | 131 | gmul_lo_hi (u8x16 a, u8x16 b) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 132 | { |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 133 | #if defined (__PCLMUL__) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 134 | return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 135 | #elif defined (__ARM_FEATURE_CRYPTO) |
| 136 | return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a), |
| 137 | (poly64_t) vget_high_p64 ((poly64x2_t) b)); |
| 138 | #endif |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 139 | } |
| 140 | |
| 141 | static_always_inline u8x16 |
| 142 | gmul_hi_hi (u8x16 a, u8x16 b) |
| 143 | { |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 144 | #if defined (__PCLMUL__) |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 145 | return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 146 | #elif defined (__ARM_FEATURE_CRYPTO) |
| 147 | return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b); |
| 148 | #endif |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 149 | } |
| 150 | |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 151 | typedef struct |
| 152 | { |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 153 | u8x16 mid, hi, lo, tmp_lo, tmp_hi; |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 154 | u8x32 hi2, lo2, mid2, tmp_lo2, tmp_hi2; |
| 155 | u8x64 hi4, lo4, mid4, tmp_lo4, tmp_hi4; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 156 | int pending; |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 157 | } ghash_ctx_t; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 158 | |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 159 | static const u8x16 ghash_poly = { |
| 160 | 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 161 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2 |
| 162 | }; |
| 163 | |
| 164 | static const u8x16 ghash_poly2 = { |
| 165 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, |
| 166 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2 |
| 167 | }; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 168 | |
| 169 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 170 | ghash_mul_first (ghash_ctx_t *gd, u8x16 a, u8x16 b) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 171 | { |
| 172 | /* a1 * b1 */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 173 | gd->hi = gmul_hi_hi (a, b); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 174 | /* a0 * b0 */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 175 | gd->lo = gmul_lo_lo (a, b); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 176 | /* a0 * b1 ^ a1 * b0 */ |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 177 | gd->mid = gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 178 | |
| 179 | /* set gd->pending to 0 so next invocation of ghash_mul_next(...) knows that |
| 180 | there is no pending data in tmp_lo and tmp_hi */ |
| 181 | gd->pending = 0; |
| 182 | } |
| 183 | |
| 184 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 185 | ghash_mul_next (ghash_ctx_t *gd, u8x16 a, u8x16 b) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 186 | { |
| 187 | /* a1 * b1 */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 188 | u8x16 hi = gmul_hi_hi (a, b); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 189 | /* a0 * b0 */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 190 | u8x16 lo = gmul_lo_lo (a, b); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 191 | |
| 192 | /* this branch will be optimized out by the compiler, and it allows us to |
| 193 | reduce number of XOR operations by using ternary logic */ |
| 194 | if (gd->pending) |
| 195 | { |
| 196 | /* there is peding data from previous invocation so we can XOR */ |
Damjan Marion | f75defa | 2020-02-13 18:14:06 +0100 | [diff] [blame] | 197 | gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, hi); |
| 198 | gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, lo); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 199 | gd->pending = 0; |
| 200 | } |
| 201 | else |
| 202 | { |
| 203 | /* there is no peding data from previous invocation so we postpone XOR */ |
| 204 | gd->tmp_hi = hi; |
| 205 | gd->tmp_lo = lo; |
| 206 | gd->pending = 1; |
| 207 | } |
| 208 | |
| 209 | /* gd->mid ^= a0 * b1 ^ a1 * b0 */ |
Damjan Marion | f75defa | 2020-02-13 18:14:06 +0100 | [diff] [blame] | 210 | gd->mid = u8x16_xor3 (gd->mid, gmul_hi_lo (a, b), gmul_lo_hi (a, b)); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 211 | } |
| 212 | |
| 213 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 214 | ghash_reduce (ghash_ctx_t *gd) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 215 | { |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 216 | u8x16 r; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 217 | |
| 218 | /* Final combination: |
| 219 | gd->lo ^= gd->mid << 64 |
| 220 | gd->hi ^= gd->mid >> 64 */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 221 | u8x16 midl = u8x16_word_shift_left (gd->mid, 8); |
| 222 | u8x16 midr = u8x16_word_shift_right (gd->mid, 8); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 223 | |
| 224 | if (gd->pending) |
| 225 | { |
Damjan Marion | f75defa | 2020-02-13 18:14:06 +0100 | [diff] [blame] | 226 | gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, midl); |
| 227 | gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, midr); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 228 | } |
| 229 | else |
| 230 | { |
| 231 | gd->lo ^= midl; |
| 232 | gd->hi ^= midr; |
| 233 | } |
Damjan Marion | 8727acd | 2020-02-12 18:30:17 +0100 | [diff] [blame] | 234 | r = gmul_hi_lo (ghash_poly2, gd->lo); |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 235 | gd->lo ^= u8x16_word_shift_left (r, 8); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 236 | } |
| 237 | |
| 238 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 239 | ghash_reduce2 (ghash_ctx_t *gd) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 240 | { |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 241 | gd->tmp_lo = gmul_lo_lo (ghash_poly2, gd->lo); |
Damjan Marion | 8727acd | 2020-02-12 18:30:17 +0100 | [diff] [blame] | 242 | gd->tmp_hi = gmul_lo_hi (ghash_poly2, gd->lo); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 243 | } |
| 244 | |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 245 | static_always_inline u8x16 |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 246 | ghash_final (ghash_ctx_t *gd) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 247 | { |
Damjan Marion | f75defa | 2020-02-13 18:14:06 +0100 | [diff] [blame] | 248 | return u8x16_xor3 (gd->hi, u8x16_word_shift_right (gd->tmp_lo, 4), |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 249 | u8x16_word_shift_left (gd->tmp_hi, 4)); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 250 | } |
| 251 | |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 252 | static_always_inline u8x16 |
| 253 | ghash_mul (u8x16 a, u8x16 b) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 254 | { |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 255 | ghash_ctx_t _gd, *gd = &_gd; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 256 | ghash_mul_first (gd, a, b); |
| 257 | ghash_reduce (gd); |
| 258 | ghash_reduce2 (gd); |
| 259 | return ghash_final (gd); |
| 260 | } |
| 261 | |
Damjan Marion | adeaf16 | 2023-03-14 18:04:45 +0000 | [diff] [blame] | 262 | #if defined(__VPCLMULQDQ__) && defined(__AVX512F__) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 263 | |
| 264 | static const u8x64 ghash4_poly2 = { |
| 265 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, |
| 266 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, |
| 267 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, |
| 268 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, |
| 269 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, |
| 270 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, |
| 271 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, |
| 272 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, |
| 273 | }; |
| 274 | |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 275 | static_always_inline u8x64 |
| 276 | gmul4_lo_lo (u8x64 a, u8x64 b) |
| 277 | { |
| 278 | return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x00); |
| 279 | } |
| 280 | |
| 281 | static_always_inline u8x64 |
| 282 | gmul4_hi_lo (u8x64 a, u8x64 b) |
| 283 | { |
| 284 | return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x01); |
| 285 | } |
| 286 | |
| 287 | static_always_inline u8x64 |
| 288 | gmul4_lo_hi (u8x64 a, u8x64 b) |
| 289 | { |
| 290 | return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x10); |
| 291 | } |
| 292 | |
| 293 | static_always_inline u8x64 |
| 294 | gmul4_hi_hi (u8x64 a, u8x64 b) |
| 295 | { |
| 296 | return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11); |
| 297 | } |
| 298 | |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 299 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 300 | ghash4_mul_first (ghash_ctx_t *gd, u8x64 a, u8x64 b) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 301 | { |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 302 | gd->hi4 = gmul4_hi_hi (a, b); |
| 303 | gd->lo4 = gmul4_lo_lo (a, b); |
| 304 | gd->mid4 = gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 305 | gd->pending = 0; |
| 306 | } |
| 307 | |
| 308 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 309 | ghash4_mul_next (ghash_ctx_t *gd, u8x64 a, u8x64 b) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 310 | { |
| 311 | u8x64 hi = gmul4_hi_hi (a, b); |
| 312 | u8x64 lo = gmul4_lo_lo (a, b); |
| 313 | |
| 314 | if (gd->pending) |
| 315 | { |
| 316 | /* there is peding data from previous invocation so we can XOR */ |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 317 | gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, hi); |
| 318 | gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, lo); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 319 | gd->pending = 0; |
| 320 | } |
| 321 | else |
| 322 | { |
| 323 | /* there is no peding data from previous invocation so we postpone XOR */ |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 324 | gd->tmp_hi4 = hi; |
| 325 | gd->tmp_lo4 = lo; |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 326 | gd->pending = 1; |
| 327 | } |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 328 | gd->mid4 = u8x64_xor3 (gd->mid4, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b)); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 329 | } |
| 330 | |
| 331 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 332 | ghash4_reduce (ghash_ctx_t *gd) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 333 | { |
| 334 | u8x64 r; |
| 335 | |
| 336 | /* Final combination: |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 337 | gd->lo4 ^= gd->mid4 << 64 |
| 338 | gd->hi4 ^= gd->mid4 >> 64 */ |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 339 | |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 340 | u8x64 midl = u8x64_word_shift_left (gd->mid4, 8); |
| 341 | u8x64 midr = u8x64_word_shift_right (gd->mid4, 8); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 342 | |
| 343 | if (gd->pending) |
| 344 | { |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 345 | gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, midl); |
| 346 | gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, midr); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 347 | } |
| 348 | else |
| 349 | { |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 350 | gd->lo4 ^= midl; |
| 351 | gd->hi4 ^= midr; |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 352 | } |
| 353 | |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 354 | r = gmul4_hi_lo (ghash4_poly2, gd->lo4); |
| 355 | gd->lo4 ^= u8x64_word_shift_left (r, 8); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 356 | } |
| 357 | |
| 358 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 359 | ghash4_reduce2 (ghash_ctx_t *gd) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 360 | { |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 361 | gd->tmp_lo4 = gmul4_lo_lo (ghash4_poly2, gd->lo4); |
| 362 | gd->tmp_hi4 = gmul4_lo_hi (ghash4_poly2, gd->lo4); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 363 | } |
| 364 | |
| 365 | static_always_inline u8x16 |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 366 | ghash4_final (ghash_ctx_t *gd) |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 367 | { |
| 368 | u8x64 r; |
| 369 | u8x32 t; |
| 370 | |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 371 | r = u8x64_xor3 (gd->hi4, u8x64_word_shift_right (gd->tmp_lo4, 4), |
| 372 | u8x64_word_shift_left (gd->tmp_hi4, 4)); |
Damjan Marion | 627fb6a | 2020-02-16 13:07:13 +0100 | [diff] [blame] | 373 | |
| 374 | /* horizontal XOR of 4 128-bit lanes */ |
| 375 | t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r); |
| 376 | return u8x32_extract_hi (t) ^ u8x32_extract_lo (t); |
| 377 | } |
| 378 | #endif |
| 379 | |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 380 | #if defined(__VPCLMULQDQ__) |
| 381 | |
| 382 | static const u8x32 ghash2_poly2 = { |
| 383 | 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
| 384 | 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, |
| 385 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, |
| 386 | }; |
| 387 | |
| 388 | static_always_inline u8x32 |
| 389 | gmul2_lo_lo (u8x32 a, u8x32 b) |
| 390 | { |
| 391 | return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x00); |
| 392 | } |
| 393 | |
| 394 | static_always_inline u8x32 |
| 395 | gmul2_hi_lo (u8x32 a, u8x32 b) |
| 396 | { |
| 397 | return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x01); |
| 398 | } |
| 399 | |
| 400 | static_always_inline u8x32 |
| 401 | gmul2_lo_hi (u8x32 a, u8x32 b) |
| 402 | { |
| 403 | return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x10); |
| 404 | } |
| 405 | |
| 406 | static_always_inline u8x32 |
| 407 | gmul2_hi_hi (u8x32 a, u8x32 b) |
| 408 | { |
| 409 | return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x11); |
| 410 | } |
| 411 | |
| 412 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 413 | ghash2_mul_first (ghash_ctx_t *gd, u8x32 a, u8x32 b) |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 414 | { |
| 415 | gd->hi2 = gmul2_hi_hi (a, b); |
| 416 | gd->lo2 = gmul2_lo_lo (a, b); |
| 417 | gd->mid2 = gmul2_hi_lo (a, b) ^ gmul2_lo_hi (a, b); |
| 418 | gd->pending = 0; |
| 419 | } |
| 420 | |
| 421 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 422 | ghash2_mul_next (ghash_ctx_t *gd, u8x32 a, u8x32 b) |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 423 | { |
| 424 | u8x32 hi = gmul2_hi_hi (a, b); |
| 425 | u8x32 lo = gmul2_lo_lo (a, b); |
| 426 | |
| 427 | if (gd->pending) |
| 428 | { |
| 429 | /* there is peding data from previous invocation so we can XOR */ |
| 430 | gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, hi); |
| 431 | gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, lo); |
| 432 | gd->pending = 0; |
| 433 | } |
| 434 | else |
| 435 | { |
| 436 | /* there is no peding data from previous invocation so we postpone XOR */ |
| 437 | gd->tmp_hi2 = hi; |
| 438 | gd->tmp_lo2 = lo; |
| 439 | gd->pending = 1; |
| 440 | } |
| 441 | gd->mid2 = u8x32_xor3 (gd->mid2, gmul2_hi_lo (a, b), gmul2_lo_hi (a, b)); |
| 442 | } |
| 443 | |
| 444 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 445 | ghash2_reduce (ghash_ctx_t *gd) |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 446 | { |
| 447 | u8x32 r; |
| 448 | |
| 449 | /* Final combination: |
| 450 | gd->lo2 ^= gd->mid2 << 64 |
| 451 | gd->hi2 ^= gd->mid2 >> 64 */ |
| 452 | |
| 453 | u8x32 midl = u8x32_word_shift_left (gd->mid2, 8); |
| 454 | u8x32 midr = u8x32_word_shift_right (gd->mid2, 8); |
| 455 | |
| 456 | if (gd->pending) |
| 457 | { |
| 458 | gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, midl); |
| 459 | gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, midr); |
| 460 | } |
| 461 | else |
| 462 | { |
| 463 | gd->lo2 ^= midl; |
| 464 | gd->hi2 ^= midr; |
| 465 | } |
| 466 | |
| 467 | r = gmul2_hi_lo (ghash2_poly2, gd->lo2); |
| 468 | gd->lo2 ^= u8x32_word_shift_left (r, 8); |
| 469 | } |
| 470 | |
| 471 | static_always_inline void |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 472 | ghash2_reduce2 (ghash_ctx_t *gd) |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 473 | { |
| 474 | gd->tmp_lo2 = gmul2_lo_lo (ghash2_poly2, gd->lo2); |
| 475 | gd->tmp_hi2 = gmul2_lo_hi (ghash2_poly2, gd->lo2); |
| 476 | } |
| 477 | |
| 478 | static_always_inline u8x16 |
Damjan Marion | 4172448 | 2023-03-23 13:44:01 +0000 | [diff] [blame] | 479 | ghash2_final (ghash_ctx_t *gd) |
Damjan Marion | b47376f | 2023-03-15 11:42:06 +0000 | [diff] [blame] | 480 | { |
| 481 | u8x32 r; |
| 482 | |
| 483 | r = u8x32_xor3 (gd->hi2, u8x32_word_shift_right (gd->tmp_lo2, 4), |
| 484 | u8x32_word_shift_left (gd->tmp_hi2, 4)); |
| 485 | |
| 486 | /* horizontal XOR of 2 128-bit lanes */ |
| 487 | return u8x32_extract_hi (r) ^ u8x32_extract_lo (r); |
| 488 | } |
| 489 | #endif |
| 490 | |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 491 | static_always_inline void |
Damjan Marion | a2e56ee | 2020-02-24 14:26:08 +0100 | [diff] [blame] | 492 | ghash_precompute (u8x16 H, u8x16 * Hi, int n) |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 493 | { |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 494 | u8x16 r8; |
| 495 | u32x4 r32; |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 496 | /* calcullate H<<1 mod poly from the hash key */ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 497 | r8 = (u8x16) ((u64x2) H >> 63); |
| 498 | H = (u8x16) ((u64x2) H << 1); |
| 499 | H |= u8x16_word_shift_left (r8, 8); |
| 500 | r32 = (u32x4) u8x16_word_shift_right (r8, 8); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 501 | #ifdef __SSE2__ |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 502 | r32 = u32x4_shuffle (r32, 0, 1, 2, 0); |
Damjan Marion | 11da575 | 2020-02-12 20:12:55 +0100 | [diff] [blame] | 503 | #else |
| 504 | r32[3] = r32[0]; |
| 505 | #endif |
Damjan Marion | 415b4b0 | 2020-02-11 17:04:38 +0100 | [diff] [blame] | 506 | r32 = r32 == (u32x4) {1, 0, 0, 1}; |
Damjan Marion | a2e56ee | 2020-02-24 14:26:08 +0100 | [diff] [blame] | 507 | Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 508 | |
| 509 | /* calculate H^(i + 1) */ |
Damjan Marion | a2e56ee | 2020-02-24 14:26:08 +0100 | [diff] [blame] | 510 | for (int i = n - 2; i >= 0; i--) |
| 511 | Hi[i] = ghash_mul (H, Hi[i + 1]); |
Damjan Marion | dd2423e | 2019-05-22 16:30:01 +0200 | [diff] [blame] | 512 | } |
| 513 | |
| 514 | #endif /* __ghash_h__ */ |
| 515 | |