blob: 9e80e3b03187d41904be4598481b0f339b855d95 [file] [log] [blame]
Damjan Mariondeb8af62019-04-02 19:06:50 +02001/*
2 *------------------------------------------------------------------
Damjan Marion776644e2020-01-31 10:24:07 +01003 * Copyright (c) 2020 Cisco and/or its affiliates.
Damjan Mariondeb8af62019-04-02 19:06:50 +02004 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *------------------------------------------------------------------
16 */
17
Damjan Marion9caef2a2024-01-08 19:05:40 +000018#ifndef __aes_h__
19#define __aes_h__
Damjan Mariondeb8af62019-04-02 19:06:50 +020020
/* AES key size selector; the enumerator value feeds the
   AES_KEY_ROUNDS / AES_KEY_BYTES macros below. */
typedef enum
{
  AES_KEY_128 = 0, /* 16-byte key, 10 rounds */
  AES_KEY_192 = 1, /* 24-byte key, 12 rounds */
  AES_KEY_256 = 2, /* 32-byte key, 14 rounds */
} aes_key_size_t;
Damjan Mariondeb8af62019-04-02 19:06:50 +020027
/* Number of rounds (10/12/14) and key length in bytes (16/24/32) for an
   aes_key_size_t value.  Arguments are parenthesized so the macros also
   expand correctly when given expressions, not just plain enumerators. */
#define AES_KEY_ROUNDS(x) (10 + (x) * 2)
#define AES_KEY_BYTES(x)  (16 + (x) * 8)
Damjan Mariondeb8af62019-04-02 19:06:50 +020030
/* Load one 16-byte AES block from a potentially unaligned pointer. */
static_always_inline u8x16
aes_block_load (u8 * p)
{
  return *(u8x16u *) p;
}
36
/* One AES encryption round on a single 128-bit block: SubBytes,
   ShiftRows, MixColumns, then XOR of round key k. */
static_always_inline u8x16
aes_enc_round_x1 (u8x16 a, u8x16 k)
{
#if defined (__AES__)
  return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
#elif defined (__ARM_FEATURE_CRYPTO)
  /* vaeseq_u8 XORs its 2nd operand in *before* SubBytes/ShiftRows, so a
     zero key is passed and the round key is added after MixColumns to
     match the AES-NI round ordering. */
  return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
#endif
}
46
#if defined(__VAES__) && defined(__AVX512F__)
/* 4-wide variants: each 512-bit vector carries four independent 128-bit
   AES blocks, processed by a single VAES instruction per round. */
static_always_inline u8x64
aes_enc_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_enc_last_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_dec_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
}

static_always_inline u8x64
aes_dec_last_round_x4 (u8x64 a, u8x64 k)
{
  return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
}
#endif
72
#ifdef __VAES__
/* 2-wide variants: each 256-bit vector carries two independent 128-bit
   AES blocks, processed by a single VAES instruction per round. */
static_always_inline u8x32
aes_enc_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_enc_last_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_dec_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
}

static_always_inline u8x32
aes_dec_last_round_x2 (u8x32 a, u8x32 k)
{
  return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
}
#endif
98
/* Final AES encryption round on a single block (no MixColumns). */
static_always_inline u8x16
aes_enc_last_round_x1 (u8x16 a, u8x16 k)
{
#if defined (__AES__)
  return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
#elif defined (__ARM_FEATURE_CRYPTO)
  /* zero key into vaeseq_u8, then add the real round key afterwards —
     same trick as aes_enc_round_x1 */
  return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
#endif
}
108
#ifdef __x86_64__

/* One AES decryption round (equivalent inverse cipher) on one block;
   expects round keys converted with aes_key_enc_to_dec (). */
static_always_inline u8x16
aes_dec_round_x1 (u8x16 a, u8x16 k)
{
  return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
}

/* Final AES decryption round (no InvMixColumns) on one block. */
static_always_inline u8x16
aes_dec_last_round_x1 (u8x16 a, u8x16 k)
{
  return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
}
#endif
Damjan Marion93975e62020-01-30 15:46:23 +0100123
/* Store one 16-byte AES block to a potentially unaligned pointer. */
static_always_inline void
aes_block_store (u8 * p, u8x16 r)
{
  *(u8x16u *) p = r;
}
129
130static_always_inline u8x16
Damjan Marion415b4b02020-02-11 17:04:38 +0100131aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
132{
Damjan Marion622b5ce2020-02-12 10:59:14 +0100133 int rounds = AES_KEY_ROUNDS (ks);
Damjan Marion415b4b02020-02-11 17:04:38 +0100134 block ^= round_keys[0];
Damjan Marion622b5ce2020-02-12 10:59:14 +0100135 for (int i = 1; i < rounds; i += 1)
Damjan Marion9caef2a2024-01-08 19:05:40 +0000136 block = aes_enc_round_x1 (block, round_keys[i]);
137 return aes_enc_last_round_x1 (block, round_keys[rounds]);
Damjan Marion415b4b02020-02-11 17:04:38 +0100138}
139
/* InvMixColumns transform of one block; used below to convert an
   encryption key schedule into the equivalent-inverse-cipher
   decryption schedule. */
static_always_inline u8x16
aes_inv_mix_column (u8x16 a)
{
#if defined (__AES__)
  return (u8x16) _mm_aesimc_si128 ((__m128i) a);
#elif defined (__ARM_FEATURE_CRYPTO)
  return vaesimcq_u8 (a);
#endif
}
Damjan Mariondeb8af62019-04-02 19:06:50 +0200149
Damjan Marion622b5ce2020-02-12 10:59:14 +0100150#ifdef __x86_64__
/* Key-expansion helper: SubWord/RotWord material for round constant b.
   b must be a compile-time constant (immediate operand of the
   instruction).  Arguments are parenthesized for macro hygiene. */
#define aes_keygen_assist(a, b) \
  (u8x16) _mm_aeskeygenassist_si128 ((__m128i) (a), (b))
153
Damjan Mariondeb8af62019-04-02 19:06:50 +0200154/* AES-NI based AES key expansion based on code samples from
155 Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
156 (323641-001) */
157
/* Derive the next AES-128 round key into rk[0] from rk[-1]:
   word-chaining XORs of the previous key with itself shifted by 1..3
   words, then XOR with lane 3 of the keygen-assist result r
   (RotWord+SubWord+rcon of the previous key's last word) broadcast to
   all four words. */
static_always_inline void
aes128_key_assist (u8x16 * rk, u8x16 r)
{
  u8x16 t = rk[-1];
  t ^= u8x16_word_shift_left (t, 4);
  t ^= u8x16_word_shift_left (t, 4);
  t ^= u8x16_word_shift_left (t, 4);
  rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
}
167
/* Expand a 16-byte AES key into 11 round keys (rk[0..10]).  The rcon
   argument of aes_keygen_assist must be a compile-time immediate, hence
   the fully unrolled call sequence. */
static_always_inline void
aes128_key_expand (u8x16 *rk, u8x16u const *k)
{
  rk[0] = k[0];
  aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
  aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
  aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
  aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
  aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
  aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
  aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
  aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
  aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
  aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
}
183
/* AES-192 expansion step: update r1 with word-chaining XORs plus lane 1
   of key_assist (SubWord/RotWord/rcon material), then chain r2 off the
   new r1 (lane 3 broadcast). */
static_always_inline void
aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
{
  u8x16 t;
  r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
  r1[0] ^= t = u8x16_word_shift_left (t, 4);
  r1[0] ^= u8x16_word_shift_left (t, 4);
  r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
  r2[0] ^= u8x16_word_shift_left (r2[0], 4);
  r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
}
195
/* Expand a 24-byte AES key into 13 round keys (rk[0..12]).  Each
   aes192_key_assist step produces 24 bytes (1.5 round keys) of
   schedule, so consecutive steps are stitched into 16-byte round keys
   with _mm_shuffle_pd, which selects one 64-bit half from each
   operand. */
static_always_inline void
aes192_key_expand (u8x16 * rk, u8x16u const *k)
{
  u8x16 r1, r2;

  rk[0] = r1 = k[0];
  /* only the low 8 bytes of the second 16-byte word belong to the key */
  rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
  rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
  rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
  rk[3] = r1;
  rk[4] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
  rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
  rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
  rk[6] = r1;
  rk[7] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
  rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
  rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
  rk[9] = r1;
  rk[10] = r2;

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
  rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
  rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);

  aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
  rk[12] = r1;
}
235
/* Generate rk[i] and (except for the final call at i == 14) rk[i + 1]
   of the AES-256 schedule.  The even-indexed key uses lane 3 of
   key_assist (RotWord+SubWord+rcon); the odd-indexed key uses lane 2 of
   a zero-rcon assist (SubWord only), per the AES-256 key schedule. */
static_always_inline void
aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
{
  u8x16 r, t;
  rk += i;
  r = rk[-2];
  r ^= t = u8x16_word_shift_left (r, 4);
  r ^= t = u8x16_word_shift_left (t, 4);
  r ^= u8x16_word_shift_left (t, 4);
  r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
  rk[0] = r;

  if (i >= 14)
    return;

  key_assist = aes_keygen_assist (rk[0], 0x0);
  r = rk[-1];
  r ^= t = u8x16_word_shift_left (r, 4);
  r ^= t = u8x16_word_shift_left (t, 4);
  r ^= u8x16_word_shift_left (t, 4);
  r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
  rk[1] = r;
}
259
/* Expand a 32-byte AES key into 15 round keys (rk[0..14]); each assist
   call fills two round keys (the final one only rk[14]).  rcon values
   must be compile-time immediates, hence the unrolled calls. */
static_always_inline void
aes256_key_expand (u8x16 * rk, u8x16u const *k)
{
  rk[0] = k[0];
  rk[1] = k[1];
  aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
  aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
  aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
  aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
  aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
  aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
  aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
}
Damjan Marion776644e2020-01-31 10:24:07 +0100273#endif
274
275#ifdef __aarch64__
276
/* Byte-shuffle masks that pre-arrange the previous round key so a
   single vaeseq_u8 (with zero key) yields the SubWord result of the
   key's last word in every lane; mask1 additionally rotates the word
   (RotWord), mask2 replicates it unrotated.  NOTE(review): exact byte
   order compensates for the ShiftRows step inside vaeseq_u8. */
static const u8x16 aese_prep_mask1 =
  { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
static const u8x16 aese_prep_mask2 =
  { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
281
/* Derive one AES-128 round key into rk[0] from rk[-1] using NEON:
   RotWord+SubWord via shuffle + vaeseq_u8, XOR of the round constant,
   then the standard word-chaining XORs (emulated with vextq_u8 byte
   shifts against a zero vector). */
static_always_inline void
aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
{
  u8x16 r, t, last_round = rk[-1], z = { };
  r = vqtbl1q_u8 (last_round, aese_prep_mask1);
  r = vaeseq_u8 (r, z);
  r ^= (u8x16) vdupq_n_u32 (rcon);
  r ^= last_round;
  r ^= t = vextq_u8 (z, last_round, 12);
  r ^= t = vextq_u8 (z, t, 12);
  r ^= vextq_u8 (z, t, 12);
  rk[0] = r;
}
295
Damjan Marion4fe44af2020-02-12 18:24:24 +0100296static_always_inline void
Jieqiang Wang83b982b2021-11-29 14:25:03 +0000297aes128_key_expand (u8x16 *rk, u8x16u const *k)
Damjan Marion776644e2020-01-31 10:24:07 +0100298{
Damjan Marion415b4b02020-02-11 17:04:38 +0100299 rk[0] = k[0];
Damjan Marion776644e2020-01-31 10:24:07 +0100300 aes128_key_expand_round_neon (rk + 1, 0x01);
301 aes128_key_expand_round_neon (rk + 2, 0x02);
302 aes128_key_expand_round_neon (rk + 3, 0x04);
303 aes128_key_expand_round_neon (rk + 4, 0x08);
304 aes128_key_expand_round_neon (rk + 5, 0x10);
305 aes128_key_expand_round_neon (rk + 6, 0x20);
306 aes128_key_expand_round_neon (rk + 7, 0x40);
307 aes128_key_expand_round_neon (rk + 8, 0x80);
308 aes128_key_expand_round_neon (rk + 9, 0x1b);
309 aes128_key_expand_round_neon (rk + 10, 0x36);
310}
311
/* One AES-192 expansion step operating on 8-byte half round keys:
   derives three new halves (only two for the final rcon 0x80 step) from
   the previous three, using RotWord+SubWord+rcon of the last word plus
   word chaining. */
static_always_inline void
aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
{
  u8x8 r, last_round = rk[-1], z = { };
  u8x16 r2, z2 = { };

  /* replicate the last 64 bits, then RotWord+SubWord via shuffle +
     vaeseq_u8 and add the round constant */
  r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
  r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
  r2 = vaeseq_u8 (r2, z2);
  r2 ^= (u8x16) vdupq_n_u32 (rcon);

  r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
  r ^= rk[-3];
  r ^= vext_u8 (z, rk[-3], 4);
  rk[0] = r;

  r = rk[-2] ^ vext_u8 (r, z, 4);
  r ^= vext_u8 (z, r, 4);
  rk[1] = r;

  /* last step produces only two halves (13 round keys = 26 halves) */
  if (rcon == 0x80)
    return;

  r = rk[-1] ^ vext_u8 (r, z, 4);
  r ^= vext_u8 (z, r, 4);
  rk[2] = r;
}
339
Damjan Marion4fe44af2020-02-12 18:24:24 +0100340static_always_inline void
Damjan Marion415b4b02020-02-11 17:04:38 +0100341aes192_key_expand (u8x16 * ek, const u8x16u * k)
Damjan Marion776644e2020-01-31 10:24:07 +0100342{
343 u8x8 *rk = (u8x8 *) ek;
Damjan Marion415b4b02020-02-11 17:04:38 +0100344 ek[0] = k[0];
345 rk[2] = *(u8x8u *) (k + 1);
Damjan Marion776644e2020-01-31 10:24:07 +0100346 aes192_key_expand_round_neon (rk + 3, 0x01);
347 aes192_key_expand_round_neon (rk + 6, 0x02);
348 aes192_key_expand_round_neon (rk + 9, 0x04);
349 aes192_key_expand_round_neon (rk + 12, 0x08);
350 aes192_key_expand_round_neon (rk + 15, 0x10);
351 aes192_key_expand_round_neon (rk + 18, 0x20);
352 aes192_key_expand_round_neon (rk + 21, 0x40);
353 aes192_key_expand_round_neon (rk + 24, 0x80);
354}
355
356
/* One AES-256 expansion step: steps with rcon != 0 use
   RotWord+SubWord+rcon (mask1); steps with rcon == 0 use SubWord only
   (mask2), as the AES-256 schedule requires, followed by the
   word-chaining XORs against rk[-2]. */
static_always_inline void
aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
{
  u8x16 r, t, z = { };

  r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
  r = vaeseq_u8 (r, z);
  if (rcon)
    r ^= (u8x16) vdupq_n_u32 (rcon);
  r ^= rk[-2];
  r ^= t = vextq_u8 (z, rk[-2], 12);
  r ^= t = vextq_u8 (z, t, 12);
  r ^= vextq_u8 (z, t, 12);
  rk[0] = r;
}
372
Damjan Marion4fe44af2020-02-12 18:24:24 +0100373static_always_inline void
Jieqiang Wang83b982b2021-11-29 14:25:03 +0000374aes256_key_expand (u8x16 *rk, u8x16u const *k)
Damjan Marion776644e2020-01-31 10:24:07 +0100375{
Damjan Marion415b4b02020-02-11 17:04:38 +0100376 rk[0] = k[0];
377 rk[1] = k[1];
Damjan Marion776644e2020-01-31 10:24:07 +0100378 aes256_key_expand_round_neon (rk + 2, 0x01);
379 aes256_key_expand_round_neon (rk + 3, 0);
380 aes256_key_expand_round_neon (rk + 4, 0x02);
381 aes256_key_expand_round_neon (rk + 5, 0);
382 aes256_key_expand_round_neon (rk + 6, 0x04);
383 aes256_key_expand_round_neon (rk + 7, 0);
384 aes256_key_expand_round_neon (rk + 8, 0x08);
385 aes256_key_expand_round_neon (rk + 9, 0);
386 aes256_key_expand_round_neon (rk + 10, 0x10);
387 aes256_key_expand_round_neon (rk + 11, 0);
388 aes256_key_expand_round_neon (rk + 12, 0x20);
389 aes256_key_expand_round_neon (rk + 13, 0);
390 aes256_key_expand_round_neon (rk + 14, 0x40);
391}
392
393#endif
Damjan Mariondeb8af62019-04-02 19:06:50 +0200394
395static_always_inline void
Damjan Marion415b4b02020-02-11 17:04:38 +0100396aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
Damjan Mariondeb8af62019-04-02 19:06:50 +0200397{
398 switch (ks)
399 {
Damjan Marion7d08e392020-01-28 09:55:25 +0100400 case AES_KEY_128:
Damjan Marion415b4b02020-02-11 17:04:38 +0100401 aes128_key_expand (key_schedule, (u8x16u const *) key);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200402 break;
Damjan Marion7d08e392020-01-28 09:55:25 +0100403 case AES_KEY_192:
Damjan Marion415b4b02020-02-11 17:04:38 +0100404 aes192_key_expand (key_schedule, (u8x16u const *) key);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200405 break;
Damjan Marion7d08e392020-01-28 09:55:25 +0100406 case AES_KEY_256:
Damjan Marion415b4b02020-02-11 17:04:38 +0100407 aes256_key_expand (key_schedule, (u8x16u const *) key);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200408 break;
409 }
410}
411
Damjan Mariondeb8af62019-04-02 19:06:50 +0200412static_always_inline void
Damjan Marion93975e62020-01-30 15:46:23 +0100413aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
Damjan Mariondeb8af62019-04-02 19:06:50 +0200414{
Damjan Marion7d08e392020-01-28 09:55:25 +0100415 int rounds = AES_KEY_ROUNDS (ks);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200416
Damjan Marion78b58f62020-01-29 10:31:26 +0100417 kd[rounds] = ke[0];
418 kd[0] = ke[rounds];
Damjan Mariondeb8af62019-04-02 19:06:50 +0200419
420 for (int i = 1; i < (rounds / 2); i++)
421 {
Damjan Marion93975e62020-01-30 15:46:23 +0100422 kd[rounds - i] = aes_inv_mix_column (ke[i]);
423 kd[i] = aes_inv_mix_column (ke[rounds - i]);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200424 }
425
Damjan Marion93975e62020-01-30 15:46:23 +0100426 kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
Damjan Mariondeb8af62019-04-02 19:06:50 +0200427}
/* Select the widest AES data path available at compile time: 4 blocks
   per vector (VAES + AVX512), 2 blocks (VAES on 256-bit vectors) or a
   single block (AES-NI / NEON).  The aes_* aliases let callers be
   written once against the widest available type. */
#if defined(__VAES__) && defined(__AVX512F__)
#define N_AES_LANES 4
#define aes_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
#define aes_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
#define aes_reflect(r) u8x64_reflect_u8x16 (r)
typedef u8x64 aes_data_t;
typedef u8x64u aes_mem_t;
typedef u32x16 aes_counter_t;
#elif defined(__VAES__)
#define N_AES_LANES 2
#define aes_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
#define aes_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
#define aes_reflect(r) u8x32_reflect_u8x16 (r)
typedef u8x32 aes_data_t;
typedef u8x32u aes_mem_t;
typedef u32x8 aes_counter_t;
#else
#define N_AES_LANES 1
#define aes_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
#define aes_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
#define aes_reflect(r) u8x16_reflect (r)
typedef u8x16 aes_data_t;
typedef u8x16u aes_mem_t;
typedef u32x4 aes_counter_t;
#endif

/* bytes carried by one aes_data_t vector */
#define N_AES_BYTES (N_AES_LANES * 16)
Damjan Mariondeb8af62019-04-02 19:06:50 +0200455
/* One expanded round key, replicated across all lanes of the widest
   vector type so the aes_enc_* helpers can pick the member matching
   N_AES_LANES.  NOTE(review): "expaned" is a typo for "expanded", but
   the name is part of the public interface, so it is kept as-is. */
typedef union
{
  u8x16 x1;	   /* single-block round key */
  u8x32 x2;	   /* key replicated in 2 lanes */
  u8x64 x4;	   /* key replicated in 4 lanes */
  u8x16 lanes[4];
} aes_expaned_key_t;
463
/* Apply one AES encryption round to n_blocks vectors in r, each vector
   holding N_AES_LANES independent blocks, all with the same expanded
   round key ek. */
static_always_inline void
aes_enc_round (aes_data_t *r, const aes_expaned_key_t *ek, uword n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
#if N_AES_LANES == 4
    r[i] = aes_enc_round_x4 (r[i], ek->x4);
#elif N_AES_LANES == 2
    r[i] = aes_enc_round_x2 (r[i], ek->x2);
#else
    r[i] = aes_enc_round_x1 (r[i], ek->x1);
#endif
}
476
/* Apply the final AES encryption round to n_blocks vectors in r, then
   XOR each resulting keystream vector into the corresponding data
   vector d (CTR-style combine); r keeps the raw round output. */
static_always_inline void
aes_enc_last_round (aes_data_t *r, aes_data_t *d, const aes_expaned_key_t *ek,
		    uword n_blocks)
{
  for (int i = 0; i < n_blocks; i++)
#if N_AES_LANES == 4
    d[i] ^= r[i] = aes_enc_last_round_x4 (r[i], ek->x4);
#elif N_AES_LANES == 2
    d[i] ^= r[i] = aes_enc_last_round_x2 (r[i], ek->x2);
#else
    d[i] ^= r[i] = aes_enc_last_round_x1 (r[i], ek->x1);
#endif
}
490
491#endif /* __aes_h__ */