vppinfra: AES-CBC and AES-GCM refactor and optimizations
- crypto code moved to vppinfra for better testing and reuse
- added 256-bit VAES support (Intel Client CPUs)
- added AES_GMAC functions
Change-Id: I960c8e14ca0a0126703e8f1589d86f32e2a98361
Type: improvement
Signed-off-by: Damjan Marion <damarion@cisco.com>
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index 975bf50..ad942a2 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -128,6 +128,11 @@
clib.h
cpu.h
crc32.h
+ crypto/sha2.h
+ crypto/ghash.h
+ crypto/aes.h
+ crypto/aes_cbc.h
+ crypto/aes_gcm.h
dlist.h
dlmalloc.h
elf_clib.h
@@ -168,7 +173,6 @@
random_isaac.h
rbtree.h
serialize.h
- sha2.h
smp.h
socket.h
sparse_vec.h
@@ -278,6 +282,8 @@
endif(VPP_BUILD_VPPINFRA_TESTS)
set(test_files
+ test/aes_cbc.c
+ test/aes_gcm.c
test/array_mask.c
test/compress.c
test/count_equal.c
diff --git a/src/vppinfra/crypto/aes.h b/src/vppinfra/crypto/aes.h
new file mode 100644
index 0000000..a5e286e
--- /dev/null
+++ b/src/vppinfra/crypto/aes.h
@@ -0,0 +1,439 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __aesni_h__
+#define __aesni_h__
+
+typedef enum
+{
+ AES_KEY_128 = 0,
+ AES_KEY_192 = 1,
+ AES_KEY_256 = 2,
+} aes_key_size_t;
+
+#define AES_KEY_ROUNDS(x) (10 + x * 2)
+#define AES_KEY_BYTES(x) (16 + x * 8)
+
+static_always_inline u8x16
+aes_block_load (u8 * p)
+{
+ return *(u8x16u *) p;
+}
+
+static_always_inline u8x16
+aes_enc_round (u8x16 a, u8x16 k)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
+#endif
+}
+
+#if defined(__VAES__) && defined(__AVX512F__)
+static_always_inline u8x64
+aes_enc_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_enc_last_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_dec_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
+}
+
+static_always_inline u8x64
+aes_dec_last_round_x4 (u8x64 a, u8x64 k)
+{
+ return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
+}
+#endif
+
+#ifdef __VAES__
+static_always_inline u8x32
+aes_enc_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_enc_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
+}
+#endif
+
+static_always_inline u8x16
+aes_enc_last_round (u8x16 a, u8x16 k)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
+#endif
+}
+
+#ifdef __x86_64__
+
+static_always_inline u8x16
+aes_dec_round (u8x16 a, u8x16 k)
+{
+ return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
+}
+
+static_always_inline u8x16
+aes_dec_last_round (u8x16 a, u8x16 k)
+{
+ return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
+}
+#endif
+
+static_always_inline void
+aes_block_store (u8 * p, u8x16 r)
+{
+ *(u8x16u *) p = r;
+}
+
+static_always_inline u8x16
+aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ block ^= round_keys[0];
+ for (int i = 1; i < rounds; i += 1)
+ block = aes_enc_round (block, round_keys[i]);
+ return aes_enc_last_round (block, round_keys[rounds]);
+}
+
+static_always_inline u8x16
+aes_inv_mix_column (u8x16 a)
+{
+#if defined (__AES__)
+ return (u8x16) _mm_aesimc_si128 ((__m128i) a);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return vaesimcq_u8 (a);
+#endif
+}
+
+#ifdef __x86_64__
+#define aes_keygen_assist(a, b) \
+ (u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
+
+/* AES-NI based AES key expansion based on code samples from
+ Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
+ (323641-001) */
+
+static_always_inline void
+aes128_key_assist (u8x16 * rk, u8x16 r)
+{
+ u8x16 t = rk[-1];
+ t ^= u8x16_word_shift_left (t, 4);
+ t ^= u8x16_word_shift_left (t, 4);
+ t ^= u8x16_word_shift_left (t, 4);
+ rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
+}
+
+static_always_inline void
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
+ aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
+ aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
+ aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
+ aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
+ aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
+ aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
+ aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
+ aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
+ aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
+}
+
+static_always_inline void
+aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
+{
+ u8x16 t;
+ r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
+ r1[0] ^= t = u8x16_word_shift_left (t, 4);
+ r1[0] ^= u8x16_word_shift_left (t, 4);
+ r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
+ r2[0] ^= u8x16_word_shift_left (r2[0], 4);
+ r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
+}
+
+static_always_inline void
+aes192_key_expand (u8x16 * rk, u8x16u const *k)
+{
+ u8x16 r1, r2;
+
+ rk[0] = r1 = k[0];
+ /* *INDENT-OFF* */
+ rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
+ /* *INDENT-ON* */
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
+ rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
+ rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
+ rk[3] = r1;
+ rk[4] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
+ rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
+ rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
+ rk[6] = r1;
+ rk[7] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
+ rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
+ rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
+ rk[9] = r1;
+ rk[10] = r2;
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
+ rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
+ rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
+
+ aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
+ rk[12] = r1;
+}
+
+static_always_inline void
+aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
+{
+ u8x16 r, t;
+ rk += i;
+ r = rk[-2];
+ r ^= t = u8x16_word_shift_left (r, 4);
+ r ^= t = u8x16_word_shift_left (t, 4);
+ r ^= u8x16_word_shift_left (t, 4);
+ r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
+ rk[0] = r;
+
+ if (i >= 14)
+ return;
+
+ key_assist = aes_keygen_assist (rk[0], 0x0);
+ r = rk[-1];
+ r ^= t = u8x16_word_shift_left (r, 4);
+ r ^= t = u8x16_word_shift_left (t, 4);
+ r ^= u8x16_word_shift_left (t, 4);
+ r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
+ rk[1] = r;
+}
+
+static_always_inline void
+aes256_key_expand (u8x16 * rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ rk[1] = k[1];
+ aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
+ aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
+ aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
+ aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
+ aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
+ aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
+ aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
+}
+#endif
+
+#ifdef __aarch64__
+
+static const u8x16 aese_prep_mask1 =
+ { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
+static const u8x16 aese_prep_mask2 =
+ { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
+
+static_always_inline void
+aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
+{
+ u8x16 r, t, last_round = rk[-1], z = { };
+ r = vqtbl1q_u8 (last_round, aese_prep_mask1);
+ r = vaeseq_u8 (r, z);
+ r ^= (u8x16) vdupq_n_u32 (rcon);
+ r ^= last_round;
+ r ^= t = vextq_u8 (z, last_round, 12);
+ r ^= t = vextq_u8 (z, t, 12);
+ r ^= vextq_u8 (z, t, 12);
+ rk[0] = r;
+}
+
+static_always_inline void
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ aes128_key_expand_round_neon (rk + 1, 0x01);
+ aes128_key_expand_round_neon (rk + 2, 0x02);
+ aes128_key_expand_round_neon (rk + 3, 0x04);
+ aes128_key_expand_round_neon (rk + 4, 0x08);
+ aes128_key_expand_round_neon (rk + 5, 0x10);
+ aes128_key_expand_round_neon (rk + 6, 0x20);
+ aes128_key_expand_round_neon (rk + 7, 0x40);
+ aes128_key_expand_round_neon (rk + 8, 0x80);
+ aes128_key_expand_round_neon (rk + 9, 0x1b);
+ aes128_key_expand_round_neon (rk + 10, 0x36);
+}
+
+static_always_inline void
+aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
+{
+ u8x8 r, last_round = rk[-1], z = { };
+ u8x16 r2, z2 = { };
+
+ r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
+ r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
+ r2 = vaeseq_u8 (r2, z2);
+ r2 ^= (u8x16) vdupq_n_u32 (rcon);
+
+ r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
+ r ^= rk[-3];
+ r ^= vext_u8 (z, rk[-3], 4);
+ rk[0] = r;
+
+ r = rk[-2] ^ vext_u8 (r, z, 4);
+ r ^= vext_u8 (z, r, 4);
+ rk[1] = r;
+
+ if (rcon == 0x80)
+ return;
+
+ r = rk[-1] ^ vext_u8 (r, z, 4);
+ r ^= vext_u8 (z, r, 4);
+ rk[2] = r;
+}
+
+static_always_inline void
+aes192_key_expand (u8x16 * ek, const u8x16u * k)
+{
+ u8x8 *rk = (u8x8 *) ek;
+ ek[0] = k[0];
+ rk[2] = *(u8x8u *) (k + 1);
+ aes192_key_expand_round_neon (rk + 3, 0x01);
+ aes192_key_expand_round_neon (rk + 6, 0x02);
+ aes192_key_expand_round_neon (rk + 9, 0x04);
+ aes192_key_expand_round_neon (rk + 12, 0x08);
+ aes192_key_expand_round_neon (rk + 15, 0x10);
+ aes192_key_expand_round_neon (rk + 18, 0x20);
+ aes192_key_expand_round_neon (rk + 21, 0x40);
+ aes192_key_expand_round_neon (rk + 24, 0x80);
+}
+
+
+static_always_inline void
+aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
+{
+ u8x16 r, t, z = { };
+
+ r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
+ r = vaeseq_u8 (r, z);
+ if (rcon)
+ r ^= (u8x16) vdupq_n_u32 (rcon);
+ r ^= rk[-2];
+ r ^= t = vextq_u8 (z, rk[-2], 12);
+ r ^= t = vextq_u8 (z, t, 12);
+ r ^= vextq_u8 (z, t, 12);
+ rk[0] = r;
+}
+
+static_always_inline void
+aes256_key_expand (u8x16 *rk, u8x16u const *k)
+{
+ rk[0] = k[0];
+ rk[1] = k[1];
+ aes256_key_expand_round_neon (rk + 2, 0x01);
+ aes256_key_expand_round_neon (rk + 3, 0);
+ aes256_key_expand_round_neon (rk + 4, 0x02);
+ aes256_key_expand_round_neon (rk + 5, 0);
+ aes256_key_expand_round_neon (rk + 6, 0x04);
+ aes256_key_expand_round_neon (rk + 7, 0);
+ aes256_key_expand_round_neon (rk + 8, 0x08);
+ aes256_key_expand_round_neon (rk + 9, 0);
+ aes256_key_expand_round_neon (rk + 10, 0x10);
+ aes256_key_expand_round_neon (rk + 11, 0);
+ aes256_key_expand_round_neon (rk + 12, 0x20);
+ aes256_key_expand_round_neon (rk + 13, 0);
+ aes256_key_expand_round_neon (rk + 14, 0x40);
+}
+
+#endif
+
+static_always_inline void
+aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
+{
+ switch (ks)
+ {
+ case AES_KEY_128:
+ aes128_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ case AES_KEY_192:
+ aes192_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ case AES_KEY_256:
+ aes256_key_expand (key_schedule, (u8x16u const *) key);
+ break;
+ }
+}
+
+static_always_inline void
+aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+
+ kd[rounds] = ke[0];
+ kd[0] = ke[rounds];
+
+ for (int i = 1; i < (rounds / 2); i++)
+ {
+ kd[rounds - i] = aes_inv_mix_column (ke[i]);
+ kd[i] = aes_inv_mix_column (ke[rounds - i]);
+ }
+
+ kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
+}
+
+#endif /* __aesni_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vppinfra/crypto/aes_cbc.h b/src/vppinfra/crypto/aes_cbc.h
new file mode 100644
index 0000000..5c3054f
--- /dev/null
+++ b/src/vppinfra/crypto/aes_cbc.h
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_cbc_h__
+#define __crypto_aes_cbc_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/crypto/aes.h>
+
+typedef struct
+{
+ const u8x16 encrypt_key[15];
+ const u8x16 decrypt_key[15];
+} aes_cbc_key_data_t;
+
+static_always_inline void
+clib_aes_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *src, uword len,
+ const u8 *iv, aes_key_size_t ks, u8 *dst)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ u8x16 r, *k = (u8x16 *) kd->encrypt_key;
+
+ r = *(u8x16u *) iv;
+
+ for (int i = 0; i < len; i += 16)
+ {
+ int j;
+#if __x86_64__
+ r = u8x16_xor3 (r, *(u8x16u *) (src + i), k[0]);
+ for (j = 1; j < rounds; j++)
+ r = aes_enc_round (r, k[j]);
+ r = aes_enc_last_round (r, k[rounds]);
+#else
+ r ^= *(u8x16u *) (src + i);
+ for (j = 1; j < rounds - 1; j++)
+ r = vaesmcq_u8 (vaeseq_u8 (r, k[j]));
+ r = vaeseq_u8 (r, k[j]) ^ k[rounds];
+#endif
+ *(u8x16u *) (dst + i) = r;
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_128, ciphertext);
+}
+
+static_always_inline void
+clib_aes192_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_192, ciphertext);
+}
+
+static_always_inline void
+clib_aes256_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_256, ciphertext);
+}
+
+static_always_inline void __clib_unused
+aes_cbc_dec (const u8x16 *k, u8x16u *src, u8x16u *dst, u8x16u *iv, int count,
+ int rounds)
+{
+ u8x16 r[4], c[4], f;
+
+ f = iv[0];
+ while (count >= 64)
+ {
+ c[0] = r[0] = src[0];
+ c[1] = r[1] = src[1];
+ c[2] = r[2] = src[2];
+ c[3] = r[3] = src[3];
+
+#if __x86_64__
+ r[0] ^= k[0];
+ r[1] ^= k[0];
+ r[2] ^= k[0];
+ r[3] ^= k[0];
+
+ for (int i = 1; i < rounds; i++)
+ {
+ r[0] = aes_dec_round (r[0], k[i]);
+ r[1] = aes_dec_round (r[1], k[i]);
+ r[2] = aes_dec_round (r[2], k[i]);
+ r[3] = aes_dec_round (r[3], k[i]);
+ }
+
+ r[0] = aes_dec_last_round (r[0], k[rounds]);
+ r[1] = aes_dec_last_round (r[1], k[rounds]);
+ r[2] = aes_dec_last_round (r[2], k[rounds]);
+ r[3] = aes_dec_last_round (r[3], k[rounds]);
+#else
+ for (int i = 0; i < rounds - 1; i++)
+ {
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
+ r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
+ r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
+ }
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+ r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
+ r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
+ r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ dst[1] = r[1] ^ c[0];
+ dst[2] = r[2] ^ c[1];
+ dst[3] = r[3] ^ c[2];
+ f = c[3];
+
+ count -= 64;
+ src += 4;
+ dst += 4;
+ }
+
+ while (count > 0)
+ {
+ c[0] = r[0] = src[0];
+#if __x86_64__
+ r[0] ^= k[0];
+ for (int i = 1; i < rounds; i++)
+ r[0] = aes_dec_round (r[0], k[i]);
+ r[0] = aes_dec_last_round (r[0], k[rounds]);
+#else
+ c[0] = r[0] = src[0];
+ for (int i = 0; i < rounds - 1; i++)
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ f = c[0];
+
+ count -= 16;
+ src += 1;
+ dst += 1;
+ }
+}
+
+#if __x86_64__
+#if defined(__VAES__) && defined(__AVX512F__)
+
+static_always_inline u8x64
+aes_block_load_x4 (u8 *src[], int i)
+{
+ u8x64 r = {};
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3);
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x4 (u8 *dst[], int i, u8x64 r)
+{
+ aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0));
+ aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1));
+ aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2));
+ aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3));
+}
+
+static_always_inline u8x64
+aes4_cbc_dec_permute (u8x64 a, u8x64 b)
+{
+ return (u8x64) u64x8_shuffle2 (a, b, 6, 7, 8, 9, 10, 11, 12, 13);
+}
+
+static_always_inline void
+aes4_cbc_dec (const u8x16 *k, u8x64u *src, u8x64u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x64 f, k4, r[4], c[4] = {};
+ __mmask8 m;
+ int i, n_blocks = count >> 4;
+
+ f = u8x64_insert_u8x16 (u8x64_zero (), *iv, 3);
+
+ while (n_blocks >= 16)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+ r[3] = c[3] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ r[3] = aes_dec_round_x4 (r[3], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+ r[3] = aes_dec_last_round_x4 (r[3], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes4_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 16;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 12)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 12;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 8)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 8;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 4)
+ {
+ c[0] = src[0];
+
+ r[0] = c[0] ^ u8x64_splat_u8x16 (k[0]);
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 4;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ m = (1 << (n_blocks * 2)) - 1;
+ c[0] =
+ (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m, (__m512i *) src);
+ f = aes4_cbc_dec_permute (f, c[0]);
+ r[0] = c[0] ^ k4;
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
+ }
+}
+#elif defined(__VAES__)
+
+static_always_inline u8x32
+aes_block_load_x2 (u8 *src[], int i)
+{
+ u8x32 r = {};
+ r = u8x32_insert_lo (r, aes_block_load (src[0] + i));
+ r = u8x32_insert_hi (r, aes_block_load (src[1] + i));
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x2 (u8 *dst[], int i, u8x32 r)
+{
+ aes_block_store (dst[0] + i, u8x32_extract_lo (r));
+ aes_block_store (dst[1] + i, u8x32_extract_hi (r));
+}
+
+static_always_inline u8x32
+aes2_cbc_dec_permute (u8x32 a, u8x32 b)
+{
+ return (u8x32) u64x4_shuffle2 ((u64x4) a, (u64x4) b, 2, 3, 4, 5);
+}
+
+static_always_inline void
+aes2_cbc_dec (const u8x16 *k, u8x32u *src, u8x32u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x32 k2, f = {}, r[4], c[4] = {};
+ int i, n_blocks = count >> 4;
+
+ f = u8x32_insert_hi (f, *iv);
+
+ while (n_blocks >= 8)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+ r[3] = c[3] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ r[3] = aes_dec_round_x2 (r[3], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+ r[3] = aes_dec_last_round_x2 (r[3], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes2_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 8;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 6)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 6;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 4)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 4;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 2)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ r[0] = c[0] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 2;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ u8x16 rl = *(u8x16u *) src ^ k[0];
+ for (i = 1; i < rounds; i++)
+ rl = aes_dec_round (rl, k[i]);
+ rl = aes_dec_last_round (rl, k[i]);
+ *(u8x16 *) dst = rl ^ u8x32_extract_hi (f);
+ }
+}
+#endif
+#endif
+
+static_always_inline void
+clib_aes_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 e[15], d[15];
+ aes_key_expand (e, key, ks);
+ aes_key_enc_to_dec (e, d, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ {
+ ((u8x16 *) kd->decrypt_key)[i] = d[i];
+ ((u8x16 *) kd->encrypt_key)[i] = e[i];
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_128);
+}
+static_always_inline void
+clib_aes192_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_192);
+}
+static_always_inline void
+clib_aes256_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_256);
+}
+
+static_always_inline void
+clib_aes_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, aes_key_size_t ks,
+ u8 *plaintext)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+#if defined(__VAES__) && defined(__AVX512F__)
+ aes4_cbc_dec (kd->decrypt_key, (u8x64u *) ciphertext, (u8x64u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#elif defined(__VAES__)
+ aes2_cbc_dec (kd->decrypt_key, (u8x32u *) ciphertext, (u8x32u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#else
+ aes_cbc_dec (kd->decrypt_key, (u8x16u *) ciphertext, (u8x16u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#endif
+}
+
+static_always_inline void
+clib_aes128_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_128, plaintext);
+}
+
+static_always_inline void
+clib_aes192_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_192, plaintext);
+}
+
+static_always_inline void
+clib_aes256_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_256, plaintext);
+}
+
+#endif /* __crypto_aes_cbc_h__ */
diff --git a/src/vppinfra/crypto/aes_gcm.h b/src/vppinfra/crypto/aes_gcm.h
new file mode 100644
index 0000000..8a5f76c
--- /dev/null
+++ b/src/vppinfra/crypto/aes_gcm.h
@@ -0,0 +1,975 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_gcm_h__
+#define __crypto_aes_gcm_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+#include <vppinfra/crypto/aes.h>
+#include <vppinfra/crypto/ghash.h>
+
+#define NUM_HI 36
+#if defined(__VAES__) && defined(__AVX512F__)
+typedef u8x64 aes_data_t;
+typedef u8x64u aes_ghash_t;
+typedef u8x64u aes_mem_t;
+typedef u32x16 aes_gcm_counter_t;
+#define N 64
+#define aes_gcm_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
+#define aes_gcm_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
+#define aes_gcm_splat(v) u8x64_splat (v)
+#define aes_gcm_reflect(r) u8x64_reflect_u8x16 (r)
+#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
+#elif defined(__VAES__)
+typedef u8x32 aes_data_t;
+typedef u8x32u aes_ghash_t;
+typedef u8x32u aes_mem_t;
+typedef u32x8 aes_gcm_counter_t;
+#define N 32
+#define aes_gcm_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
+#define aes_gcm_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
+#define aes_gcm_splat(v) u8x32_splat (v)
+#define aes_gcm_reflect(r) u8x32_reflect_u8x16 (r)
+#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
+#else
+typedef u8x16 aes_data_t;
+typedef u8x16 aes_ghash_t;
+typedef u8x16u aes_mem_t;
+typedef u32x4 aes_gcm_counter_t;
+#define N 16
+#define aes_gcm_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
+#define aes_gcm_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
+#define aes_gcm_splat(v) u8x16_splat (v)
+#define aes_gcm_reflect(r) u8x16_reflect (r)
+#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
+#endif
+#define N_LANES (N / 16)
+
+typedef enum
+{
+ AES_GCM_OP_UNKNONW = 0,
+ AES_GCM_OP_ENCRYPT,
+ AES_GCM_OP_DECRYPT,
+ AES_GCM_OP_GMAC
+} aes_gcm_op_t;
+
+typedef union
+{
+ u8x16 x1;
+ u8x32 x2;
+ u8x64 x4;
+ u8x16 lanes[4];
+} __clib_aligned (64)
+aes_gcm_expaned_key_t;
+
+typedef struct
+{
+ /* pre-calculated hash key values */
+ const u8x16 Hi[NUM_HI];
+ /* extracted AES key */
+ const aes_gcm_expaned_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+} aes_gcm_key_data_t;
+
+typedef struct
+{
+ aes_gcm_op_t operation;
+ int last;
+ u8 rounds;
+ uword data_bytes;
+ uword aad_bytes;
+
+ u8x16 T;
+
+ /* hash */
+ const u8x16 *Hi;
+ const aes_ghash_t *next_Hi;
+
+ /* expaded keys */
+ const aes_gcm_expaned_key_t *Ke;
+
+ /* counter */
+ u32 counter;
+ u8x16 EY0;
+ aes_gcm_counter_t Y;
+
+ /* ghash */
+ ghash_data_t gd;
+} aes_gcm_ctx_t;
+
+static_always_inline void
+aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
+{
+ uword hash_offset = NUM_HI - n_lanes;
+ ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
+#if N_LANES == 4
+ u8x64 tag4 = {};
+ tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
+ ghash4_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag4, *ctx->next_Hi++);
+#elif N_LANES == 2
+ u8x32 tag2 = {};
+ tag2 = u8x32_insert_lo (tag2, ctx->T);
+ ghash2_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ tag2, *ctx->next_Hi++);
+#else
+ ghash_mul_first (&ctx->gd, aes_gcm_reflect (data) ^ ctx->T, *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
+{
+#if N_LANES == 4
+ ghash4_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
+#elif N_LANES == 2
+ ghash2_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
+#else
+ ghash_mul_next (&ctx->gd, aes_gcm_reflect (data), *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_bit_len (aes_gcm_ctx_t *ctx)
+{
+ u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
+#if N_LANES == 4
+ u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
+ u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), r, 0);
+ ghash4_mul_next (&ctx->gd, r4, h);
+#elif N_LANES == 2
+ u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
+ u8x32 r2 = u8x32_insert_lo (u8x32_zero (), r);
+ ghash2_mul_next (&ctx->gd, r2, h);
+#else
+ ghash_mul_next (&ctx->gd, r, ctx->Hi[NUM_HI - 1]);
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
+{
+ if (aes_round == 0)
+ ctx->EY0 ^= ctx->Ke[0].x1;
+ else if (aes_round == ctx->rounds)
+ ctx->EY0 = aes_enc_last_round (ctx->EY0, ctx->Ke[aes_round].x1);
+ else
+ ctx->EY0 = aes_enc_round (ctx->EY0, ctx->Ke[aes_round].x1);
+}
+
+static_always_inline void
+aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
+{
+ uword i;
+ aes_data_t r = {};
+ const aes_mem_t *d = (aes_mem_t *) data;
+
+ for (; n_left >= 8 * N; n_left -= 8 * N, d += 8)
+ {
+ if (ctx->operation == AES_GCM_OP_GMAC && n_left == N * 8)
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES + 1);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_bit_len (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ goto done;
+ }
+
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_LANES);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+
+ if (n_left > 0)
+ {
+ int n_lanes = (n_left + 15) / 16;
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ n_lanes++;
+
+ if (n_left < N)
+ {
+ clib_memcpy_fast (&r, d, n_left);
+ aes_gcm_ghash_mul_first (ctx, r, n_lanes);
+ }
+ else
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+ n_left -= N;
+ i = 1;
+
+ if (n_left >= 4 * N)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 2]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 3]);
+ n_left -= 4 * N;
+ i += 4;
+ }
+ if (n_left >= 2 * N)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ n_left -= 2 * N;
+ i += 2;
+ }
+
+ if (n_left >= N)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ n_left -= N;
+ i += 1;
+ }
+
+ if (n_left)
+ {
+ clib_memcpy_fast (&r, d + i, n_left);
+ aes_gcm_ghash_mul_next (ctx, r);
+ }
+ }
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ aes_gcm_ghash_mul_bit_len (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+ else if (ctx->operation == AES_GCM_OP_GMAC)
+ {
+ u8x16 r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
+ ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]);
+ }
+
+done:
+ /* encrypt counter 0 E(Y0, k) */
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ for (int i = 0; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
+{
+ const aes_gcm_expaned_key_t Ke0 = ctx->Ke[0];
+ uword i = 0;
+
+#if N_LANES == 4
+ const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
+ 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };
+
+ const u32x16 ctr_4444 = {
+ 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
+ };
+
+ /* As counter is stored in network byte order for performance reasons we
+ are incrementing least significant byte only except in case where we
+ overlow. As we are processing four 512-blocks in parallel except the
+ last round, overflow can happen only when n == 4 */
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
+ ctx->Y += ctr_inv_4444;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
+ {
+ u32x16 Yr = (u32x16) aes_gcm_reflect ((u8x64) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
+ Yr += ctr_4444;
+ ctx->Y = (u32x16) aes_gcm_reflect ((u8x64) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y;
+ ctx->Y += ctr_inv_4444;
+ }
+ }
+ ctx->counter += n_blocks * 4;
+#elif N_LANES == 2
+ const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
+ const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };
+
+ /* As counter is stored in network byte order for performance reasons we
+ are incrementing least significant byte only except in case where we
+ overlow. As we are processing four 512-blocks in parallel except the
+ last round, overflow can happen only when n == 4 */
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
+ ctx->Y += ctr_inv_22;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
+ {
+ u32x8 Yr = (u32x8) aes_gcm_reflect ((u8x32) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
+ Yr += ctr_22;
+ ctx->Y = (u32x8) aes_gcm_reflect ((u8x32) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y;
+ ctx->Y += ctr_inv_22;
+ }
+ }
+ ctx->counter += n_blocks * 2;
+#else
+ const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
+
+ if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y;
+ ctx->Y += ctr_inv_1;
+ }
+ ctx->counter += n_blocks;
+ }
+ else
+ {
+ r[i++] = Ke0.x1 ^ (u8x16) ctx->Y;
+ ctx->Y += ctr_inv_1;
+ ctx->counter += 1;
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y;
+ ctx->counter++;
+ ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
+ }
+ }
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_round (aes_data_t *r, const aes_gcm_expaned_key_t *Ke,
+ uword n_blocks)
+{
+ for (int i = 0; i < n_blocks; i++)
+#if N_LANES == 4
+ r[i] = aes_enc_round_x4 (r[i], Ke->x4);
+#elif N_LANES == 2
+ r[i] = aes_enc_round_x2 (r[i], Ke->x2);
+#else
+ r[i] = aes_enc_round (r[i], Ke->x1);
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
+ const aes_gcm_expaned_key_t *Ke, uword n_blocks)
+{
+ /* additional ronuds for AES-192 and AES-256 */
+ for (int i = 10; i < ctx->rounds; i++)
+ aes_gcm_enc_round (r, Ke + i, n_blocks);
+
+ for (int i = 0; i < n_blocks; i++)
+#if N_LANES == 4
+ d[i] ^= aes_enc_last_round_x4 (r[i], Ke[ctx->rounds].x4);
+#elif N_LANES == 2
+ d[i] ^= aes_enc_last_round_x2 (r[i], Ke[ctx->rounds].x2);
+#else
+ d[i] ^= aes_enc_last_round (r[i], Ke[ctx->rounds].x1);
+#endif
+}
+
+static_always_inline void
+aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
+ u32 n_bytes, int with_ghash)
+{
+ const aes_gcm_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ uword ghash_blocks, gc = 1;
+ aes_data_t r[4];
+ u32 i, n_lanes;
+
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ ghash_blocks = 4;
+ n_lanes = N_LANES * 4;
+ }
+ else
+ {
+ ghash_blocks = n;
+ n_lanes = n * N_LANES;
+#if N_LANES != 1
+ if (ctx->last)
+ n_lanes = (n_bytes + 15) / 16;
+#endif
+ }
+
+ n_bytes -= (n - 1) * N;
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, n);
+ aes_gcm_enc_round (r, k + 1, n);
+
+ /* load data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_gcm_load_partial ((u8 *) (sv + n - 1), n_bytes);
+ }
+
+ /* GHASH multiply block 0 */
+ if (with_ghash)
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ /* AES rounds 2 and 3 */
+ aes_gcm_enc_round (r, k + 2, n);
+ aes_gcm_enc_round (r, k + 3, n);
+
+ /* GHASH multiply block 1 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_gcm_enc_round (r, k + 4, n);
+ aes_gcm_enc_round (r, k + 5, n);
+
+ /* GHASH multiply block 2 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_gcm_enc_round (r, k + 6, n);
+ aes_gcm_enc_round (r, k + 7, n);
+
+ /* GHASH multiply block 3 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* load 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_gcm_load_partial (sv + n - 1, n_bytes);
+ }
+
+ /* AES rounds 8 and 9 */
+ aes_gcm_enc_round (r, k + 8, n);
+ aes_gcm_enc_round (r, k + 9, n);
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, n);
+
+ /* store data */
+ for (i = 0; i < n - ctx->last; i++)
+ dv[i] = d[i];
+
+ if (ctx->last)
+ aes_gcm_store_partial (d[n - 1], dv + n - 1, n_bytes);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* GHASH reduce 2nd step */
+ if (with_ghash)
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* GHASH final step */
+ if (with_ghash)
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst,
+ int with_ghash)
+{
+ const aes_gcm_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ aes_data_t r[4];
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_gcm_enc_round (r, k + 1, 4);
+
+ /* load 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* GHASH multiply block 0 */
+ aes_gcm_ghash_mul_first (ctx, d[0], N_LANES * 8);
+
+ /* AES rounds 2 and 3 */
+ aes_gcm_enc_round (r, k + 2, 4);
+ aes_gcm_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 1 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_gcm_enc_round (r, k + 4, 4);
+ aes_gcm_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 2 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_gcm_enc_round (r, k + 6, 4);
+ aes_gcm_enc_round (r, k + 7, 4);
+
+ /* GHASH multiply block 3 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 8 and 9 */
+ aes_gcm_enc_round (r, k + 8, 4);
+ aes_gcm_enc_round (r, k + 9, 4);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store 4 blocks of data */
+ for (int i = 0; i < 4; i++)
+ dv[i] = d[i];
+
+ /* load next 4 blocks of data data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* GHASH multiply block 4 */
+ aes_gcm_ghash_mul_next (ctx, (d[0]));
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_gcm_enc_round (r, k + 1, 4);
+
+ /* GHASH multiply block 5 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 2 and 3 */
+ aes_gcm_enc_round (r, k + 2, 4);
+ aes_gcm_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 6 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 4 and 5 */
+ aes_gcm_enc_round (r, k + 4, 4);
+ aes_gcm_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 7 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 6 and 7 */
+ aes_gcm_enc_round (r, k + 6, 4);
+ aes_gcm_enc_round (r, k + 7, 4);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* AES rounds 8 and 9 */
+ aes_gcm_enc_round (r, k + 8, 4);
+ aes_gcm_enc_round (r, k + 9, 4);
+
+ /* GHASH reduce 2nd step */
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store data */
+ for (int i = 0; i < 4; i++)
+ dv[i + 4] = d[i];
+
+ /* GHASH final step */
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
+{
+ const union
+ {
+ u8 b[64];
+ aes_data_t r;
+ } scale = {
+ .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+ };
+
+ d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
+}
+
+static_always_inline void
+aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
+ u32 n_bytes)
+{
+ int n_lanes = (N_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
+ n_bytes -= (n_blocks - 1) * N;
+ int i;
+
+ aes_gcm_enc_ctr0_round (ctx, 0);
+ aes_gcm_enc_ctr0_round (ctx, 1);
+
+ if (n_bytes != N)
+ aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);
+
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ aes_gcm_enc_ctr0_round (ctx, 2);
+ aes_gcm_enc_ctr0_round (ctx, 3);
+
+ if (n_blocks > 1)
+ aes_gcm_ghash_mul_next (ctx, d[1]);
+
+ aes_gcm_enc_ctr0_round (ctx, 4);
+ aes_gcm_enc_ctr0_round (ctx, 5);
+
+ if (n_blocks > 2)
+ aes_gcm_ghash_mul_next (ctx, d[2]);
+
+ aes_gcm_enc_ctr0_round (ctx, 6);
+ aes_gcm_enc_ctr0_round (ctx, 7);
+
+ if (n_blocks > 3)
+ aes_gcm_ghash_mul_next (ctx, d[3]);
+
+ aes_gcm_enc_ctr0_round (ctx, 8);
+ aes_gcm_enc_ctr0_round (ctx, 9);
+
+ aes_gcm_ghash_mul_bit_len (ctx);
+ aes_gcm_ghash_reduce (ctx);
+
+ for (i = 10; i < ctx->rounds; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+
+ aes_gcm_ghash_reduce2 (ctx);
+
+ aes_gcm_ghash_final (ctx);
+
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
+{
+ aes_data_t d[4];
+
+ if (PREDICT_FALSE (n_left == 0))
+ {
+ int i;
+ for (i = 0; i < ctx->rounds + 1; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+ return;
+ }
+
+ if (n_left < 4 * N)
+ {
+ ctx->last = 1;
+ if (n_left > 3 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+ return;
+ }
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 0);
+
+ /* next */
+ n_left -= 4 * N;
+ dst += 4 * N;
+ src += 4 * N;
+
+ for (; n_left >= 8 * N; n_left -= 8 * N, src += 8 * N, dst += 8 * N)
+ aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1);
+
+ if (n_left >= 4 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N;
+ dst += 4 * N;
+ src += 4 * N;
+ }
+
+ if (n_left == 0)
+ {
+ aes_gcm_calc_last (ctx, d, 4, 4 * N);
+ return;
+ }
+
+ ctx->last = 1;
+
+ if (n_left > 3 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+}
+
+static_always_inline void
+aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
+{
+ aes_data_t d[4] = {};
+ for (; n_left >= 8 * N; n_left -= 8 * N, dst += 8 * N, src += 8 * N)
+ aes_gcm_calc_double (ctx, d, src, dst, /* with_ghash */ 1);
+
+ if (n_left >= 4 * N)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N;
+ dst += N * 4;
+ src += N * 4;
+ }
+
+ if (n_left == 0)
+ goto done;
+
+ ctx->last = 1;
+
+ if (n_left > 3 * N)
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ else if (n_left > 2 * N)
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ else if (n_left > N)
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ else
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+
+ u8x16 r;
+done:
+ r = (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
+ ctx->T = ghash_mul (r ^ ctx->T, ctx->Hi[NUM_HI - 1]);
+
+ /* encrypt counter 0 E(Y0, k) */
+ for (int i = 0; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline int
+aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
+ u32 data_bytes, u32 aad_bytes, u8 tag_len,
+ const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
+{
+ u8 *addt = (u8 *) aad;
+ u32x4 Y0;
+
+ aes_gcm_ctx_t _ctx = { .counter = 2,
+ .rounds = aes_rounds,
+ .operation = op,
+ .data_bytes = data_bytes,
+ .aad_bytes = aad_bytes,
+ .Hi = kd->Hi },
+ *ctx = &_ctx;
+
+ /* initalize counter */
+ Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
+ Y0[2] = *(u32u *) (ivp + 8);
+ Y0[3] = 1 << 24;
+ ctx->EY0 = (u8x16) Y0;
+ ctx->Ke = kd->Ke;
+#if N_LANES == 4
+ ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
+ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
+ };
+#elif N_LANES == 2
+ ctx->Y =
+ u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
+#else
+ ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
+#endif
+
+ /* calculate ghash for AAD */
+ aes_gcm_ghash (ctx, addt, aad_bytes);
+
+ clib_prefetch_load (tag);
+
+ /* ghash and encrypt/edcrypt */
+ if (op == AES_GCM_OP_ENCRYPT)
+ aes_gcm_enc (ctx, src, dst, data_bytes);
+ else if (op == AES_GCM_OP_DECRYPT)
+ aes_gcm_dec (ctx, src, dst, data_bytes);
+
+ /* final tag is */
+ ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;
+
+ /* tag_len 16 -> 0 */
+ tag_len &= 0xf;
+
+ if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
+ {
+ /* store tag */
+ if (tag_len)
+ u8x16_store_partial (ctx->T, tag, tag_len);
+ else
+ ((u8x16u *) tag)[0] = ctx->T;
+ }
+ else
+ {
+ /* check tag */
+ if (tag_len)
+ {
+ u16 mask = pow2_mask (tag_len);
+ u8x16 expected = u8x16_load_partial (tag, tag_len);
+ if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
+ return 1;
+ }
+ else
+ {
+ if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static_always_inline void
+clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 H;
+ u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_gcm_expaned_key_t *Ke = (aes_gcm_expaned_key_t *) kd->Ke;
+
+ /* expand AES key */
+ aes_key_expand (ek, key, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];
+
+ /* pre-calculate H */
+ H = aes_encrypt_block (u8x16_zero (), ek, ks);
+ H = u8x16_reflect (H);
+ ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
+}
+
+static_always_inline void
+clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline void
+clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline int
+clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline int
+clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline void
+clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);
+}
+
+static_always_inline void
+clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);
+}
+
+#endif /* __crypto_aes_gcm_h__ */
diff --git a/src/vppinfra/crypto/ghash.h b/src/vppinfra/crypto/ghash.h
new file mode 100644
index 0000000..bae8bad
--- /dev/null
+++ b/src/vppinfra/crypto/ghash.h
@@ -0,0 +1,515 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/*
+ *------------------------------------------------------------------
+ * Copyright(c) 2018, Intel Corporation All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES * LOSS OF USE,
+ * DATA, OR PROFITS * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *------------------------------------------------------------------
+ */
+
+/*
+ * Based on work by: Shay Gueron, Michael E. Kounavis, Erdinc Ozturk,
+ * Vinodh Gopal, James Guilford, Tomasz Kantecki
+ *
+ * References:
+ * [1] Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation on
+ * Intel Architecture Processors. August, 2010
+ * [2] Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode on
+ * Intel Architecture Processors. October, 2012.
+ * [3] intel-ipsec-mb library, https://github.com/01org/intel-ipsec-mb.git
+ *
+ * Definitions:
+ * GF Galois Extension Field GF(2^128) - finite field where elements are
+ * represented as polynomials with coefficients in GF(2) with the
+ * highest degree of 127. Polynomials are represented as 128-bit binary
+ * numbers where each bit represents one coefficient.
+ * e.g. polynomial x^5 + x^3 + x + 1 is represented in binary 101011.
+ * H hash key (128 bit)
+ * POLY irreducible polynomial x^127 + x^7 + x^2 + x + 1
+ * RPOLY irreducible polynomial x^128 + x^127 + x^126 + x^121 + 1
+ * + addition in GF, which equals to XOR operation
+ * * multiplication in GF
+ *
+ * GF multiplication consists of 2 steps:
+ * - carry-less multiplication of two 128-bit operands into 256-bit result
+ * - reduction of 256-bit result into 128-bit with modulo POLY
+ *
+ * GHash is calculated on 128-bit blocks of data according to the following
+ * formula:
+ * GH = (GH + data) * hash_key
+ *
+ * To avoid bit-reflection of data, this code uses GF multipication
+ * with reversed polynomial:
+ * a * b * x^-127 mod RPOLY
+ *
+ * To improve computation speed table Hi is precomputed with powers of H',
+ * where H' is calculated as H<<1 mod RPOLY.
+ * This allows us to improve performance by deferring reduction. For example
+ * to caclulate ghash of 4 128-bit blocks of data (b0, b1, b2, b3), we can do:
+ *
+ * u8x16 Hi[4];
+ * ghash_precompute (H, Hi, 4);
+ *
+ * ghash_data_t _gd, *gd = &_gd;
+ * ghash_mul_first (gd, GH ^ b0, Hi[3]);
+ * ghash_mul_next (gd, b1, Hi[2]);
+ * ghash_mul_next (gd, b2, Hi[1]);
+ * ghash_mul_next (gd, b3, Hi[0]);
+ * ghash_reduce (gd);
+ * ghash_reduce2 (gd);
+ * GH = ghash_final (gd);
+ *
+ * Reduction step is split into 3 functions so it can be better interleaved
+ * with other code, (i.e. with AES computation).
+ */
+
+#ifndef __ghash_h__
+#define __ghash_h__
+
+static_always_inline u8x16
+gmul_lo_lo (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
+ (poly64_t) vget_low_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_hi_lo (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a),
+ (poly64_t) vget_low_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_lo_hi (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
+ (poly64_t) vget_high_p64 ((poly64x2_t) b));
+#endif
+}
+
+static_always_inline u8x16
+gmul_hi_hi (u8x16 a, u8x16 b)
+{
+#if defined (__PCLMUL__)
+ return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11);
+#elif defined (__ARM_FEATURE_CRYPTO)
+ return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b);
+#endif
+}
+
+typedef struct
+{
+ u8x16 mid, hi, lo, tmp_lo, tmp_hi;
+ u8x32 hi2, lo2, mid2, tmp_lo2, tmp_hi2;
+ u8x64 hi4, lo4, mid4, tmp_lo4, tmp_hi4;
+ int pending;
+} ghash_data_t;
+
+static const u8x16 ghash_poly = {
+ 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
+};
+
+static const u8x16 ghash_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
+};
+
+static_always_inline void
+ghash_mul_first (ghash_data_t * gd, u8x16 a, u8x16 b)
+{
+ /* a1 * b1 */
+ gd->hi = gmul_hi_hi (a, b);
+ /* a0 * b0 */
+ gd->lo = gmul_lo_lo (a, b);
+ /* a0 * b1 ^ a1 * b0 */
+ gd->mid = gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b);
+
+ /* set gd->pending to 0 so next invocation of ghash_mul_next(...) knows that
+ there is no pending data in tmp_lo and tmp_hi */
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash_mul_next (ghash_data_t * gd, u8x16 a, u8x16 b)
+{
+ /* a1 * b1 */
+ u8x16 hi = gmul_hi_hi (a, b);
+ /* a0 * b0 */
+ u8x16 lo = gmul_lo_lo (a, b);
+
+ /* this branch will be optimized out by the compiler, and it allows us to
+ reduce number of XOR operations by using ternary logic */
+ if (gd->pending)
+ {
+ /* there is peding data from previous invocation so we can XOR */
+ gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, hi);
+ gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+ /* there is no peding data from previous invocation so we postpone XOR */
+ gd->tmp_hi = hi;
+ gd->tmp_lo = lo;
+ gd->pending = 1;
+ }
+
+ /* gd->mid ^= a0 * b1 ^ a1 * b0 */
+ gd->mid = u8x16_xor3 (gd->mid, gmul_hi_lo (a, b), gmul_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash_reduce (ghash_data_t * gd)
+{
+ u8x16 r;
+
+ /* Final combination:
+ gd->lo ^= gd->mid << 64
+ gd->hi ^= gd->mid >> 64 */
+ u8x16 midl = u8x16_word_shift_left (gd->mid, 8);
+ u8x16 midr = u8x16_word_shift_right (gd->mid, 8);
+
+ if (gd->pending)
+ {
+ gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, midl);
+ gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, midr);
+ }
+ else
+ {
+ gd->lo ^= midl;
+ gd->hi ^= midr;
+ }
+ r = gmul_hi_lo (ghash_poly2, gd->lo);
+ gd->lo ^= u8x16_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash_reduce2 (ghash_data_t * gd)
+{
+ gd->tmp_lo = gmul_lo_lo (ghash_poly2, gd->lo);
+ gd->tmp_hi = gmul_lo_hi (ghash_poly2, gd->lo);
+}
+
+static_always_inline u8x16
+ghash_final (ghash_data_t * gd)
+{
+ return u8x16_xor3 (gd->hi, u8x16_word_shift_right (gd->tmp_lo, 4),
+ u8x16_word_shift_left (gd->tmp_hi, 4));
+}
+
+static_always_inline u8x16
+ghash_mul (u8x16 a, u8x16 b)
+{
+ ghash_data_t _gd, *gd = &_gd;
+ ghash_mul_first (gd, a, b);
+ ghash_reduce (gd);
+ ghash_reduce2 (gd);
+ return ghash_final (gd);
+}
+
+#if defined(__VPCLMULQDQ__) && defined(__AVX512F__)
+
+static const u8x64 ghash4_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+};
+
+static_always_inline u8x64
+gmul4_lo_lo (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x00);
+}
+
+static_always_inline u8x64
+gmul4_hi_lo (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x01);
+}
+
+static_always_inline u8x64
+gmul4_lo_hi (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x10);
+}
+
+static_always_inline u8x64
+gmul4_hi_hi (u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11);
+}
+
+static_always_inline void
+ghash4_mul_first (ghash_data_t *gd, u8x64 a, u8x64 b)
+{
+ gd->hi4 = gmul4_hi_hi (a, b);
+ gd->lo4 = gmul4_lo_lo (a, b);
+ gd->mid4 = gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b);
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash4_mul_next (ghash_data_t *gd, u8x64 a, u8x64 b)
+{
+ u8x64 hi = gmul4_hi_hi (a, b);
+ u8x64 lo = gmul4_lo_lo (a, b);
+
+ if (gd->pending)
+ {
+ /* there is peding data from previous invocation so we can XOR */
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, hi);
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+ /* there is no peding data from previous invocation so we postpone XOR */
+ gd->tmp_hi4 = hi;
+ gd->tmp_lo4 = lo;
+ gd->pending = 1;
+ }
+ gd->mid4 = u8x64_xor3 (gd->mid4, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash4_reduce (ghash_data_t *gd)
+{
+ u8x64 r;
+
+ /* Final combination:
+ gd->lo4 ^= gd->mid4 << 64
+ gd->hi4 ^= gd->mid4 >> 64 */
+
+ u8x64 midl = u8x64_word_shift_left (gd->mid4, 8);
+ u8x64 midr = u8x64_word_shift_right (gd->mid4, 8);
+
+ if (gd->pending)
+ {
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, midl);
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, midr);
+ }
+ else
+ {
+ gd->lo4 ^= midl;
+ gd->hi4 ^= midr;
+ }
+
+ r = gmul4_hi_lo (ghash4_poly2, gd->lo4);
+ gd->lo4 ^= u8x64_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash4_reduce2 (ghash_data_t *gd)
+{
+ gd->tmp_lo4 = gmul4_lo_lo (ghash4_poly2, gd->lo4);
+ gd->tmp_hi4 = gmul4_lo_hi (ghash4_poly2, gd->lo4);
+}
+
+static_always_inline u8x16
+ghash4_final (ghash_data_t *gd)
+{
+ u8x64 r;
+ u8x32 t;
+
+ r = u8x64_xor3 (gd->hi4, u8x64_word_shift_right (gd->tmp_lo4, 4),
+ u8x64_word_shift_left (gd->tmp_hi4, 4));
+
+ /* horizontal XOR of 4 128-bit lanes */
+ t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r);
+ return u8x32_extract_hi (t) ^ u8x32_extract_lo (t);
+}
+#endif
+
+#if defined(__VPCLMULQDQ__)
+
+static const u8x32 ghash2_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+};
+
+static_always_inline u8x32
+gmul2_lo_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x00);
+}
+
+static_always_inline u8x32
+gmul2_hi_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x01);
+}
+
+static_always_inline u8x32
+gmul2_lo_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x10);
+}
+
+static_always_inline u8x32
+gmul2_hi_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x11);
+}
+
+static_always_inline void
+ghash2_mul_first (ghash_data_t *gd, u8x32 a, u8x32 b)
+{
+ gd->hi2 = gmul2_hi_hi (a, b);
+ gd->lo2 = gmul2_lo_lo (a, b);
+ gd->mid2 = gmul2_hi_lo (a, b) ^ gmul2_lo_hi (a, b);
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash2_mul_next (ghash_data_t *gd, u8x32 a, u8x32 b)
+{
+ u8x32 hi = gmul2_hi_hi (a, b);
+ u8x32 lo = gmul2_lo_lo (a, b);
+
+ if (gd->pending)
+ {
+ /* there is peding data from previous invocation so we can XOR */
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, hi);
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+ /* there is no peding data from previous invocation so we postpone XOR */
+ gd->tmp_hi2 = hi;
+ gd->tmp_lo2 = lo;
+ gd->pending = 1;
+ }
+ gd->mid2 = u8x32_xor3 (gd->mid2, gmul2_hi_lo (a, b), gmul2_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash2_reduce (ghash_data_t *gd)
+{
+ u8x32 r;
+
+ /* Final combination:
+ gd->lo2 ^= gd->mid2 << 64
+ gd->hi2 ^= gd->mid2 >> 64 */
+
+ u8x32 midl = u8x32_word_shift_left (gd->mid2, 8);
+ u8x32 midr = u8x32_word_shift_right (gd->mid2, 8);
+
+ if (gd->pending)
+ {
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, midl);
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, midr);
+ }
+ else
+ {
+ gd->lo2 ^= midl;
+ gd->hi2 ^= midr;
+ }
+
+ r = gmul2_hi_lo (ghash2_poly2, gd->lo2);
+ gd->lo2 ^= u8x32_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash2_reduce2 (ghash_data_t *gd)
+{
+ gd->tmp_lo2 = gmul2_lo_lo (ghash2_poly2, gd->lo2);
+ gd->tmp_hi2 = gmul2_lo_hi (ghash2_poly2, gd->lo2);
+}
+
+static_always_inline u8x16
+ghash2_final (ghash_data_t *gd)
+{
+ u8x32 r;
+
+ r = u8x32_xor3 (gd->hi2, u8x32_word_shift_right (gd->tmp_lo2, 4),
+ u8x32_word_shift_left (gd->tmp_hi2, 4));
+
+ /* horizontal XOR of 2 128-bit lanes */
+ return u8x32_extract_hi (r) ^ u8x32_extract_lo (r);
+}
+#endif
+
+static_always_inline void
+ghash_precompute (u8x16 H, u8x16 * Hi, int n)
+{
+ u8x16 r8;
+ u32x4 r32;
+ /* calcullate H<<1 mod poly from the hash key */
+ r8 = (u8x16) ((u64x2) H >> 63);
+ H = (u8x16) ((u64x2) H << 1);
+ H |= u8x16_word_shift_left (r8, 8);
+ r32 = (u32x4) u8x16_word_shift_right (r8, 8);
+#ifdef __SSE2__
+ r32 = u32x4_shuffle (r32, 0, 1, 2, 0);
+#else
+ r32[3] = r32[0];
+#endif
+ r32 = r32 == (u32x4) {1, 0, 0, 1};
+ Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly);
+
+ /* calculate H^(i + 1) */
+ for (int i = n - 2; i >= 0; i--)
+ Hi[i] = ghash_mul (H, Hi[i + 1]);
+}
+
+#endif /* __ghash_h__ */
+
diff --git a/src/vppinfra/perfmon/bundle_default.c b/src/vppinfra/perfmon/bundle_default.c
index b5282c5..c2118ae 100644
--- a/src/vppinfra/perfmon/bundle_default.c
+++ b/src/vppinfra/perfmon/bundle_default.c
@@ -24,25 +24,21 @@
case 1:
return format (s, "%5.2f", (f64) d[2] / d[0]);
case 2:
- if (c->n_ops > 1)
- return format (s, "%8.2f", (f64) d[0] / c->n_ops);
- else
- return format (s, "%8u", d[0]);
+ return format (s, "%8u", d[0]);
case 3:
- if (c->n_ops > 1)
- return format (s, "%8.2f", (f64) d[2] / c->n_ops);
- else
- return format (s, "%8u", d[2]);
+ return format (s, "%8.2f", (f64) d[0] / c->n_ops);
case 4:
- if (c->n_ops > 1)
- return format (s, "%9.2f", (f64) d[3] / c->n_ops);
- else
- return format (s, "%9u", d[3]);
+ return format (s, "%8u", d[2]);
case 5:
- if (c->n_ops > 1)
- return format (s, "%10.2f", (f64) d[4] / c->n_ops);
- else
- return format (s, "%10u", d[4]);
+ return format (s, "%8.2f", (f64) d[2] / c->n_ops);
+ case 6:
+ return format (s, "%9u", d[3]);
+ case 7:
+ return format (s, "%9.2f", (f64) d[3] / c->n_ops);
+ case 8:
+ return format (s, "%10u", d[4]);
+ case 9:
+ return format (s, "%10.2f", (f64) d[4] / c->n_ops);
default:
return s;
}
@@ -59,6 +55,7 @@
.config[4] = PERF_COUNT_HW_BRANCH_MISSES,
.n_events = 5,
.format_fn = format_perfmon_bundle_default,
- .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks/Op", "Inst/Op",
- "Brnch/Op", "BrMiss/Op"),
+ .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks", "Clks/Op",
+ "Inst", "Inst/Op", "Brnch", "Brnch/Op",
+ "BrMiss", "BrMiss/Op"),
};
diff --git a/src/vppinfra/sha2.h b/src/vppinfra/sha2.h
deleted file mode 100644
index 61fb7f5..0000000
--- a/src/vppinfra/sha2.h
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_sha2_h
-#define included_sha2_h
-
-#include <vppinfra/clib.h>
-
-#define SHA224_DIGEST_SIZE 28
-#define SHA224_BLOCK_SIZE 64
-
-#define SHA256_DIGEST_SIZE 32
-#define SHA256_BLOCK_SIZE 64
-#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y)))
-#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA256_CSIGMA0(x) (SHA256_ROTR(x, 2) ^ \
- SHA256_ROTR(x, 13) ^ \
- SHA256_ROTR(x, 22));
-#define SHA256_CSIGMA1(x) (SHA256_ROTR(x, 6) ^ \
- SHA256_ROTR(x, 11) ^ \
- SHA256_ROTR(x, 25));
-#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ \
- SHA256_ROTR (x, 18) ^ \
- (x >> 3))
-#define SHA256_SSIGMA1(x) (SHA256_ROTR (x, 17) ^ \
- SHA256_ROTR (x, 19) ^ \
- (x >> 10))
-
-#define SHA256_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA256_SSIGMA0 (w[j - 15]); \
- w[j] += SHA256_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA256_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA256_CSIGMA1 (s[4]); \
- t1 += SHA256_CH (s[4], s[5], s[6]); \
- t2 = SHA256_CSIGMA0 (s[0]); \
- t2 += SHA256_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-#define SHA512_224_DIGEST_SIZE 28
-#define SHA512_224_BLOCK_SIZE 128
-
-#define SHA512_256_DIGEST_SIZE 32
-#define SHA512_256_BLOCK_SIZE 128
-
-#define SHA384_DIGEST_SIZE 48
-#define SHA384_BLOCK_SIZE 128
-
-#define SHA512_DIGEST_SIZE 64
-#define SHA512_BLOCK_SIZE 128
-#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y)))
-#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA512_CSIGMA0(x) (SHA512_ROTR (x, 28) ^ \
- SHA512_ROTR (x, 34) ^ \
- SHA512_ROTR (x, 39))
-#define SHA512_CSIGMA1(x) (SHA512_ROTR (x, 14) ^ \
- SHA512_ROTR (x, 18) ^ \
- SHA512_ROTR (x, 41))
-#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ \
- SHA512_ROTR (x, 8) ^ \
- (x >> 7))
-#define SHA512_SSIGMA1(x) (SHA512_ROTR (x, 19) ^ \
- SHA512_ROTR (x, 61) ^ \
- (x >> 6))
-
-#define SHA512_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA512_SSIGMA0 (w[j - 15]); \
- w[j] += SHA512_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA512_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA512_CSIGMA1 (s[4]); \
- t1 += SHA512_CH (s[4], s[5], s[6]); \
- t2 = SHA512_CSIGMA0 (s[0]); \
- t2 += SHA512_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-static const u32 sha224_h[8] = {
- 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
- 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
-};
-
-static const u32 sha256_h[8] = {
- 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
- 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-static const u32 sha256_k[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-static const u64 sha384_h[8] = {
- 0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
- 0x9159015a3070dd17, 0x152fecd8f70e5939,
- 0x67332667ffc00b31, 0x8eb44a8768581511,
- 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
-};
-
-static const u64 sha512_h[8] = {
- 0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
- 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
- 0x510e527fade682d1, 0x9b05688c2b3e6c1f,
- 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
-};
-
-static const u64 sha512_224_h[8] = {
- 0x8c3d37c819544da2, 0x73e1996689dcd4d6,
- 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
- 0x0f6d2b697bd44da8, 0x77e36f7304c48942,
- 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1
-};
-
-static const u64 sha512_256_h[8] = {
- 0x22312194fc2bf72c, 0x9f555fa3c84c64c2,
- 0x2393b86b6f53b151, 0x963877195940eabd,
- 0x96283ee2a88effe3, 0xbe5e1e2553863992,
- 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2
-};
-
-static const u64 sha512_k[80] = {
- 0x428a2f98d728ae22, 0x7137449123ef65cd,
- 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
- 0x3956c25bf348b538, 0x59f111f1b605d019,
- 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
- 0xd807aa98a3030242, 0x12835b0145706fbe,
- 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
- 0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
- 0x9bdc06a725c71235, 0xc19bf174cf692694,
- 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
- 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
- 0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
- 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
- 0x983e5152ee66dfab, 0xa831c66d2db43210,
- 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
- 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
- 0x06ca6351e003826f, 0x142929670a0e6e70,
- 0x27b70a8546d22ffc, 0x2e1b21385c26c926,
- 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
- 0x650a73548baf63de, 0x766a0abb3c77b2a8,
- 0x81c2c92e47edaee6, 0x92722c851482353b,
- 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
- 0xc24b8b70d0f89791, 0xc76c51a30654be30,
- 0xd192e819d6ef5218, 0xd69906245565a910,
- 0xf40e35855771202a, 0x106aa07032bbd1b8,
- 0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
- 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
- 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
- 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
- 0x748f82ee5defb2fc, 0x78a5636f43172f60,
- 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
- 0x90befffa23631e28, 0xa4506cebde82bde9,
- 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
- 0xca273eceea26619c, 0xd186b8c721c0c207,
- 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
- 0x06f067aa72176fba, 0x0a637dc5a2c898a6,
- 0x113f9804bef90dae, 0x1b710b35131c471b,
- 0x28db77f523047d84, 0x32caab7b40c72493,
- 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
- 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
- 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-};
-
-typedef enum
-{
- CLIB_SHA2_224,
- CLIB_SHA2_256,
- CLIB_SHA2_384,
- CLIB_SHA2_512,
- CLIB_SHA2_512_224,
- CLIB_SHA2_512_256,
-} clib_sha2_type_t;
-
-#define SHA2_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
-#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
-
-typedef struct
-{
- u64 total_bytes;
- u16 n_pending;
- u8 block_size;
- u8 digest_size;
- union
- {
- u32 h32[8];
- u64 h64[8];
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h32x4[2];
-#endif
- };
- union
- {
- u8 as_u8[SHA2_MAX_BLOCK_SIZE];
- u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)];
- uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- }
- pending;
-}
-clib_sha2_ctx_t;
-
-static_always_inline void
-clib_sha2_init (clib_sha2_ctx_t * ctx, clib_sha2_type_t type)
-{
- const u32 *h32 = 0;
- const u64 *h64 = 0;
-
- ctx->total_bytes = 0;
- ctx->n_pending = 0;
-
- switch (type)
- {
- case CLIB_SHA2_224:
- h32 = sha224_h;
- ctx->block_size = SHA224_BLOCK_SIZE;
- ctx->digest_size = SHA224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_256:
- h32 = sha256_h;
- ctx->block_size = SHA256_BLOCK_SIZE;
- ctx->digest_size = SHA256_DIGEST_SIZE;
- break;
- case CLIB_SHA2_384:
- h64 = sha384_h;
- ctx->block_size = SHA384_BLOCK_SIZE;
- ctx->digest_size = SHA384_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512:
- h64 = sha512_h;
- ctx->block_size = SHA512_BLOCK_SIZE;
- ctx->digest_size = SHA512_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_224:
- h64 = sha512_224_h;
- ctx->block_size = SHA512_224_BLOCK_SIZE;
- ctx->digest_size = SHA512_224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_256:
- h64 = sha512_256_h;
- ctx->block_size = SHA512_256_BLOCK_SIZE;
- ctx->digest_size = SHA512_256_DIGEST_SIZE;
- break;
- }
- if (h32)
- for (int i = 0; i < 8; i++)
- ctx->h32[i] = h32[i];
-
- if (h64)
- for (int i = 0; i < 8; i++)
- ctx->h64[i] = h64[i];
-}
-
-#if defined(__SHA__) && defined (__x86_64__)
-static inline void
-shani_sha256_cycle_w (u32x4 cw[], u8 a, u8 b, u8 c, u8 d)
-{
- cw[a] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) cw[a], (__m128i) cw[b]);
- cw[a] += (u32x4) _mm_alignr_epi8 ((__m128i) cw[d], (__m128i) cw[c], 4);
- cw[a] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) cw[a], (__m128i) cw[d]);
-}
-
-static inline void
-shani_sha256_4_rounds (u32x4 cw, u8 n, u32x4 s[])
-{
- u32x4 r = *(u32x4 *) (sha256_k + 4 * n) + cw;
- s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1],
- (__m128i) r);
- r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r);
- s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0],
- (__m128i) r);
-}
-
-static inline void
-shani_sha256_shuffle (u32x4 d[2], u32x4 s[2])
-{
- /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */
- d[0] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0xbb);
- d[1] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0x11);
-}
-#endif
-
-static inline void
-clib_sha256_block (clib_sha2_ctx_t *ctx, const u8 *msg, uword n_blocks)
-{
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h[2], s[2], w[4];
-
- shani_sha256_shuffle (h, ctx->h32x4);
-
- while (n_blocks)
- {
- w[0] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 0));
- w[1] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 16));
- w[2] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 32));
- w[3] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 48));
-
- s[0] = h[0];
- s[1] = h[1];
-
- shani_sha256_4_rounds (w[0], 0, s);
- shani_sha256_4_rounds (w[1], 1, s);
- shani_sha256_4_rounds (w[2], 2, s);
- shani_sha256_4_rounds (w[3], 3, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 4, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 5, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 6, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 7, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 8, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 9, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 10, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 11, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 12, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 13, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 14, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 15, s);
-
- h[0] += s[0];
- h[1] += s[1];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-
- shani_sha256_shuffle (ctx->h32x4, h);
-#else
- u32 w[64], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h32[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u32 (*((u32 *) msg + i));
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 16; i < 64; i++)
- {
- SHA256_MSG_SCHED (w, i);
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h32[i] += s[i];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-#endif
-}
-
-static_always_inline void
-clib_sha512_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks)
-{
- u64 w[80], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h64[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u64 (*((u64 *) msg + i));
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 16; i < 80; i++)
- {
- SHA512_MSG_SCHED (w, i);
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h64[i] += s[i];
-
- /* next */
- msg += SHA512_BLOCK_SIZE;
- n_blocks--;
- }
-}
-
-static_always_inline void
-clib_sha2_update (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_bytes)
-{
- uword n_blocks;
- if (ctx->n_pending)
- {
- uword n_left = ctx->block_size - ctx->n_pending;
- if (n_bytes < n_left)
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg,
- n_bytes);
- ctx->n_pending += n_bytes;
- return;
- }
- else
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg, n_left);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->n_pending = 0;
- ctx->total_bytes += ctx->block_size;
- n_bytes -= n_left;
- msg += n_left;
- }
- }
-
- if ((n_blocks = n_bytes / ctx->block_size))
- {
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, msg, n_blocks);
- else
- clib_sha256_block (ctx, msg, n_blocks);
- n_bytes -= n_blocks * ctx->block_size;
- msg += n_blocks * ctx->block_size;
- ctx->total_bytes += n_blocks * ctx->block_size;
- }
-
- if (n_bytes)
- {
- clib_memset_u8 (ctx->pending.as_u8, 0, ctx->block_size);
- clib_memcpy_fast (ctx->pending.as_u8, msg, n_bytes);
- ctx->n_pending = n_bytes;
- }
- else
- ctx->n_pending = 0;
-}
-
-static_always_inline void
-clib_sha2_final (clib_sha2_ctx_t * ctx, u8 * digest)
-{
- int i;
-
- ctx->total_bytes += ctx->n_pending;
- if (ctx->n_pending == 0)
- {
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- ctx->pending.as_u8[0] = 0x80;
- }
- else if (ctx->n_pending + sizeof (u64) + sizeof (u8) > ctx->block_size)
- {
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- }
- else
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
-
- ctx->pending.as_u64[ctx->block_size / 8 - 1] =
- clib_net_to_host_u64 (ctx->total_bytes * 8);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
-
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- {
- for (i = 0; i < ctx->digest_size / sizeof (u64); i++)
- *((u64 *) digest + i) = clib_net_to_host_u64 (ctx->h64[i]);
-
- /* sha512-224 case - write half of u64 */
- if (i * sizeof (u64) < ctx->digest_size)
- *((u32 *) digest + 2 * i) = clib_net_to_host_u32 (ctx->h64[i] >> 32);
- }
- else
- for (i = 0; i < ctx->digest_size / sizeof (u32); i++)
- *((u32 *) digest + i) = clib_net_to_host_u32 (ctx->h32[i]);
-}
-
-static_always_inline void
-clib_sha2 (clib_sha2_type_t type, const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t ctx;
- clib_sha2_init (&ctx, type);
- clib_sha2_update (&ctx, msg, len);
- clib_sha2_final (&ctx, digest);
-}
-
-#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-static_always_inline void
-clib_hmac_sha2 (clib_sha2_type_t type, const u8 * key, uword key_len,
- const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t _ctx, *ctx = &_ctx;
- uword key_data[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- u8 i_digest[SHA2_MAX_DIGEST_SIZE];
- int i, n_words;
-
- clib_sha2_init (ctx, type);
- n_words = ctx->block_size / sizeof (uword);
-
- /* key */
- if (key_len > ctx->block_size)
- {
- /* key is longer than block, calculate hash of key */
- clib_sha2_update (ctx, key, key_len);
- for (i = (ctx->digest_size / sizeof (uword)) / 2; i < n_words; i++)
- key_data[i] = 0;
- clib_sha2_final (ctx, (u8 *) key_data);
- clib_sha2_init (ctx, type);
- }
- else
- {
- for (i = 0; i < n_words; i++)
- key_data[i] = 0;
- clib_memcpy_fast (key_data, key, key_len);
- }
-
- /* ipad */
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x3636363636363636;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* message */
- clib_sha2_update (ctx, msg, len);
- clib_sha2_final (ctx, i_digest);
-
- /* opad */
- clib_sha2_init (ctx, type);
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x5c5c5c5c5c5c5c5c;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* digest */
- clib_sha2_update (ctx, i_digest, ctx->digest_size);
- clib_sha2_final (ctx, digest);
-}
-
-#define clib_hmac_sha224(...) clib_hmac_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_hmac_sha256(...) clib_hmac_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_hmac_sha384(...) clib_hmac_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_hmac_sha512(...) clib_hmac_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_hmac_sha512_224(...) clib_hmac_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_hmac_sha512_256(...) clib_hmac_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-#endif /* included_sha2_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/test/aes_cbc.c b/src/vppinfra/test/aes_cbc.c
new file mode 100644
index 0000000..bff439a
--- /dev/null
+++ b/src/vppinfra/test/aes_cbc.c
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifdef __AES__
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_cbc.h>
+
+static const u8 iv[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+};
+
+static const u8 plaintext[] = {
+ 0x6B, 0xC1, 0xBE, 0xE2, 0x2E, 0x40, 0x9F, 0x96, 0xE9, 0x3D, 0x7E, 0x11, 0x73,
+ 0x93, 0x17, 0x2A, 0xAE, 0x2D, 0x8A, 0x57, 0x1E, 0x03, 0xAC, 0x9C, 0x9E, 0xB7,
+ 0x6F, 0xAC, 0x45, 0xAF, 0x8E, 0x51, 0x30, 0xC8, 0x1C, 0x46, 0xA3, 0x5C, 0xE4,
+ 0x11, 0xE5, 0xFB, 0xC1, 0x19, 0x1A, 0x0A, 0x52, 0xEF, 0xF6, 0x9F, 0x24, 0x45,
+ 0xDF, 0x4F, 0x9B, 0x17, 0xAD, 0x2B, 0x41, 0x7B, 0xE6, 0x6C, 0x37, 0x10,
+};
+
+static const u8 key128[] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6,
+ 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C };
+
+static const u8 key192[24] = {
+ 0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52, 0xC8, 0x10, 0xF3, 0x2B,
+ 0x80, 0x90, 0x79, 0xE5, 0x62, 0xF8, 0xEA, 0xD2, 0x52, 0x2C, 0x6B, 0x7B,
+};
+
+static const u8 ciphertext128[] = {
+ 0x76, 0x49, 0xAB, 0xAC, 0x81, 0x19, 0xB2, 0x46, 0xCE, 0xE9, 0x8E, 0x9B, 0x12,
+ 0xE9, 0x19, 0x7D, 0x50, 0x86, 0xCB, 0x9B, 0x50, 0x72, 0x19, 0xEE, 0x95, 0xDB,
+ 0x11, 0x3A, 0x91, 0x76, 0x78, 0xB2, 0x73, 0xBE, 0xD6, 0xB8, 0xE3, 0xC1, 0x74,
+ 0x3B, 0x71, 0x16, 0xE6, 0x9E, 0x22, 0x22, 0x95, 0x16, 0x3F, 0xF1, 0xCA, 0xA1,
+ 0x68, 0x1F, 0xAC, 0x09, 0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7,
+};
+
+static const u8 ciphertext192[64] = {
+ 0x4F, 0x02, 0x1D, 0xB2, 0x43, 0xBC, 0x63, 0x3D, 0x71, 0x78, 0x18, 0x3A, 0x9F,
+ 0xA0, 0x71, 0xE8, 0xB4, 0xD9, 0xAD, 0xA9, 0xAD, 0x7D, 0xED, 0xF4, 0xE5, 0xE7,
+ 0x38, 0x76, 0x3F, 0x69, 0x14, 0x5A, 0x57, 0x1B, 0x24, 0x20, 0x12, 0xFB, 0x7A,
+ 0xE0, 0x7F, 0xA9, 0xBA, 0xAC, 0x3D, 0xF1, 0x02, 0xE0, 0x08, 0xB0, 0xE2, 0x79,
+ 0x88, 0x59, 0x88, 0x81, 0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD,
+};
+
+static const u8 key256[32] = {
+ 0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE, 0x2B, 0x73, 0xAE,
+ 0xF0, 0x85, 0x7D, 0x77, 0x81, 0x1F, 0x35, 0x2C, 0x07, 0x3B, 0x61,
+ 0x08, 0xD7, 0x2D, 0x98, 0x10, 0xA3, 0x09, 0x14, 0xDF, 0xF4,
+};
+
+static const u8 ciphertext256[64] = {
+ 0xF5, 0x8C, 0x4C, 0x04, 0xD6, 0xE5, 0xF1, 0xBA, 0x77, 0x9E, 0xAB, 0xFB, 0x5F,
+ 0x7B, 0xFB, 0xD6, 0x9C, 0xFC, 0x4E, 0x96, 0x7E, 0xDB, 0x80, 0x8D, 0x67, 0x9F,
+ 0x77, 0x7B, 0xC6, 0x70, 0x2C, 0x7D, 0x39, 0xF2, 0x33, 0x69, 0xA9, 0xD9, 0xBA,
+ 0xCF, 0xA5, 0x30, 0xE2, 0x63, 0x04, 0x23, 0x14, 0x61, 0xB2, 0xEB, 0x05, 0xE2,
+ 0xC3, 0x9B, 0xE9, 0xFC, 0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B,
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_encrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_encrypt (&k, plaintext, sizeof (plaintext), iv, data); \
+ if (memcmp (ciphertext##b, data, sizeof (ciphertext##b)) != 0) \
+ err = \
+ clib_error_return (err, "encrypted data doesn't match plaintext"); \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_encrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+_ (128)
+_ (192)
+_ (256)
+#undef _
+
+REGISTER_TEST (clib_aes128_cbc_encrypt) = {
+ .name = "clib_aes128_cbc_encrypt",
+ .fn = test_clib_aes128_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_encrypt) = {
+ .name = "clib_aes192_cbc_encrypt",
+ .fn = test_clib_aes192_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_encrypt) = {
+ .name = "clib_aes256_cbc_encrypt",
+ .fn = test_clib_aes256_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_decrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_decrypt (&k, ciphertext##b, sizeof (ciphertext##b), iv, \
+ data); \
+ if (memcmp (plaintext, data, sizeof (plaintext)) != 0) \
+ err = \
+ clib_error_return (err, "decrypted data doesn't match plaintext"); \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_decrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+_ (128)
+_ (192)
+_ (256)
+#undef _
+
+REGISTER_TEST (clib_aes128_cbc_decrypt) = {
+ .name = "clib_aes128_cbc_decrypt",
+ .fn = test_clib_aes128_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_decrypt) = {
+ .name = "clib_aes192_cbc_decrypt",
+ .fn = test_clib_aes192_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_decrypt) = {
+ .name = "clib_aes256_cbc_decrypt",
+ .fn = test_clib_aes256_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+#endif
diff --git a/src/vppinfra/test/aes_gcm.c b/src/vppinfra/test/aes_gcm.c
new file mode 100644
index 0000000..aeaf7cf
--- /dev/null
+++ b/src/vppinfra/test/aes_gcm.c
@@ -0,0 +1,1177 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#if defined(__AES__) && defined(__PCLMUL__)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_gcm.h>
+
+static const u8 tc1_key128[16] = {
+ 0,
+};
+
+static const u8 tc1_iv[12] = {
+ 0,
+};
+
+static const u8 tc1_tag128[] = { 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e,
+ 0x30, 0x61, 0x36, 0x7f, 0x1d, 0x57,
+ 0xa4, 0xe7, 0x45, 0x5a };
+static const u8 tc1_key256[32] = {
+ 0,
+};
+
+static const u8 tc1_tag256[] = {
+ 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
+ 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b,
+};
+
+static const u8 tc2_ciphertext256[] = { 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60,
+ 0x6b, 0x6e, 0x07, 0x4e, 0xc5, 0xd3,
+ 0xba, 0xf3, 0x9d, 0x18 };
+
+static const u8 tc2_tag256[] = { 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99,
+ 0x6b, 0xf0, 0x26, 0x5b, 0x98, 0xb5,
+ 0xd4, 0x8a, 0xb9, 0x19 };
+
+static const u8 tc2_plaintext[16] = {
+ 0,
+};
+
+static const u8 tc2_tag128[] = { 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec,
+ 0x13, 0xbd, 0xf5, 0x3a, 0x67, 0xb2,
+ 0x12, 0x57, 0xbd, 0xdf };
+
+static const u8 tc2_ciphertext128[] = { 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6,
+ 0xa3, 0x92, 0xf3, 0x28, 0xc2, 0xb9,
+ 0x71, 0xb2, 0xfe, 0x78 };
+
+static const u8 tc3_key128[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65,
+ 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_iv[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce,
+ 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88 };
+
+static const u8 tc3_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5, 0xaf,
+ 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, 0x2e, 0x4c,
+ 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09,
+ 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, 0xb1, 0x6a, 0xed, 0xf5,
+ 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
+};
+
+static const u8 tc3_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7, 0x84,
+ 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, 0x35, 0xc1,
+ 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93,
+ 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, 0x1b, 0xa3, 0x0b, 0x39,
+ 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
+};
+
+static const u8 tc3_tag128[] = { 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd,
+ 0x64, 0xa6, 0x2c, 0xf3, 0x5a, 0xbd,
+ 0x2b, 0xa6, 0xfa, 0xb4 };
+
+static const u8 tc3_key256[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73,
+ 0x1c, 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30,
+ 0x83, 0x08, 0xfe, 0xff, 0xe9, 0x92, 0x86,
+ 0x65, 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3, 0x2a,
+ 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, 0x75, 0x98,
+ 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb,
+ 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, 0xc5, 0xf6, 0x1e, 0x63,
+ 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
+};
+
+static const u8 tc3_tag256[] = { 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34,
+ 0x71, 0xbd, 0xec, 0x1a, 0x50, 0x22,
+ 0x70, 0xe3, 0xcc, 0x6c };
+
+static const u8 tc4_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5,
+ 0xaf, 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
+ 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95,
+ 0x95, 0x68, 0x09, 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
+ 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39,
+};
+
+static const u8 tc4_aad[] = { 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe,
+ 0xef, 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad,
+ 0xbe, 0xef, 0xab, 0xad, 0xda, 0xd2 };
+
+static const u8 tc4_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7,
+ 0x84, 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
+ 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2,
+ 0x54, 0x66, 0x93, 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
+ 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91
+};
+
+static const u8 tc4_tag128[] = { 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21,
+ 0xa5, 0xdb, 0x94, 0xfa, 0xe9, 0x5a,
+ 0xe7, 0x12, 0x1a, 0x47 };
+
+static const u8 tc4_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3,
+ 0x2a, 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
+ 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48,
+ 0x59, 0x0d, 0xbb, 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
+ 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62
+};
+
+static const u8 tc4_tag256[] = { 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e,
+ 0x17, 0x68, 0xcd, 0xdf, 0x88, 0x53,
+ 0xbb, 0x2d, 0x55, 0x1b };
+
+static const u8 inc_key[] = { 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b,
+ 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b };
+static const u8 inc_iv[] = { 0xe2, 0xe4, 0x3f, 0x29, 0xfe, 0xd4,
+ 0xbc, 0x31, 0x56, 0xa7, 0x97, 0xf5 };
+
+static const struct
+{
+ const u16 n_bytes;
+ const u64 tag_gcm_128[2];
+ const u64 tag_gcm_256[2];
+ const u64 tag_gmac_128[2];
+ const u64 tag_gmac_256[2];
+ const u8 tag256[16];
+} inc_test_cases[] = {
+ {
+ .n_bytes = 0,
+ .tag_gcm_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gcm_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ .tag_gmac_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gmac_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ },
+ {
+ .n_bytes = 1,
+ .tag_gcm_128 = { 0xe89aa5be94fa1db4, 0x70d82ed02542a560 },
+ .tag_gcm_256 = { 0xcb0659b38e60d3a7, 0x9758b874959187ff },
+ .tag_gmac_128 = { 0xf9be1e7db073c565, 0x3b8a0ecc7a91f09d },
+ .tag_gmac_256 = { 0x1e302e97ab394130, 0xef29621c33bdb710 },
+ },
+ {
+ .n_bytes = 7,
+ .tag_gcm_128 = { 0xf4af7cbe57bd2078, 0x063dd60abbe51049 },
+ .tag_gcm_256 = { 0x7d231388fe8a19be, 0x59be3e7205269abd },
+ .tag_gmac_128 = { 0x27d0a47980eed1c6, 0xe6163485e73d02b3 },
+ .tag_gmac_256 = { 0x61ce281b47729f6c, 0x128a6bc0880e5d84 },
+ },
+ {
+ .n_bytes = 8,
+ .tag_gcm_128 = { 0xf45b40961422abc4, 0x0a932b98c4999694 },
+ .tag_gcm_256 = { 0xf7f945beed586ee2, 0x67239433a7bd3f23 },
+ .tag_gmac_128 = { 0x3a25d38572abe3b1, 0x220798aca96d594a },
+ .tag_gmac_256 = { 0x2e0e6d58d1ab41ca, 0x09bbc83e3b7b5e11 },
+ },
+ {
+ .n_bytes = 9,
+ .tag_gcm_128 = { 0x791b0a879d236364, 0xde9553e3ed1b763f },
+ .tag_gcm_256 = { 0x24c13ed7b46813cd, 0xe646ce24ea4b281e },
+ .tag_gmac_128 = { 0x0e521672b23a4fc7, 0x16f129224dec5fd8 },
+ .tag_gmac_256 = { 0x8b9c603789c34043, 0x0a8b626928c9fb6f },
+ },
+ {
+ .n_bytes = 15,
+ .tag_gcm_128 = { 0xb277ef05e2be1cc0, 0x2922fba5e321c81e },
+ .tag_gcm_256 = { 0xc3ca9f633fa803dc, 0x96e60b0c3347d744 },
+ .tag_gmac_128 = { 0xab99e6327c8e1493, 0x09a9a153045ba43f },
+ .tag_gmac_256 = { 0xfc9ec2d6a1ad492b, 0xf0b0ba877663732d },
+ },
+ {
+ .n_bytes = 16,
+ .tag_gcm_128 = { 0x3e3438e8f932ebe3, 0x958e270d56ae588e },
+ .tag_gcm_256 = { 0x6ac53524effc8171, 0xccab3a16a0b5813c },
+ .tag_gmac_128 = { 0x0eb4a09c6c7db16b, 0x1cdb5573a27a2e4a },
+ .tag_gmac_256 = { 0x71752018b31eae33, 0xdc4bd36d44b9fd5d },
+ },
+ {
+ .n_bytes = 31,
+ .tag_gcm_128 = { 0x1f4d4a7a056e4bca, 0x97ac76121dccb4e0 },
+ .tag_gcm_256 = { 0x609aea9aec919ab6, 0x1eba3c4998e7abb9 },
+ .tag_gmac_128 = { 0x289280f9e8879c68, 0xe6b0e36afc0d2ae1 },
+ .tag_gmac_256 = { 0x0b3f61762ba4ed43, 0x293f596a76d63b37 },
+ },
+ {
+ .n_bytes = 32,
+ .tag_gcm_128 = { 0xc4b64505d045818f, 0x72bfd499f0f983b4 },
+ .tag_gcm_256 = { 0x3f003fb179b2c480, 0x883876d4904700c2 },
+ .tag_gmac_128 = { 0x3dd10ab954d807f0, 0x5ae32ee41675051e },
+ .tag_gmac_256 = { 0x1a80ab830fc736c0, 0x51db27630adae337 },
+ },
+ {
+ .n_bytes = 47,
+ .tag_gcm_128 = { 0x3aedb0c6c14f2ea1, 0xe4626626bae641cd },
+ .tag_gcm_256 = { 0x9c91b87dfd302880, 0x05bb594dde5abb9c },
+ .tag_gmac_128 = { 0xe0fe54f2bdadeba8, 0x6f8f40edb569701f },
+ .tag_gmac_256 = { 0x26c5632c7abbdb3f, 0xc18ccc24df8bb239 },
+ },
+ {
+ .n_bytes = 48,
+ .tag_gcm_128 = { 0xdbceb2aed0dbbe27, 0xfef0013e8ebe6ef1 },
+ .tag_gcm_256 = { 0x98ad025f30b58ffa, 0xabc8a99857034e42 },
+ .tag_gmac_128 = { 0x269518e8584b7f6c, 0x1c9f41410a81799c },
+ .tag_gmac_256 = { 0x144807ce7aa8eb61, 0x611a8355b4377dc6 },
+ },
+ {
+ .n_bytes = 63,
+ .tag_gcm_128 = { 0x1769ccf523a2046e, 0x7328e18749a559b4 },
+ .tag_gcm_256 = { 0xcdf2f28efa9689ce, 0x636676f6aedea9de },
+ .tag_gmac_128 = { 0x4d47537060defce8, 0x0d4819c20ba8e889 },
+ .tag_gmac_256 = { 0x7b60615e7bfc9a7a, 0x610633296eb30b94 },
+ },
+ {
+ .n_bytes = 64,
+ .tag_gcm_128 = { 0xa5602f73865b6a77, 0x78317e461ff9b560 },
+ .tag_gcm_256 = { 0x5c17a6dcd1f23b65, 0x25331c378256a93e },
+ .tag_gmac_128 = { 0x39d941ed85d81ab0, 0xe358a61078628d63 },
+ .tag_gmac_256 = { 0x5276fbdd333f380d, 0xb0dc63e68f137e74 },
+ },
+ {
+ .n_bytes = 79,
+ .tag_gcm_128 = { 0x5d32cd75f2e82d84, 0xbc15801c1fe285bd },
+ .tag_gcm_256 = { 0xb2b2855f4b1ecf70, 0xa524adc1609c757b },
+ .tag_gmac_128 = { 0xa147493f08a8738e, 0xbf07da9f4a88944f },
+ .tag_gmac_256 = { 0xfee15e0d4b936bc7, 0x1dc88398c6b168bc },
+ },
+ {
+ .n_bytes = 80,
+ .tag_gcm_128 = { 0xa303b7247b9b00df, 0xe72d6d7063d48b72 },
+ .tag_gcm_256 = { 0x7abfffc9ecfa00ec, 0x9c5ffcd753ee4568 },
+ .tag_gmac_128 = { 0xc3e61bf9f370b40e, 0x66b1c4a6df3b19d7 },
+ .tag_gmac_256 = { 0x0cc7b09a7d602352, 0x29e8a64447a764d2 },
+ },
+ {
+ .n_bytes = 95,
+ .tag_gcm_128 = { 0xf0fb35c36eac3025, 0xa13463307fc48907 },
+ .tag_gcm_256 = { 0x283a73a05bd0e3c2, 0x794a181dd07a0fb7 },
+ .tag_gmac_128 = { 0x26f3546060d9f958, 0xc1367fca8869ab40 },
+ .tag_gmac_256 = { 0xa046e1705100c711, 0xbcf9d6a06f360260 },
+ },
+ {
+ .n_bytes = 96,
+ .tag_gcm_128 = { 0x974bb3c1c258bfb5, 0xcf057344bccb0596 },
+ .tag_gcm_256 = { 0x18920d75fcfb702e, 0x18e5f14ba429b7be },
+ .tag_gmac_128 = { 0xf43cca4837ad00b8, 0xb1a1585d51838352 },
+ .tag_gmac_256 = { 0xce3427dc5123b31f, 0xdcc6e49fa0f6587e },
+ },
+ {
+ .n_bytes = 111,
+ .tag_gcm_128 = { 0x5d73baa8eef0ced3, 0x79339e31d5d813de },
+ .tag_gcm_256 = { 0x4cefa311c9c39a86, 0xe809ee78930ef736 },
+ .tag_gmac_128 = { 0x452003e6d535a523, 0x723f08581012c62e },
+ .tag_gmac_256 = { 0x6ce2e1661db942ca, 0xccd700c9c6d03cfd },
+ },
+ {
+ .n_bytes = 112,
+ .tag_gcm_128 = { 0x189aa61ce15a0d11, 0xc907e6bccbdbb8f9 },
+ .tag_gcm_256 = { 0xa41c96c843b791b4, 0x0f9f60953f03e5fc },
+ .tag_gmac_128 = { 0x44c75b94dbf8539f, 0xcdebe3ed9c68c840 },
+ .tag_gmac_256 = { 0x21a289dd39eadd19, 0x749a038e1ea0711c },
+ },
+ {
+ .n_bytes = 127,
+ .tag_gcm_128 = { 0xc6ea87bfe82d73f6, 0x9d85dbf8072bb051 },
+ .tag_gcm_256 = { 0xd5e436b2ddfac9fa, 0x54d7d13fa214703a },
+ .tag_gmac_128 = { 0xdc5374b7d7d221c4, 0xa8cf4e11958b9dff },
+ .tag_gmac_256 = { 0xc7ad0bba9de54f6a, 0x38ed037fe0924dee },
+ },
+ {
+ .n_bytes = 128,
+ .tag_gcm_128 = { 0x357d4954b7c2b440, 0xb3b07ce0cd143149 },
+ .tag_gcm_256 = { 0x5555d09cb247322d, 0xeb9d1cea38b68951 },
+ .tag_gmac_128 = { 0x6a77579181663dde, 0xe359157bd4246d3f },
+ .tag_gmac_256 = { 0x9fe930d50d661e37, 0xba4a0f3c3a6b63cf },
+ },
+ {
+ .n_bytes = 143,
+ .tag_gcm_128 = { 0x358f897d4783966f, 0x6fa44993a9ed54c4 },
+ .tag_gcm_256 = { 0x60e91f959f2ccdbe, 0x116c56fdaa107deb },
+ .tag_gmac_128 = { 0x121d26aba8aaee0d, 0xc37cda9c43f51008 },
+ .tag_gmac_256 = { 0x06918b1cd20e0abc, 0x42938b1d8e745dcd },
+ },
+ {
+ .n_bytes = 144,
+ .tag_gcm_128 = { 0x8a9efe3df387e069, 0xc0a3f2f7547c704b },
+ .tag_gcm_256 = { 0x217d59f53bfbc314, 0x2d8f088d05532b0d },
+ .tag_gmac_128 = { 0x382949d56e0e8f05, 0x4e87fb8f83f095a7 },
+ .tag_gmac_256 = { 0x75e07060883db37d, 0x5fde7b9bda37d680 },
+ },
+ {
+ .n_bytes = 159,
+ .tag_gcm_128 = { 0x297252081cc8db1e, 0x6357143fa7f756c8 },
+ .tag_gcm_256 = { 0x7e8fca9d1b17e003, 0x7bf7dad063b9a5c9 },
+ .tag_gmac_128 = { 0x5d0524b130e97547, 0xd6befd8591072437 },
+ .tag_gmac_256 = { 0xf5f631d391b635fc, 0xe8f7b6808544f312 },
+ },
+ {
+ .n_bytes = 160,
+ .tag_gcm_128 = { 0x90e034ee0f08a871, 0x002f483eefa24ec9 },
+ .tag_gcm_256 = { 0xed24df02e455d6d3, 0x7a7d318ed132cb7f },
+ .tag_gmac_128 = { 0xc75f87215ae12a2f, 0xf264e5381d5b0412 },
+ .tag_gmac_256 = { 0x1ad3e294fd55b0a6, 0xa1a551e59fd12e2f },
+ },
+ {
+ .n_bytes = 175,
+ .tag_gcm_128 = { 0x8f663955c8e4249e, 0xd9d8d8d7352b18d9 },
+ .tag_gcm_256 = { 0xd9af34eae74a35e1, 0xc22e74b34267e5df },
+ .tag_gmac_128 = { 0xb54a2e8b186a55db, 0x980f586c6da8afce },
+ .tag_gmac_256 = { 0x9cceb31baad18ff1, 0xce97588909ece8af },
+ },
+ {
+ .n_bytes = 176,
+ .tag_gcm_128 = { 0x258ec0df82f003bd, 0x571496e92c966695 },
+ .tag_gcm_256 = { 0xa1925cda1fa1dd2c, 0x914038618faecf99 },
+ .tag_gmac_128 = { 0xfc384b412bdb05ef, 0x73968cf3b464a997 },
+ .tag_gmac_256 = { 0x50d9ce4be242e176, 0x5fb78e9404c9226d },
+ },
+ {
+ .n_bytes = 191,
+ .tag_gcm_128 = { 0x796a90a3edaab614, 0x4bf34c2c6333c736 },
+ .tag_gcm_256 = { 0x4ffd3a84b346c6d5, 0x9d4c84c7ac5a191c },
+ .tag_gmac_128 = { 0x16c11c6bfad5973e, 0xa0825b9c827137c8 },
+ .tag_gmac_256 = { 0x82c144c209c22758, 0x7428b4ac38a65c56 },
+ },
+ {
+ .n_bytes = 192,
+ .tag_gcm_128 = { 0x2a44492af2e06a75, 0xbe4eab62aacfc2d3 },
+ .tag_gcm_256 = { 0xb7d4971a8061092d, 0x94da543669369e41 },
+ .tag_gmac_128 = { 0xed462726c984b596, 0xd61b317d979f5df8 },
+ .tag_gmac_256 = { 0x554dc7f30981dbf6, 0x94447d0fbf9f2c8b },
+ },
+ {
+ .n_bytes = 207,
+ .tag_gcm_128 = { 0xcfac9f67252713c8, 0xd638cf6b74c6acf6 },
+ .tag_gcm_256 = { 0x57a4a9d299663925, 0xa802f8453e8bcc5b },
+ .tag_gmac_128 = { 0xef03f3cdcb0ea819, 0xeea8f0f7f805c306 },
+ .tag_gmac_256 = { 0x3d8cd7d92cf0a212, 0x12c1ddddab7e752c },
+ },
+ {
+ .n_bytes = 208,
+ .tag_gcm_128 = { 0x5467633795b92cf5, 0x6b45fb93e19f9341 },
+ .tag_gcm_256 = { 0xaeced4090d4d20bc, 0xd20161cd2617613e },
+ .tag_gmac_128 = { 0x02bb88dbe681ab69, 0xaf973bfd0b924144 },
+ .tag_gmac_256 = { 0x313020fc5283b45e, 0x1757616d4cf17c7f },
+ },
+ {
+ .n_bytes = 223,
+ .tag_gcm_128 = { 0x2f9c725903c07adf, 0xe01712c7d6d5055d },
+ .tag_gcm_256 = { 0xeae53a9b0d03a4f9, 0x42b2375d569d384e },
+ .tag_gmac_128 = { 0x6ea092dd400ec00d, 0x23237fa0bd0c1977 },
+ .tag_gmac_256 = { 0xa02e0f41f12f0053, 0xfba53430aa616219 },
+ },
+ {
+ .n_bytes = 224,
+ .tag_gcm_128 = { 0x73e40772334901a9, 0xddf6075b357cb307 },
+ .tag_gcm_256 = { 0x2eb3450f9462c968, 0xa9fb95f281c117e9 },
+ .tag_gmac_128 = { 0x33762525c12dfd1d, 0xcb3d8d0402c23ebf },
+ .tag_gmac_256 = { 0x30c6d05fb98c2a84, 0xaa2c9f6303383d3a },
+ },
+ {
+ .n_bytes = 239,
+ .tag_gcm_128 = { 0x184d15fd2e2c63a6, 0x3dfe238b88dd2924 },
+ .tag_gcm_256 = { 0x18deafee39975b36, 0xc07761cf4fc16c06 },
+ .tag_gmac_128 = { 0x10a48f2bc4e64f87, 0x85eec49ae83d4256 },
+ .tag_gmac_256 = { 0x5ac87f47f32770eb, 0x31706ca888dd6d44 },
+ },
+ {
+ .n_bytes = 240,
+ .tag_gcm_128 = { 0x153134f11cfa06ec, 0xd987642cc3688a34 },
+ .tag_gcm_256 = { 0x3eb66b6dc0bba824, 0x274c4648d515c844 },
+ .tag_gmac_128 = { 0x9e5afe891c7c7dcb, 0xa2b3fa1c026343e2 },
+ .tag_gmac_256 = { 0xe9120e4e9ff4b1e1, 0xb88bf68336342598 },
+ },
+ {
+ .n_bytes = 255,
+ .tag_gcm_128 = { 0x2b5e78936d1ace73, 0x15b766bfee18d348 },
+ .tag_gcm_256 = { 0xeb3741a345395c97, 0x02e11e0478e4cc5a },
+ .tag_gmac_128 = { 0xf7daf525751192df, 0x1b1641c3362905ac },
+ .tag_gmac_256 = { 0x0b16a2bb842caaca, 0x996732fedaa6b829 },
+ },
+ {
+ .n_bytes = 256,
+ .tag_gcm_128 = { 0x6d4507e0c354e80a, 0x2345eccddd0bd71e },
+ .tag_gcm_256 = { 0xa582b8122d699b63, 0xb16db944f6b073f3 },
+ .tag_gmac_128 = { 0xc58bb57544c07b40, 0x1a8dd3d8124cdf39 },
+ .tag_gmac_256 = { 0xb0f6db0da52e1dc2, 0xbd3a86a577ed208a },
+ },
+ {
+ .n_bytes = 319,
+ .tag_gcm_128 = { 0x2cd41fdf6f659a6b, 0x2486849d7666d76e },
+ .tag_gcm_256 = { 0xb7e416c8a716cb4d, 0xc7abe0d755b48845 },
+ .tag_gmac_128 = { 0xad83725394d4a36b, 0x5fdd42e941cad49b },
+ .tag_gmac_256 = { 0xbb0b73609b90f7eb, 0xe4d382b8b9b7d43e },
+ },
+ {
+ .n_bytes = 320,
+ .tag_gcm_128 = { 0x064cfe34b7d9f89c, 0xb6c7263f66c89b47 },
+ .tag_gcm_256 = { 0x1254c9ae84d8ff50, 0x9faeab423099dc9a },
+ .tag_gmac_128 = { 0xd91d60ce71d24533, 0xb1cdfd3b3200b171 },
+ .tag_gmac_256 = { 0x921de9e3d353559c, 0x3509d2775817a1de },
+ },
+ {
+ .n_bytes = 383,
+ .tag_gcm_128 = { 0x14788c7531d682e1, 0x8af79effe807a4dc },
+ .tag_gcm_256 = { 0x947754a0844b4a4d, 0x9eb3849d93d5048e },
+ .tag_gmac_128 = { 0xfa84d3a18ea6f895, 0x9a45c729797a8ac4 },
+ .tag_gmac_256 = { 0xe8e61e134e40359a, 0xe8e404d4b523607c },
+ },
+ {
+ .n_bytes = 384,
+ .tag_gcm_128 = { 0xfba3fcfd9022e9a7, 0x257ba59f12055d70 },
+ .tag_gcm_256 = { 0x7c6ca4e7fba2bc35, 0x1c590be09b3d549b },
+ .tag_gmac_128 = { 0x4ca0f087d812e48f, 0xd1d39c4338d57a04 },
+ .tag_gmac_256 = { 0xb0a2257cdec364c7, 0x6a4308976fda4e5d },
+ },
+ {
+ .n_bytes = 447,
+ .tag_gcm_128 = { 0x8fde1490c60f09bf, 0xd2932f04c202c5e4 },
+ .tag_gcm_256 = { 0x1845a80cbdcf2e62, 0xc7c49c9864bca732 },
+ .tag_gmac_128 = { 0x35aa90d2deb41b9c, 0x516ab85a3f17b71e },
+ .tag_gmac_256 = { 0x1db78f8b7b34d9e7, 0xd168177351e601fe },
+ },
+ {
+ .n_bytes = 448,
+ .tag_gcm_128 = { 0xd0a7b75f734a1a7c, 0xc7689b7c571a09bf },
+ .tag_gcm_256 = { 0xef3a9118c347118d, 0x282a7736060d7bb5 },
+ .tag_gmac_128 = { 0xce2dab9fede53934, 0x27f3d2bb2af9dd2e },
+ .tag_gmac_256 = { 0xca3b0cba7b772549, 0x3104ded0d6df7123 },
+ },
+ {
+ .n_bytes = 511,
+ .tag_gcm_128 = { 0x6fb5d366fa97b2d2, 0xed2d955fcc78e556 },
+ .tag_gcm_256 = { 0xc2bc52eca9348b7c, 0x0ec18a2eb637446f },
+ .tag_gmac_128 = { 0xe3012a4897edd5b5, 0xfe18c3ec617a7e88 },
+ .tag_gmac_256 = { 0x00e050eecf184591, 0xba24484f84867f4f },
+ },
+ {
+ .n_bytes = 512,
+ .tag_gcm_128 = { 0x25138f7fe88b54bd, 0xcc078b619c0e83a2 },
+ .tag_gcm_256 = { 0x63313c5ebe68fa92, 0xccc78784896cdcc3 },
+ .tag_gmac_128 = { 0xc688fe54c5595ec0, 0x5b8a687343c3ef03 },
+ .tag_gmac_256 = { 0x807c9f8e1c198242, 0xb1e0befc0b9b8193 },
+ },
+ {
+ .n_bytes = 575,
+ .tag_gcm_128 = { 0x0ce8e0b7332a7076, 0xe4aa7ab60dd0946a },
+ .tag_gcm_256 = { 0x585cff3cf78504d4, 0x45f3a9532ea40e8b },
+ .tag_gmac_128 = { 0xc06ca34dbad542b4, 0x840508722ff031dc },
+ .tag_gmac_256 = { 0xa46e22748f195488, 0x43817a5d4d17408a },
+ },
+ {
+ .n_bytes = 576,
+ .tag_gcm_128 = { 0x45360be81e8323bd, 0x10892d9804b75bb5 },
+ .tag_gcm_256 = { 0x66208ae5d809036e, 0x603d0af49475de88 },
+ .tag_gmac_128 = { 0xb4f2b1d05fd3a4ec, 0x6a15b7a05c3a5436 },
+ .tag_gmac_256 = { 0x8d78b8f7c7daf6ff, 0x925b2a92acb7356a },
+ },
+ {
+ .n_bytes = 577,
+ .tag_gcm_128 = { 0xc7e5cd17251fd138, 0xecfb0e05110303df },
+ .tag_gcm_256 = { 0x2939d12c85ea8cf8, 0xea063fba37c92eb5 },
+ .tag_gmac_128 = { 0x1fa02b370bec64a0, 0x8c759ca95a8cea85 },
+ .tag_gmac_256 = { 0x6a602c2b1fff6617, 0x17e06d829bd24a8d },
+ },
+ {
+ .n_bytes = 639,
+ .tag_gcm_128 = { 0xc679ef7a01e8f14c, 0x281e3b9a9f715cb9 },
+ .tag_gcm_256 = { 0x13abd2d67e162f98, 0xf637d467046af949 },
+ .tag_gmac_128 = { 0x05037392550b7ae2, 0x5095b4629ba46d40 },
+ .tag_gmac_256 = { 0xd8e8045772299aa7, 0x564d72fb58ea9808 },
+ },
+ {
+ .n_bytes = 640,
+ .tag_gcm_128 = { 0xff1a2c922cdd1336, 0xcaa02eab8691bf51 },
+ .tag_gcm_256 = { 0xd57e16f169d79da5, 0x3e2b47264f8efe9c },
+ .tag_gmac_128 = { 0xb32750b403bf66f8, 0x1b03ef08da0b9d80 },
+ .tag_gmac_256 = { 0x80ac3f38e2aacbfa, 0xd4ea7eb88213b629 },
+ },
+ {
+ .n_bytes = 703,
+ .tag_gcm_128 = { 0xefd0804f0155b8f1, 0xb1849ed867269569 },
+ .tag_gcm_256 = { 0xf66c5ecbd1a06fa4, 0x55ef36f3fdbe763a },
+ .tag_gmac_128 = { 0x725813463d977e5b, 0xd52aaabb923cfabb },
+ .tag_gmac_256 = { 0x4add8f86736adc52, 0xf6dabb4596975fd7 },
+ },
+ {
+ .n_bytes = 704,
+ .tag_gcm_128 = { 0x583b29260ea8e49f, 0xfaa93b0db98f9274 },
+ .tag_gcm_256 = { 0x0b777f2cd9e2f0ef, 0x01510fc85a99382e },
+ .tag_gmac_128 = { 0x89df280b0ec65cf3, 0xa3b3c05a87d2908b },
+ .tag_gmac_256 = { 0x9d510cb7732920fc, 0x16b672e611ae2f0a },
+ },
+ {
+ .n_bytes = 767,
+ .tag_gcm_128 = { 0x671ec58ab6d4a210, 0x0845fbe603169eff },
+ .tag_gcm_256 = { 0xb3913f7eb9bbdbbb, 0x4cb17aa290f6ab11 },
+ .tag_gmac_128 = { 0x3036046580a81443, 0xe18d34bb706e632b },
+ .tag_gmac_256 = { 0x4e82bc959349466c, 0x01210641d62bbdda },
+ },
+ {
+ .n_bytes = 768,
+ .tag_gcm_128 = { 0x66993b5de915fc6e, 0x4aaf0b8441040267 },
+ .tag_gcm_256 = { 0x958ed0a6c1bf11e0, 0xc29d9f4a8ce8bdc6 },
+ .tag_gmac_128 = { 0x02674435b179fddc, 0xe016a6a0540bb9be },
+ .tag_gmac_256 = { 0xf562c523b24bf164, 0x257cb21a7b602579 },
+ },
+ {
+ .n_bytes = 831,
+ .tag_gcm_128 = { 0x4914f7980699f93c, 0xc2e44fdba6a839e7 },
+ .tag_gcm_256 = { 0xa8fab43ecd572a25, 0x3cd465e491195b81 },
+ .tag_gmac_128 = { 0xa6d725516e956d5d, 0x630768e80ac3de3d },
+ .tag_gmac_256 = { 0xb4746cdde367c9e2, 0x3ea53280901a0375 },
+ },
+ {
+ .n_bytes = 832,
+ .tag_gcm_128 = { 0xac9a519f06fb8c70, 0xdc1a6544ed2cfcf7 },
+ .tag_gcm_256 = { 0x54877a7ccd02c592, 0x1a09a4474d903b56 },
+ .tag_gmac_128 = { 0xd24937cc8b938b05, 0x8d17d73a7909bbd7 },
+ .tag_gmac_256 = { 0x9d62f65eaba46b95, 0xef7f624f71ba7695 },
+ },
+ {
+ .n_bytes = 895,
+ .tag_gcm_128 = { 0x3d365bf4d44c1071, 0x07ac3129079f2013 },
+ .tag_gcm_256 = { 0x608543d4fe6526a1, 0xc78a987b87c8d96c },
+ .tag_gmac_128 = { 0xc71cf903f7a557c5, 0x06788583ad2122a5 },
+ .tag_gmac_256 = { 0x7cdaa511565b289a, 0xf818a4c85a8bd575 },
+ },
+ {
+ .n_bytes = 896,
+ .tag_gcm_128 = { 0x97000fafd1359a0b, 0xfc226d534866b495 },
+ .tag_gcm_256 = { 0x1850ee7af3133326, 0xf198d539eee4b1f5 },
+ .tag_gmac_128 = { 0x7138da25a1114bdf, 0x4deedee9ec8ed265 },
+ .tag_gmac_256 = { 0x249e9e7ec6d879c7, 0x7abfa88b8072fb54 },
+ },
+ {
+ .n_bytes = 959,
+ .tag_gcm_128 = { 0x17200025564902f2, 0x3f2c3b711ba4086d },
+ .tag_gcm_256 = { 0x3d0bf3e8b24e296d, 0x42fe0f54e33deb6d },
+ .tag_gmac_128 = { 0x8baae9b6f3bd797a, 0x177e0b6c577f2436 },
+ .tag_gmac_256 = { 0x853f961c965f472c, 0x8adc4113b3cf933a },
+ },
+ {
+ .n_bytes = 960,
+ .tag_gcm_128 = { 0x2a30ca7325e7a81b, 0xacbc71832bdceb63 },
+ .tag_gcm_256 = { 0x037786319dc22ed7, 0x6730acf359ec3b6e },
+ .tag_gmac_128 = { 0x702dd2fbc0ec5bd2, 0x61e7618d42914e06 },
+ .tag_gmac_256 = { 0x52b3152d961cbb82, 0x6ab088b034f6e3e7 },
+ },
+ {
+ .n_bytes = 1023,
+ .tag_gcm_128 = { 0x8e8789e6c4c90855, 0x4ec5503d7f953df6 },
+ .tag_gcm_256 = { 0xdb0afebe6c085f53, 0x4eb6f07b63b8a020 },
+ .tag_gmac_128 = { 0x6e9b48e5ad508180, 0xdc86430db2bad514 },
+ .tag_gmac_256 = { 0xbb52b4fbf236b741, 0x47ae63bc836dfba3 },
+ },
+ {
+ .n_bytes = 1024,
+ .tag_gcm_128 = { 0x94e1ccbea0f24089, 0xf51b53b600363bd2 },
+ .tag_gcm_256 = { 0x70f3eb3d562f0b34, 0xffd09e1a25d5bef3 },
+ .tag_gmac_128 = { 0x65a2b560392ecee3, 0x30079a9a9dbbd3a3 },
+ .tag_gmac_256 = { 0x4d361736c43090e6, 0x135810df49dcc981 },
+ },
+ {
+ .n_bytes = 1025,
+ .tag_gcm_128 = { 0x830a99737df5a71a, 0xd9ea6e87c63d3aae },
+ .tag_gcm_256 = { 0xa3fc30e0254a5ee2, 0x52e59adc9a75be40 },
+ .tag_gmac_128 = { 0xb217556427fc09ab, 0xc32fd72ec886730d },
+ .tag_gmac_256 = { 0xeab5a9a02cb0869e, 0xd59e51684bc2839c },
+ },
+ {
+ .n_bytes = 1039,
+ .tag_gcm_128 = { 0x238f229130e92934, 0x52752fc860bca067 },
+ .tag_gcm_256 = { 0xae2754bcaed68191, 0xe0770d1e9a7a67f3 },
+ .tag_gmac_128 = { 0xe030ad2beb01d85d, 0xf10c78b1b64c27af },
+ .tag_gmac_256 = { 0x081b45e126248e85, 0xca0789f30e1c47a1 },
+ },
+ {
+ .n_bytes = 1040,
+ .tag_gcm_128 = { 0x4eebcf7391d66c6f, 0x107d8bef4a93d9c6 },
+ .tag_gcm_256 = { 0xbeb02ae5466964f3, 0x8eb90364c5f9e4cb },
+ .tag_gmac_128 = { 0x451deb85fbf27da5, 0xe47e8c91106dadda },
+ .tag_gmac_256 = { 0x85f0a72f3497699d, 0xe6fce0193cc6c9d1 },
+ },
+ {
+ .n_bytes = 1041,
+ .tag_gcm_128 = { 0xbbddfb0304411d71, 0xe573f63553d7ede4 },
+ .tag_gcm_256 = { 0x68e42d2959af0b24, 0x35ac8e73c749e7f4 },
+ .tag_gmac_128 = { 0x98d022b9896b68f8, 0x98dfde2a17b2869b },
+ .tag_gmac_256 = { 0xb8dac6add35d0d9b, 0x1c55973c6dd769af },
+ },
+ {
+ .n_bytes = 1536,
+ .tag_gcm_128 = { 0x7d8933fd922418bd, 0xc88c2f289c5d3d83 },
+ .tag_gcm_256 = { 0x966c103eb6ee69f2, 0x2f6b070b5c0fc66f },
+ .tag_gmac_128 = { 0x3b70f6154246e758, 0xd485c0edf236b6e2 },
+ .tag_gmac_256 = { 0xfefe1832387b9768, 0xc876712098256ca3 },
+ },
+ {
+ .n_bytes = 2047,
+ .tag_gcm_128 = { 0x15c6bbcb0d835fd4, 0xc33afd1328c1deb1 },
+ .tag_gcm_256 = { 0xcde3edeea228ada6, 0x8276721a8662e708 },
+ .tag_gmac_128 = { 0xb556b0e42419759e, 0x23b0365cf956a3ad },
+ .tag_gmac_256 = { 0x8df762cbbe4b2a04, 0x6841bc61e5702419 },
+ },
+ {
+ .n_bytes = 2048,
+ .tag_gcm_128 = { 0xc5ddbeb8765e3aac, 0x1bad7349fd9f2b50 },
+ .tag_gcm_256 = { 0xa2a623dde251a98d, 0xaf905fbd16f6a7d9 },
+ .tag_gmac_128 = { 0xe20f1e533df2b3d0, 0x5d170bdbcc278a63 },
+ .tag_gmac_256 = { 0x9663185c4342cd4a, 0x82d3c5a3a4998fc6 },
+ },
+ {
+ .n_bytes = 2064,
+ .tag_gcm_128 = { 0x12b76ea0a6ee9cbc, 0xdaecfae7c815aa58 },
+ .tag_gcm_256 = { 0xb5bb2f76028713dd, 0xc8f3a1448b3bd050 },
+ .tag_gmac_128 = { 0x019445c168c42f9b, 0xdf33e251bd9a27fe },
+ .tag_gmac_256 = { 0xbbabd0cefc4d6a42, 0xb138675ca66ba54f },
+ },
+ {
+ .n_bytes = 2065,
+ .tag_gcm_128 = { 0x8758c5168ffc3fd7, 0x554f1df7cfa3b976 },
+ .tag_gcm_256 = { 0xc9808cf0fd21aede, 0xe26921f3fd308006 },
+ .tag_gmac_128 = { 0x44a57e7a32031596, 0x75476d5542faa57b },
+ .tag_gmac_256 = { 0xea0e81807fa79a4a, 0x889cca80746fb8d5 },
+ },
+ {
+ .n_bytes = 4095,
+ .tag_gcm_128 = { 0x06db87757f541dc9, 0x823c619c6b88ef80 },
+ .tag_gcm_256 = { 0xdf0861a56a7fe7b0, 0xe077a5c735cc21b2 },
+ .tag_gmac_128 = { 0x43cb482bea0449e9, 0x70d668af983c9a6c },
+ .tag_gmac_256 = { 0x5fc304ad7be1d19a, 0x81bf2f4111de0b06 },
+ },
+ {
+ .n_bytes = 4096,
+ .tag_gcm_128 = { 0xe4afdad642876152, 0xf78cfcfcb92520b6 },
+ .tag_gcm_256 = { 0x7552cda8d91bdab1, 0x4bf57b7567d59e89 },
+ .tag_gmac_128 = { 0xac5240f8e9c49cfc, 0x2a3c9d0999aded50 },
+ .tag_gmac_256 = { 0x9fb6cd8f10f7b6c5, 0x16e442c147869222 },
+ },
+ {
+ .n_bytes = 4112,
+ .tag_gcm_128 = { 0x2a34db8f06bcf0ee, 0x7a4a2456fa340c33 },
+ .tag_gcm_256 = { 0x4b6c0c5b5c943f5e, 0x6d1669e849ce061a },
+ .tag_gmac_128 = { 0x143bfc9ab07d9bb5, 0xf0aa7510a9039349 },
+ .tag_gmac_256 = { 0x8a97bdd033775ba0, 0x5901a5160739be25 },
+ },
+ {
+ .n_bytes = 4113,
+ .tag_gcm_128 = { 0x296acfcbcbf529af, 0xe3e2cfb1bc5855c8 },
+ .tag_gcm_256 = { 0x181f6f9068ea477e, 0x1e05bfd01ee3e173 },
+ .tag_gmac_128 = { 0x0d81fcb0829e3c8b, 0x68016225b5fa7745 },
+ .tag_gmac_256 = { 0xa2421ac50d65c6b5, 0x84bd16fa55486af8 },
+ },
+ {
+ .n_bytes = 16382,
+ .tag_gcm_128 = { 0xd39fd367e00a103d, 0xf873a278b32d207f },
+ .tag_gcm_256 = { 0xa8da09a851ae6c88, 0x2ef17f0da7f191f1 },
+ .tag_gmac_128 = { 0xd4a22896f44c1c14, 0x69a5d02715c90ea4 },
+ .tag_gmac_256 = { 0x64788ca5e11722b6, 0x63d74a4b24538762 },
+ },
+ {
+ .n_bytes = 16383,
+ .tag_gcm_128 = { 0x2162b91aad49eebc, 0x28c7efe93e639c75 },
+ .tag_gcm_256 = { 0xc5baee5e40004087, 0xf6b26211facc66a5 },
+ .tag_gmac_128 = { 0x3ec003d690d3d846, 0x204baef851d8ad7d },
+ .tag_gmac_256 = { 0xdb51d6f5dddf16bb, 0x529f3825cf78dbd5 },
+ },
+ {
+ .n_bytes = 16384,
+ .tag_gcm_128 = { 0x2272e778c4c5c9ef, 0x84c50021e75ddbab },
+ .tag_gcm_256 = { 0x6c32f1c5666b1f4c, 0x91142a86ae5241b2 },
+ .tag_gmac_128 = { 0x43dadd5ecee9674b, 0xa30fea9ae8091c6c },
+ .tag_gmac_256 = { 0xc360b76ac1887181, 0xcb732f29ea86edeb },
+ },
+ {
+ .n_bytes = 16385,
+ .tag_gcm_128 = { 0xe2a47837578b4056, 0xf96e7233cbeb1ce1 },
+ .tag_gcm_256 = { 0xfa3aa4ebe36fb390, 0x6a2cf1671f4f1a01 },
+ .tag_gmac_128 = { 0xfd0b7312c4975687, 0xdd3096b1c850e80a },
+ .tag_gmac_256 = { 0xaf2cae4642a5536a, 0xb27aff5cc8bd354c },
+ },
+ {
+ .n_bytes = 16386,
+ .tag_gcm_128 = { 0xe1b4c0e5825304ae, 0x48c5dd82aa114320 },
+ .tag_gcm_256 = { 0x76c3612118f47fa8, 0xdd0a47b132ecad3a },
+ .tag_gmac_128 = { 0x346bc841a7f5b642, 0x6fb1b96391c66b40 },
+ .tag_gmac_256 = { 0x2f1a1b6a000e18b2, 0xf7cba25e02551d43 },
+ },
+};
+
+#define MAX_TEST_DATA_LEN 32768
+
+static const struct
+{
+ char *name;
+ const u8 *pt, *key128, *key256, *ct128, *ct256, *tag128, *tag256, *aad, *iv;
+ u32 data_len, tag128_len, tag256_len, aad_len;
+} test_cases[] = {
+ /* test cases */
+ {
+ .name = "GCM Spec. TC1",
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .tag128 = tc1_tag128,
+ .tag128_len = sizeof (tc1_tag128),
+ .tag256 = tc1_tag256,
+ .tag256_len = sizeof (tc1_tag256),
+ },
+ {
+ .name = "GCM Spec. TC2",
+ .pt = tc2_plaintext,
+ .data_len = sizeof (tc2_plaintext),
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .ct128 = tc2_ciphertext128,
+ .ct256 = tc2_ciphertext256,
+ .tag128 = tc2_tag128,
+ .tag128_len = sizeof (tc2_tag128),
+ .tag256 = tc2_tag256,
+ .tag256_len = sizeof (tc2_tag256),
+ },
+ {
+ .name = "GCM Spec. TC3",
+ .pt = tc3_plaintext,
+ .data_len = sizeof (tc3_plaintext),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc3_ciphertext128,
+ .ct256 = tc3_ciphertext256,
+ .tag128 = tc3_tag128,
+ .tag128_len = sizeof (tc3_tag128),
+ .tag256 = tc3_tag256,
+ .tag256_len = sizeof (tc3_tag256),
+ },
+ {
+ .name = "GCM Spec. TC4",
+ .pt = tc4_plaintext,
+ .data_len = sizeof (tc4_plaintext),
+ .aad = tc4_aad,
+ .aad_len = sizeof (tc4_aad),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc4_ciphertext128,
+ .ct256 = tc4_ciphertext256,
+ .tag128 = tc4_tag128,
+ .tag128_len = sizeof (tc4_tag128),
+ .tag256 = tc4_tag256,
+ .tag256_len = sizeof (tc4_tag256),
+ }
+};
+
+#define perftest_aesXXX_enc_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##a##_gcm_enc (kd, src, n, 0, 0, iv, 16, dst, tag); \
+ test_perf_event_disable (tp); \
+ }
+
+#define perftest_aesXXX_dec_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ int *rv = test_mem_alloc (16); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ rv[0] = clib_aes##a##_gcm_dec (kd, src, n, 0, 0, iv, tag, 16, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+static clib_error_t *
+test_clib_aes128_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag128_len, ct, tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct128, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_enc) = {
+ .name = "clib_aes128_gcm_enc",
+ .fn = test_clib_aes128_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag256_len, ct, tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct256, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_enc) = {
+ .name = "clib_aes256_gcm_enc",
+ .fn = test_clib_aes256_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes128_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ rv = clib_aes128_gcm_dec (&kd, tc->ct128, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag128,
+ tc->tag128_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+ (u8 *) tc->tag_gcm_128, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_dec) = {
+ .name = "clib_aes128_gcm_dec",
+ .fn = test_clib_aes128_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ rv = clib_aes256_gcm_dec (&kd, tc->ct256, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag256,
+ tc->tag256_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+ (u8 *) tc->tag_gcm_128, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_dec) = {
+ .name = "clib_aes256_gcm_dec",
+ .fn = test_clib_aes256_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+static const u8 gmac1_key[] = {
+ 0x77, 0xbe, 0x63, 0x70, 0x89, 0x71, 0xc4, 0xe2,
+ 0x40, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x7f, 0xeb
+};
+static const u8 gmac1_iv[] = { 0xe0, 0xe0, 0x0f, 0x19, 0xfe, 0xd7,
+ 0xba, 0x01, 0x36, 0xa7, 0x97, 0xf3 };
+static const u8 gmac1_aad[] = {
+ 0x7a, 0x43, 0xec, 0x1d, 0x9c, 0x0a, 0x5a, 0x78,
+ 0xa0, 0xb1, 0x65, 0x33, 0xa6, 0x21, 0x3c, 0xab
+};
+static const u8 gmac1_tag[] = {
+ 0x20, 0x9f, 0xcc, 0x8d, 0x36, 0x75, 0xed, 0x93,
+ 0x8e, 0x9c, 0x71, 0x66, 0x70, 0x9d, 0xd9, 0x46
+};
+
+static const u8 gmac2_key[] = {
+ 0x20, 0xb5, 0xb6, 0xb8, 0x54, 0xe1, 0x87, 0xb0,
+ 0x58, 0xa8, 0x4d, 0x57, 0xbc, 0x15, 0x38, 0xb6
+};
+
+static const u8 gmac2_iv[] = { 0x94, 0xc1, 0x93, 0x5a, 0xfc, 0x06,
+ 0x1c, 0xbf, 0x25, 0x4b, 0x93, 0x6f };
+
+static const u8 gmac2_aad[] = {
+ 0xca, 0x41, 0x8e, 0x71, 0xdb, 0xf8, 0x10, 0x03, 0x81, 0x74, 0xea, 0xa3, 0x71,
+ 0x9b, 0x3f, 0xcb, 0x80, 0x53, 0x1c, 0x71, 0x10, 0xad, 0x91, 0x92, 0xd1, 0x05,
+ 0xee, 0xaa, 0xfa, 0x15, 0xb8, 0x19, 0xac, 0x00, 0x56, 0x68, 0x75, 0x2b, 0x34,
+ 0x4e, 0xd1, 0xb2, 0x2f, 0xaf, 0x77, 0x04, 0x8b, 0xaf, 0x03, 0xdb, 0xdd, 0xb3,
+ 0xb4, 0x7d, 0x6b, 0x00, 0xe9, 0x5c, 0x4f, 0x00, 0x5e, 0x0c, 0xc9, 0xb7, 0x62,
+ 0x7c, 0xca, 0xfd, 0x3f, 0x21, 0xb3, 0x31, 0x2a, 0xa8, 0xd9, 0x1d, 0x3f, 0xa0,
+ 0x89, 0x3f, 0xe5, 0xbf, 0xf7, 0xd4, 0x4c, 0xa4, 0x6f, 0x23, 0xaf, 0xe0
+};
+
+static const u8 gmac2_tag[] = {
+ 0xb3, 0x72, 0x86, 0xeb, 0xaf, 0x4a, 0x54, 0xe0,
+ 0xff, 0xc2, 0xa1, 0xde, 0xaf, 0xc9, 0xf6, 0xdb
+};
+
+static const struct
+{
+ char *name;
+ const u8 *key128, *key256, *tag128, *tag256, *aad, *iv;
+ u32 tag128_len, tag256_len, aad_len;
+} gmac_test_cases[] = {
+ /* test cases */
+ {
+ .name = "GMAC1",
+ .iv = gmac1_iv,
+ .key128 = gmac1_key,
+ .tag128 = gmac1_tag,
+ .tag128_len = sizeof (gmac1_tag),
+ .aad = gmac1_aad,
+ .aad_len = sizeof (gmac1_aad),
+ },
+ {
+ .name = "GMAC2",
+ .iv = gmac2_iv,
+ .key128 = gmac2_key,
+ .tag128 = gmac2_tag,
+ .tag128_len = sizeof (gmac2_tag),
+ .aad = gmac2_aad,
+ .aad_len = sizeof (gmac2_aad),
+ },
+};
+
+static clib_error_t *
+test_clib_aes128_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag128_len,
+ tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+void __test_perf_fn
+perftest_gmac256_fixed_512byte (test_perf_t *tp)
+{
+ uword n = tp->n_ops;
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (aes_gcm_key_data_t));
+ u8 *ivs = test_mem_alloc_and_fill_inc_u8 (n * 12, 0, 0);
+ u8 *tags = test_mem_alloc_and_fill_inc_u8 (8 + n * 16, 0, 0);
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (512, 0, 0);
+
+ test_perf_event_enable (tp);
+ clib_aes_gcm_key_expand (kd, inc_key, AES_KEY_128);
+
+ for (int i = 0; i < n; i++)
+ clib_aes128_gmac (kd, data, 512, ivs + n * 12, 16, tags + n * 16);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_aes128_gmac) = {
+ .name = "clib_aes128_gmac",
+ .fn = test_clib_aes128_gmac,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (512 byte)",
+ .n_ops = 256,
+ .fn = perftest_gmac256_fixed_512byte }),
+};
+
+static clib_error_t *
+test_clib_aes256_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
+#if 0
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag256_len,
+ tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+#endif
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+REGISTER_TEST (clib_aes256_gmac) = {
+ .name = "clib_aes256_gmac",
+ .fn = test_clib_aes256_gmac,
+};
+#endif
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index 80c2e39..17271b8 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -223,6 +223,16 @@
return a ^ b ^ c;
}
+static_always_inline u8x32
+u8x32_reflect_u8x16 (u8x32 x)
+{
+ static const u8x32 mask = {
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ };
+ return (u8x32) _mm256_shuffle_epi8 ((__m256i) x, (__m256i) mask);
+}
+
static_always_inline u16x16
u16x16_mask_last (u16x16 v, u8 n_last)
{
@@ -332,6 +342,11 @@
(__m256i) mask);
}
+#define u8x32_word_shift_left(a, n) \
+ (u8x32) _mm256_bslli_epi128 ((__m256i) a, n)
+#define u8x32_word_shift_right(a, n) \
+ (u8x32) _mm256_bsrli_epi128 ((__m256i) a, n)
+
#define u32x8_permute_lanes(a, b, m) \
(u32x8) _mm256_permute2x128_si256 ((__m256i) a, (__m256i) b, m)
#define u64x4_permute_lanes(a, b, m) \
@@ -407,6 +422,46 @@
return (u8x32) _mm256_broadcastsi128_si256 ((__m128i) a);
}
+static_always_inline u32x8
+u32x8_splat_u32x4 (u32x4 a)
+{
+ return (u32x8) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u8x32
+u8x32_load_partial (u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ return u8x32_mask_load_zero (data, pow2_mask (n));
+#else
+ u8x32 r = {};
+ if (n > 16)
+ {
+ r = u8x32_insert_lo (r, *(u8x16u *) data);
+ r = u8x32_insert_hi (r, u8x16_load_partial (data + 16, n - 16));
+ }
+ else
+ r = u8x32_insert_lo (r, u8x16_load_partial (data, n));
+ return r;
+#endif
+}
+
+static_always_inline void
+u8x32_store_partial (u8x32 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u8x32_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 16)
+ {
+ *(u8x16u *) data = u8x32_extract_lo (r);
+ u8x16_store_partial (u8x32_extract_hi (r), data + 16, n - 16);
+ }
+ else
+ u8x16_store_partial (u8x32_extract_lo (r), data, n);
+#endif
+}
+
#endif /* included_vector_avx2_h */
/*
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index eda65ca..b745b46 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -593,6 +593,18 @@
m[7] = (u64x8) _mm512_permutex2var_epi64 (x, pm4, y);
}
+static_always_inline u8x64
+u8x64_load_partial (u8 *data, uword n)
+{
+ return u8x64_mask_load_zero (data, pow2_mask (n));
+}
+
+static_always_inline void
+u8x64_store_partial (u8x64 r, u8 *data, uword n)
+{
+ u8x64_mask_store (r, data, pow2_mask (n));
+}
+
#endif /* included_vector_avx512_h */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 568b689..70a7bc0 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -231,6 +231,61 @@
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+}
+
#define CLIB_HAVE_VEC128_MSB_MASK
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index 2b8927b..7c8e625 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -493,6 +493,68 @@
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE)
+ return u8x16_mask_load_zero (data, pow2_mask (n));
+#endif
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u8x16_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+#endif
+}
+
#endif /* included_vector_sse2_h */
/*