Blame - crypto/gf128mul.c - codeaurora/cp-linux

blob: 5276607c72d04ca0e72f1b74a86c2acef1962073 [file] [log] [blame]

Kyle Swenson	8d8f654	2021-03-15 11:02:55 -0600	[diff] [blame]	1	/* gf128mul.c - GF(2^128) multiplication functions
				2	*
				3	* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
				4	* Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
				5	*
				6	* Based on Dr Brian Gladman's (GPL'd) work published at
				7	* http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
				8	* See the original copyright notice below.
				9	*
				10	* This program is free software; you can redistribute it and/or modify it
				11	* under the terms of the GNU General Public License as published by the Free
				12	* Software Foundation; either version 2 of the License, or (at your option)
				13	* any later version.
				14	*/
				15
				16	/*
				17	---------------------------------------------------------------------------
				18	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
				19
				20	LICENSE TERMS
				21
				22	The free distribution and use of this software in both source and binary
				23	form is allowed (with or without changes) provided that:
				24
				25	1. distributions of this source code include the above copyright
				26	notice, this list of conditions and the following disclaimer;
				27
				28	2. distributions in binary form include the above copyright
				29	notice, this list of conditions and the following disclaimer
				30	in the documentation and/or other associated materials;
				31
				32	3. the copyright holder's name is not used to endorse products
				33	built using this software without specific written permission.
				34
				35	ALTERNATIVELY, provided that this notice is retained in full, this product
				36	may be distributed under the terms of the GNU General Public License (GPL),
				37	in which case the provisions of the GPL apply INSTEAD OF those given above.
				38
				39	DISCLAIMER
				40
				41	This software is provided 'as is' with no explicit or implied warranties
				42	in respect of its properties, including, but not limited to, correctness
				43	and/or fitness for purpose.
				44	---------------------------------------------------------------------------
				45	Issue 31/01/2006
				46
				47	This file provides fast multiplication in GF(128) as required by several
				48	cryptographic authentication modes
				49	*/
				50
				51	#include <crypto/gf128mul.h>
				52	#include <linux/kernel.h>
				53	#include <linux/module.h>
				54	#include <linux/slab.h>
				55
				56	#define gf128mul_dat(q) { \
				57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
				58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
				59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
				60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
				61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
				62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
				63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
				64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
				65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
				66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
				67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
				68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
				69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
				70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
				71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
				72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
				73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
				74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
				75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
				76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
				77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
				78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
				79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
				80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
				81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
				82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
				83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
				84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
				85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
				86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
				87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
				88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
				89	}
				90
				91	/* Given the value i in 0..255 as the byte overflow when a field element
				92	in GHASH is multiplied by x^8, this function will return the values that
				93	are generated in the lo 16-bit word of the field value by applying the
				94	modular polynomial. The values lo_byte and hi_byte are returned via the
				95	macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
				96	memory as required by a suitable definition of this macro operating on
				97	the table above
				98	*/
				99
				100	#define xx(p, q) 0x##p##q
				101
				102	#define xda_bbe(i) ( \
				103	(i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
				104	(i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
				105	(i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
				106	(i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
				107	)
				108
				109	#define xda_lle(i) ( \
				110	(i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
				111	(i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
				112	(i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
				113	(i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
				114	)
				115
				116	static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
				117	static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
				118
				119	/* These functions multiply a field element by x, by x^4 and by x^8
				120	* in the polynomial field representation. It uses 32-bit word operations
				121	* to gain speed but compensates for machine endianess and hence works
				122	* correctly on both styles of machine.
				123	*/
				124
				125	static void gf128mul_x_lle(be128 r, const be128 x)
				126	{
				127	u64 a = be64_to_cpu(x->a);
				128	u64 b = be64_to_cpu(x->b);
				129	u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
				130
				131	r->b = cpu_to_be64((b >> 1) \| (a << 63));
				132	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
				133	}
				134
				135	static void gf128mul_x_bbe(be128 r, const be128 x)
				136	{
				137	u64 a = be64_to_cpu(x->a);
				138	u64 b = be64_to_cpu(x->b);
				139	u64 _tt = gf128mul_table_bbe[a >> 63];
				140
				141	r->a = cpu_to_be64((a << 1) \| (b >> 63));
				142	r->b = cpu_to_be64((b << 1) ^ _tt);
				143	}
				144
				145	void gf128mul_x_ble(be128 r, const be128 x)
				146	{
				147	u64 a = le64_to_cpu(x->a);
				148	u64 b = le64_to_cpu(x->b);
				149	u64 _tt = gf128mul_table_bbe[b >> 63];
				150
				151	r->a = cpu_to_le64((a << 1) ^ _tt);
				152	r->b = cpu_to_le64((b << 1) \| (a >> 63));
				153	}
				154	EXPORT_SYMBOL(gf128mul_x_ble);
				155
				156	static void gf128mul_x8_lle(be128 *x)
				157	{
				158	u64 a = be64_to_cpu(x->a);
				159	u64 b = be64_to_cpu(x->b);
				160	u64 _tt = gf128mul_table_lle[b & 0xff];
				161
				162	x->b = cpu_to_be64((b >> 8) \| (a << 56));
				163	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
				164	}
				165
				166	static void gf128mul_x8_bbe(be128 *x)
				167	{
				168	u64 a = be64_to_cpu(x->a);
				169	u64 b = be64_to_cpu(x->b);
				170	u64 _tt = gf128mul_table_bbe[a >> 56];
				171
				172	x->a = cpu_to_be64((a << 8) \| (b >> 56));
				173	x->b = cpu_to_be64((b << 8) ^ _tt);
				174	}
				175
				176	void gf128mul_lle(be128 r, const be128 b)
				177	{
				178	be128 p[8];
				179	int i;
				180
				181	p[0] = *r;
				182	for (i = 0; i < 7; ++i)
				183	gf128mul_x_lle(&p[i + 1], &p[i]);
				184
				185	memset(r, 0, sizeof(*r));
				186	for (i = 0;;) {
				187	u8 ch = ((u8 *)b)[15 - i];
				188
				189	if (ch & 0x80)
				190	be128_xor(r, r, &p[0]);
				191	if (ch & 0x40)
				192	be128_xor(r, r, &p[1]);
				193	if (ch & 0x20)
				194	be128_xor(r, r, &p[2]);
				195	if (ch & 0x10)
				196	be128_xor(r, r, &p[3]);
				197	if (ch & 0x08)
				198	be128_xor(r, r, &p[4]);
				199	if (ch & 0x04)
				200	be128_xor(r, r, &p[5]);
				201	if (ch & 0x02)
				202	be128_xor(r, r, &p[6]);
				203	if (ch & 0x01)
				204	be128_xor(r, r, &p[7]);
				205
				206	if (++i >= 16)
				207	break;
				208
				209	gf128mul_x8_lle(r);
				210	}
				211	}
				212	EXPORT_SYMBOL(gf128mul_lle);
				213
				214	void gf128mul_bbe(be128 r, const be128 b)
				215	{
				216	be128 p[8];
				217	int i;
				218
				219	p[0] = *r;
				220	for (i = 0; i < 7; ++i)
				221	gf128mul_x_bbe(&p[i + 1], &p[i]);
				222
				223	memset(r, 0, sizeof(*r));
				224	for (i = 0;;) {
				225	u8 ch = ((u8 *)b)[i];
				226
				227	if (ch & 0x80)
				228	be128_xor(r, r, &p[7]);
				229	if (ch & 0x40)
				230	be128_xor(r, r, &p[6]);
				231	if (ch & 0x20)
				232	be128_xor(r, r, &p[5]);
				233	if (ch & 0x10)
				234	be128_xor(r, r, &p[4]);
				235	if (ch & 0x08)
				236	be128_xor(r, r, &p[3]);
				237	if (ch & 0x04)
				238	be128_xor(r, r, &p[2]);
				239	if (ch & 0x02)
				240	be128_xor(r, r, &p[1]);
				241	if (ch & 0x01)
				242	be128_xor(r, r, &p[0]);
				243
				244	if (++i >= 16)
				245	break;
				246
				247	gf128mul_x8_bbe(r);
				248	}
				249	}
				250	EXPORT_SYMBOL(gf128mul_bbe);
				251
				252	/* This version uses 64k bytes of table space.
				253	A 16 byte buffer has to be multiplied by a 16 byte key
				254	value in GF(128). If we consider a GF(128) value in
				255	the buffer's lowest byte, we can construct a table of
				256	the 256 16 byte values that result from the 256 values
				257	of this byte. This requires 4096 bytes. But we also
				258	need tables for each of the 16 higher bytes in the
				259	buffer as well, which makes 64 kbytes in total.
				260	*/
				261	/* additional explanation
				262	* t[0][BYTE] contains g*BYTE
				263	* t[1][BYTE] contains gx^8BYTE
				264	* ..
				265	* t[15][BYTE] contains gx^120BYTE */
				266	struct gf128mul_64k gf128mul_init_64k_lle(const be128 g)
				267	{
				268	struct gf128mul_64k *t;
				269	int i, j, k;
				270
				271	t = kzalloc(sizeof(*t), GFP_KERNEL);
				272	if (!t)
				273	goto out;
				274
				275	for (i = 0; i < 16; i++) {
				276	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
				277	if (!t->t[i]) {
				278	gf128mul_free_64k(t);
				279	t = NULL;
				280	goto out;
				281	}
				282	}
				283
				284	t->t[0]->t[128] = *g;
				285	for (j = 64; j > 0; j >>= 1)
				286	gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
				287
				288	for (i = 0;;) {
				289	for (j = 2; j < 256; j += j)
				290	for (k = 1; k < j; ++k)
				291	be128_xor(&t->t[i]->t[j + k],
				292	&t->t[i]->t[j], &t->t[i]->t[k]);
				293
				294	if (++i >= 16)
				295	break;
				296
				297	for (j = 128; j > 0; j >>= 1) {
				298	t->t[i]->t[j] = t->t[i - 1]->t[j];
				299	gf128mul_x8_lle(&t->t[i]->t[j]);
				300	}
				301	}
				302
				303	out:
				304	return t;
				305	}
				306	EXPORT_SYMBOL(gf128mul_init_64k_lle);
				307
				308	struct gf128mul_64k gf128mul_init_64k_bbe(const be128 g)
				309	{
				310	struct gf128mul_64k *t;
				311	int i, j, k;
				312
				313	t = kzalloc(sizeof(*t), GFP_KERNEL);
				314	if (!t)
				315	goto out;
				316
				317	for (i = 0; i < 16; i++) {
				318	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
				319	if (!t->t[i]) {
				320	gf128mul_free_64k(t);
				321	t = NULL;
				322	goto out;
				323	}
				324	}
				325
				326	t->t[0]->t[1] = *g;
				327	for (j = 1; j <= 64; j <<= 1)
				328	gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
				329
				330	for (i = 0;;) {
				331	for (j = 2; j < 256; j += j)
				332	for (k = 1; k < j; ++k)
				333	be128_xor(&t->t[i]->t[j + k],
				334	&t->t[i]->t[j], &t->t[i]->t[k]);
				335
				336	if (++i >= 16)
				337	break;
				338
				339	for (j = 128; j > 0; j >>= 1) {
				340	t->t[i]->t[j] = t->t[i - 1]->t[j];
				341	gf128mul_x8_bbe(&t->t[i]->t[j]);
				342	}
				343	}
				344
				345	out:
				346	return t;
				347	}
				348	EXPORT_SYMBOL(gf128mul_init_64k_bbe);
				349
				350	void gf128mul_free_64k(struct gf128mul_64k *t)
				351	{
				352	int i;
				353
				354	for (i = 0; i < 16; i++)
				355	kfree(t->t[i]);
				356	kfree(t);
				357	}
				358	EXPORT_SYMBOL(gf128mul_free_64k);
				359
				360	void gf128mul_64k_lle(be128 a, struct gf128mul_64k t)
				361	{
				362	u8 ap = (u8 )a;
				363	be128 r[1];
				364	int i;
				365
				366	*r = t->t[0]->t[ap[0]];
				367	for (i = 1; i < 16; ++i)
				368	be128_xor(r, r, &t->t[i]->t[ap[i]]);
				369	a = r;
				370	}
				371	EXPORT_SYMBOL(gf128mul_64k_lle);
				372
				373	void gf128mul_64k_bbe(be128 a, struct gf128mul_64k t)
				374	{
				375	u8 ap = (u8 )a;
				376	be128 r[1];
				377	int i;
				378
				379	*r = t->t[0]->t[ap[15]];
				380	for (i = 1; i < 16; ++i)
				381	be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
				382	a = r;
				383	}
				384	EXPORT_SYMBOL(gf128mul_64k_bbe);
				385
				386	/* This version uses 4k bytes of table space.
				387	A 16 byte buffer has to be multiplied by a 16 byte key
				388	value in GF(128). If we consider a GF(128) value in a
				389	single byte, we can construct a table of the 256 16 byte
				390	values that result from the 256 values of this byte.
				391	This requires 4096 bytes. If we take the highest byte in
				392	the buffer and use this table to get the result, we then
				393	have to multiply by x^120 to get the final value. For the
				394	next highest byte the result has to be multiplied by x^112
				395	and so on. But we can do this by accumulating the result
				396	in an accumulator starting with the result for the top
				397	byte. We repeatedly multiply the accumulator value by
				398	x^8 and then add in (i.e. xor) the 16 bytes of the next
				399	lower byte in the buffer, stopping when we reach the
				400	lowest byte. This requires a 4096 byte table.
				401	*/
				402	struct gf128mul_4k gf128mul_init_4k_lle(const be128 g)
				403	{
				404	struct gf128mul_4k *t;
				405	int j, k;
				406
				407	t = kzalloc(sizeof(*t), GFP_KERNEL);
				408	if (!t)
				409	goto out;
				410
				411	t->t[128] = *g;
				412	for (j = 64; j > 0; j >>= 1)
				413	gf128mul_x_lle(&t->t[j], &t->t[j+j]);
				414
				415	for (j = 2; j < 256; j += j)
				416	for (k = 1; k < j; ++k)
				417	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				418
				419	out:
				420	return t;
				421	}
				422	EXPORT_SYMBOL(gf128mul_init_4k_lle);
				423
				424	struct gf128mul_4k gf128mul_init_4k_bbe(const be128 g)
				425	{
				426	struct gf128mul_4k *t;
				427	int j, k;
				428
				429	t = kzalloc(sizeof(*t), GFP_KERNEL);
				430	if (!t)
				431	goto out;
				432
				433	t->t[1] = *g;
				434	for (j = 1; j <= 64; j <<= 1)
				435	gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
				436
				437	for (j = 2; j < 256; j += j)
				438	for (k = 1; k < j; ++k)
				439	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
				440
				441	out:
				442	return t;
				443	}
				444	EXPORT_SYMBOL(gf128mul_init_4k_bbe);
				445
				446	void gf128mul_4k_lle(be128 a, struct gf128mul_4k t)
				447	{
				448	u8 ap = (u8 )a;
				449	be128 r[1];
				450	int i = 15;
				451
				452	*r = t->t[ap[15]];
				453	while (i--) {
				454	gf128mul_x8_lle(r);
				455	be128_xor(r, r, &t->t[ap[i]]);
				456	}
				457	a = r;
				458	}
				459	EXPORT_SYMBOL(gf128mul_4k_lle);
				460
				461	void gf128mul_4k_bbe(be128 a, struct gf128mul_4k t)
				462	{
				463	u8 ap = (u8 )a;
				464	be128 r[1];
				465	int i = 0;
				466
				467	*r = t->t[ap[0]];
				468	while (++i < 16) {
				469	gf128mul_x8_bbe(r);
				470	be128_xor(r, r, &t->t[ap[i]]);
				471	}
				472	a = r;
				473	}
				474	EXPORT_SYMBOL(gf128mul_4k_bbe);
				475
				476	MODULE_LICENSE("GPL");
				477	MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");