blob: 880ffab014ac9b57a56988099c79edd8b46f14ca [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02009#include "libbb.h"
10
Denys Vlasenko5f681702022-01-01 12:21:01 +010011#define STR1(s) #s
12#define STR(s) STR1(s)
13
Denys Vlasenkob8935d02017-01-15 20:16:27 +010014#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
15
Denys Vlasenko6472ac92022-02-03 14:15:20 +010016#if ENABLE_SHA1_HWACCEL || ENABLE_SHA256_HWACCEL
17# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/* Execute the x86 CPUID instruction.
 * On entry the pointed-to values select the query (all four are passed
 * in as inputs; *eax is the leaf, *ecx the subleaf where relevant);
 * on return they hold the EAX/EBX/ECX/EDX output registers.
 * Used below to detect SHA-NI hardware acceleration support.
 */
static void cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
	asm ("cpuid"
		: "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
		: "0"(*eax), "1"(*ebx), "2"(*ecx), "3"(*edx)
	);
}
25static smallint shaNI;
26void FAST_FUNC sha1_process_block64_shaNI(sha1_ctx_t *ctx);
27void FAST_FUNC sha256_process_block64_shaNI(sha256_ctx_t *ctx);
28# if defined(__i386__)
29struct ASM_expects_76_shaNI { char t[1 - 2*(offsetof(sha256_ctx_t, hash) != 76)]; };
30# endif
31# if defined(__x86_64__)
32struct ASM_expects_80_shaNI { char t[1 - 2*(offsetof(sha256_ctx_t, hash) != 80)]; };
33# endif
34# endif
35#endif
36
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020037/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
38 * (for rotX32, there is no difference). Why? My guess is that
39 * macro requires clever common subexpression elimination heuristics
40 * in gcc, while inline basically forces it to happen.
41 */
42//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
43static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
44{
45 return (x << n) | (x >> (32 - n));
46}
47//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
48static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
49{
50 return (x >> n) | (x << (32 - n));
51}
52/* rotr64 in needed for sha512 only: */
53//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
54static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
55{
56 return (x >> n) | (x << (64 - n));
57}
58
Lauri Kasanenb8173b62013-01-14 05:20:50 +010059/* rotl64 only used for sha3 currently */
60static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
61{
62 return (x << n) | (x >> (64 - n));
63}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020064
Denys Vlasenko302ad142010-10-19 02:16:12 +020065/* Process the remaining bytes in the buffer */
66static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
67{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020068 unsigned bufpos = ctx->total64 & 63;
69 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
70 ctx->wbuffer[bufpos++] = 0x80;
71
72 /* This loop iterates either once or twice, no more, no less */
73 while (1) {
74 unsigned remaining = 64 - bufpos;
75 memset(ctx->wbuffer + bufpos, 0, remaining);
76 /* Do we have enough space for the length count? */
77 if (remaining >= 8) {
78 /* Store the 64-bit counter of bits in the buffer */
79 uint64_t t = ctx->total64 << 3;
80 if (swap_needed)
81 t = bb_bswap_64(t);
82 /* wbuffer is suitably aligned for this */
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +020083 *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020084 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020085 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020086 if (remaining >= 8)
87 break;
88 bufpos = 0;
89 }
90}
91
92
93/*
Denys Vlasenko302ad142010-10-19 02:16:12 +020094 * Compute MD5 checksum of strings according to the
95 * definition of MD5 in RFC 1321 from April 1992.
96 *
97 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
98 *
99 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
100 * Copyright (C) 2001 Manuel Novoa III
101 * Copyright (C) 2003 Glenn L. McGrath
102 * Copyright (C) 2003 Erik Andersen
103 *
104 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
105 */
106
107/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200108#if CONFIG_MD5_SMALL < 0
109# define MD5_SMALL 0
110#elif CONFIG_MD5_SMALL > 3
111# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200112#else
Denys Vlasenko522041e2011-09-10 13:25:57 +0200113# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +0200114#endif
115
116/* These are the four functions used in the four steps of the MD5 algorithm
117 * and defined in the RFC 1321. The first function is a little bit optimized
118 * (as found in Colin Plumbs public domain implementation).
119 * #define FF(b, c, d) ((b & c) | (~b & d))
120 */
121#undef FF
122#undef FG
123#undef FH
124#undef FI
125#define FF(b, c, d) (d ^ (b & (c ^ d)))
126#define FG(b, c, d) FF(d, b, c)
127#define FH(b, c, d) (b ^ c ^ d)
128#define FI(b, c, d) (c ^ (b | ~d))
129
/* Hash a single block, 64 bytes long and 4-byte aligned.
 * Consumes ctx->wbuffer (16 little-endian 32-bit words) and updates
 * ctx->hash[0..3] in place per RFC 1321 rounds 1-4.
 * Four size/speed variants are selected by MD5_SMALL (0 = fastest,
 * 3 = smallest): 0 is fully unrolled, 1 unrolls four steps per loop,
 * 2 uses one compact loop per round, 3 uses a single 64-step loop
 * with a switch on the round number.
 */
static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
{
#if MD5_SMALL > 0
	/* Before we start, one word to the strange constants.
	   They are defined in RFC 1321 as
	   T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
	 */
	static const uint32_t C_array[] ALIGN4 = {
		/* round 1 */
		0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
		0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
		0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
		0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
		/* round 2 */
		0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
		0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
		0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
		0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
		/* round 3 */
		0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
		0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
		0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
		0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
		/* round 4 */
		0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
		0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
		0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
		0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
	};
	/* Input-word index permutation per round (round 1 is the
	 * identity, so it is only stored when MD5_SMALL > 1) */
	static const char P_array[] ALIGN1 = {
# if MD5_SMALL > 1
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
# endif
		1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
		5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
		0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
	};
#endif
	uint32_t *words = (void*) ctx->wbuffer;
	uint32_t A = ctx->hash[0];
	uint32_t B = ctx->hash[1];
	uint32_t C = ctx->hash[2];
	uint32_t D = ctx->hash[3];

#if MD5_SMALL >= 2  /* 2 or 3 */

	/* Left-rotation amounts, four per round */
	static const char S_array[] ALIGN1 = {
		7, 12, 17, 22,
		5, 9, 14, 20,
		4, 11, 16, 23,
		6, 10, 15, 21
	};
	const uint32_t *pc;
	const char *pp;
	const char *ps;
	int i;
	uint32_t temp;

	/* The algorithm works on little-endian words: convert in place */
	if (BB_BIG_ENDIAN)
		for (i = 0; i < 16; i++)
			words[i] = SWAP_LE32(words[i]);

# if MD5_SMALL == 3
	/* Smallest variant: one loop, round chosen by i >> 4 */
	pc = C_array;
	pp = P_array;
	ps = S_array - 4;

	for (i = 0; i < 64; i++) {
		if ((i & 0x0f) == 0)
			ps += 4;
		temp = A;
		switch (i >> 4) {
		case 0:
			temp += FF(B, C, D);
			break;
		case 1:
			temp += FG(B, C, D);
			break;
		case 2:
			temp += FH(B, C, D);
			break;
		default: /* case 3 */
			temp += FI(B, C, D);
		}
		temp += words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		/* rotate state (A,B,C,D) by one for the next step */
		A = D;
		D = C;
		C = B;
		B = temp;
	}
# else  /* MD5_SMALL == 2 */
	/* One compact 16-step loop per round */
	pc = C_array;
	pp = P_array;
	ps = S_array;

	for (i = 0; i < 16; i++) {
		temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
	ps += 4;
	for (i = 0; i < 16; i++) {
		temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
		temp = rotl32(temp, ps[i & 3]);
		temp += B;
		A = D;
		D = C;
		C = B;
		B = temp;
	}
# endif
	/* Add checksum to the starting values */
	ctx->hash[0] += A;
	ctx->hash[1] += B;
	ctx->hash[2] += C;
	ctx->hash[3] += D;

#else  /* MD5_SMALL == 0 or 1 */

# if MD5_SMALL == 1
	const uint32_t *pc;
	const char *pp;
	int i;
# endif

	/* First round: using the given function, the context and a constant
	   the next context is computed.  Because the algorithm's processing
	   unit is a 32-bit word and it is determined to work on words in
	   little endian byte order we perhaps have to change the byte order
	   before the computation.  To reduce the work for the next steps
	   we save swapped words in WORDS array. */
# undef OP
# define OP(a, b, c, d, s, T) \
	do { \
		a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
		words++; \
		a = rotl32(a, s); \
		a += b; \
	} while (0)

	/* Round 1 */
# if MD5_SMALL == 1
	pc = C_array;
	for (i = 0; i < 4; i++) {
		OP(A, B, C, D, 7, *pc++);
		OP(D, A, B, C, 12, *pc++);
		OP(C, D, A, B, 17, *pc++);
		OP(B, C, D, A, 22, *pc++);
	}
# else
	OP(A, B, C, D, 7, 0xd76aa478);
	OP(D, A, B, C, 12, 0xe8c7b756);
	OP(C, D, A, B, 17, 0x242070db);
	OP(B, C, D, A, 22, 0xc1bdceee);
	OP(A, B, C, D, 7, 0xf57c0faf);
	OP(D, A, B, C, 12, 0x4787c62a);
	OP(C, D, A, B, 17, 0xa8304613);
	OP(B, C, D, A, 22, 0xfd469501);
	OP(A, B, C, D, 7, 0x698098d8);
	OP(D, A, B, C, 12, 0x8b44f7af);
	OP(C, D, A, B, 17, 0xffff5bb1);
	OP(B, C, D, A, 22, 0x895cd7be);
	OP(A, B, C, D, 7, 0x6b901122);
	OP(D, A, B, C, 12, 0xfd987193);
	OP(C, D, A, B, 17, 0xa679438e);
	OP(B, C, D, A, 22, 0x49b40821);
# endif
	words -= 16;

	/* For the second to fourth round we have the possibly swapped words
	   in WORDS.  Redefine the macro to take an additional first
	   argument specifying the function to use. */
# undef OP
# define OP(f, a, b, c, d, k, s, T) \
	do { \
		a += f(b, c, d) + words[k] + T; \
		a = rotl32(a, s); \
		a += b; \
	} while (0)

	/* Round 2 */
# if MD5_SMALL == 1
	pp = P_array;
	for (i = 0; i < 4; i++) {
		OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
		OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
		OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
		OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
	}
# else
	OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
	OP(FG, D, A, B, C, 6, 9, 0xc040b340);
	OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
	OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
	OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
	OP(FG, D, A, B, C, 10, 9, 0x02441453);
	OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
	OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
	OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
	OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
	OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
	OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
	OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
	OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
	OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
	OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
# endif

	/* Round 3 */
# if MD5_SMALL == 1
	for (i = 0; i < 4; i++) {
		OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
		OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
		OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
		OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
	}
# else
	OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
	OP(FH, D, A, B, C, 8, 11, 0x8771f681);
	OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
	OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
	OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
	OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
	OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
	OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
	OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
	OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
	OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
	OP(FH, B, C, D, A, 6, 23, 0x04881d05);
	OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
	OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
	OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
	OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
# endif

	/* Round 4 */
# if MD5_SMALL == 1
	for (i = 0; i < 4; i++) {
		OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
		OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
		OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
		OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
	}
# else
	OP(FI, A, B, C, D, 0, 6, 0xf4292244);
	OP(FI, D, A, B, C, 7, 10, 0x432aff97);
	OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
	OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
	OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
	OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
	OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
	OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
	OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
	OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
	OP(FI, C, D, A, B, 6, 15, 0xa3014314);
	OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
	OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
	OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
	OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
	OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
# endif
	/* Add checksum to the starting values */
	ctx->hash[0] += A;
	ctx->hash[1] += B;
	ctx->hash[2] += C;
	ctx->hash[3] += D;
#endif
}
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100425#undef OP
Denys Vlasenko302ad142010-10-19 02:16:12 +0200426#undef FF
427#undef FG
428#undef FH
429#undef FI
430
431/* Initialize structure containing state of computation.
432 * (RFC 1321, 3.3: Step 3)
433 */
434void FAST_FUNC md5_begin(md5_ctx_t *ctx)
435{
436 ctx->hash[0] = 0x67452301;
437 ctx->hash[1] = 0xefcdab89;
438 ctx->hash[2] = 0x98badcfe;
439 ctx->hash[3] = 0x10325476;
440 ctx->total64 = 0;
441 ctx->process_block = md5_process_block64;
442}
443
444/* Used also for sha1 and sha256 */
445void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
446{
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +0200447 unsigned bufpos = ctx->total64 & 63;
448
449 ctx->total64 += len;
450
451 while (1) {
452 unsigned remaining = 64 - bufpos;
453 if (remaining > len)
454 remaining = len;
455 /* Copy data into aligned buffer */
456 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
457 len -= remaining;
458 buffer = (const char *)buffer + remaining;
459 bufpos += remaining;
460
461 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
462 bufpos -= 64;
463 if (bufpos != 0)
464 break;
465
466 /* Buffer is filled up, process it */
467 ctx->process_block(ctx);
468 /*bufpos = 0; - already is */
469 }
Denys Vlasenko302ad142010-10-19 02:16:12 +0200470}
471
472/* Process the remaining bytes in the buffer and put result from CTX
473 * in first 16 bytes following RESBUF. The result is always in little
474 * endian byte order, so that a byte-wise output yields to the wanted
475 * ASCII representation of the message digest.
476 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100477unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
Denys Vlasenko302ad142010-10-19 02:16:12 +0200478{
479 /* MD5 stores total in LE, need to swap on BE arches: */
480 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
481
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200482 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100483 if (BB_BIG_ENDIAN) {
484 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
485 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
486 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
487 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
488 }
489
Denys Vlasenko302ad142010-10-19 02:16:12 +0200490 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100491 return sizeof(ctx->hash[0]) * 4;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200492}
493
494
495/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200496 * SHA1 part is:
497 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000498 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200499 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
500 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000501 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200502 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000503 *
504 * ---------------------------------------------------------------------------
505 *
506 * SHA256 and SHA512 parts are:
507 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000508 * Shrank by Denys Vlasenko.
509 *
510 * ---------------------------------------------------------------------------
511 *
512 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
513 * and replace "4096" with something like "2000 + time(NULL) % 2097",
514 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000515 */
516
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100517#if CONFIG_SHA1_SMALL == 0
Denys Vlasenko5f681702022-01-01 12:21:01 +0100518# if defined(__GNUC__) && defined(__i386__)
/* Hand-written i386 SHA1 block function (CONFIG_SHA1_SMALL == 0).
 * Hashes the 64 bytes in ctx->wbuffer into ctx->hash[0..4].
 * The 16-word W[] schedule lives on the stack; the five state words
 * live in eax..edx + ebp.  The asm addresses ctx->hash by the fixed
 * offset 76 (checked by BUILD_BUG_ON below), relying on ctx arriving
 * in %eax per the FAST_FUNC calling convention.
 * The RD* macros paste one round step each into the single asm string.
 */
static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
{
	BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 76);
	asm(
"\n\
	pushl	%ebp	#			\n\
	pushl	%edi	#			\n\
	pushl	%esi	#			\n\
	pushl	%ebx	#			\n\
	pushl	%eax				\n\
	movl	$15, %edi			\n\
1:						\n\
	movl	(%eax,%edi,4), %esi		\n\
	bswap	%esi				\n\
	pushl	%esi				\n\
	decl	%edi				\n\
	jns	1b				\n\
	movl	80(%eax), %ebx	# b = ctx->hash[1]	\n\
	movl	84(%eax), %ecx	# c = ctx->hash[2]	\n\
	movl	88(%eax), %edx	# d = ctx->hash[3]	\n\
	movl	92(%eax), %ebp	# e = ctx->hash[4]	\n\
	movl	76(%eax), %eax	# a = ctx->hash[0]	\n\
#Register and stack use:			\n\
# eax..edx: a..d				\n\
# ebp: e					\n\
# esi,edi: temps				\n\
# 4*n(%esp): W[n]				\n\
"
#define RD1As(a,b,c,d,e, n, RCONST) \
"\n\
	##movl	4*"n"(%esp), %esi	# n=0, W[0] already in %esi	\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	andl	"b", %edi		# &b				\n\
	xorl	"d", %edi		# (((c ^ d) & b) ^ d)		\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + W[n]		\n\
	addl	%edi, "e"		# e += (((c ^ d) & b) ^ d)	\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD1Bs(a,b,c,d,e, n, RCONST) \
"\n\
	movl	4*"n"(%esp), %esi	# W[n]				\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	andl	"b", %edi		# &b				\n\
	xorl	"d", %edi		# (((c ^ d) & b) ^ d)		\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + W[n]		\n\
	addl	%edi, "e"		# e += (((c ^ d) & b) ^ d)	\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
"\n\
	movl	4*"n13"(%esp), %esi	# W[(n+13) & 15]		\n\
	xorl	4*"n8"(%esp), %esi	# ^W[(n+8) & 15]		\n\
	xorl	4*"n2"(%esp), %esi	# ^W[(n+2) & 15]		\n\
	xorl	4*"n"(%esp), %esi	# ^W[n & 15]			\n\
	roll	%esi			#				\n\
	movl	%esi, 4*"n"(%esp)	# store to W[n & 15]		\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	andl	"b", %edi		# &b				\n\
	xorl	"d", %edi		# (((c ^ d) & b) ^ d)		\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + mixed_W		\n\
	addl	%edi, "e"		# e += (((c ^ d) & b) ^ d)	\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD1A(a,b,c,d,e, n) RD1As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
#define RD1B(a,b,c,d,e, n) RD1Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR((n)), STR(RCONST))
#define RD1C(a,b,c,d,e, n) RD1Cs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST))
#undef RCONST
#define RCONST 0x5A827999
	RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4)
	RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9)
	RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14)
	RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19)
#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
"\n\
	movl	4*"n13"(%esp), %esi	# W[(n+13) & 15]		\n\
	xorl	4*"n8"(%esp), %esi	# ^W[(n+8) & 15]		\n\
	xorl	4*"n2"(%esp), %esi	# ^W[(n+2) & 15]		\n\
	xorl	4*"n"(%esp), %esi	# ^W[n & 15]			\n\
	roll	%esi			#				\n\
	movl	%esi, 4*"n"(%esp)	# store to W[n & 15]		\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	xorl	"b", %edi		# ^b				\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + mixed_W		\n\
	addl	%edi, "e"		# e += (c ^ d ^ b)		\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD2(a,b,c,d,e, n) RD2s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST))
#undef RCONST
#define RCONST 0x6ED9EBA1
	RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4)
	RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9)
	RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14)
	RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19)

#define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
"\n\
	movl	"b", %edi		# di: b				\n\
	movl	"b", %esi		# si: b				\n\
	orl	"c", %edi		# di: b | c			\n\
	andl	"c", %esi		# si: b & c			\n\
	andl	"d", %edi		# di: (b | c) & d		\n\
	orl	%esi, %edi		# ((b | c) & d) | (b & c)	\n\
	movl	4*"n13"(%esp), %esi	# W[(n+13) & 15]		\n\
	xorl	4*"n8"(%esp), %esi	# ^W[(n+8) & 15]		\n\
	xorl	4*"n2"(%esp), %esi	# ^W[(n+2) & 15]		\n\
	xorl	4*"n"(%esp), %esi	# ^W[n & 15]			\n\
	roll	%esi			#				\n\
	movl	%esi, 4*"n"(%esp)	# store to W[n & 15]		\n\
	addl	%edi, "e"		# += ((b | c) & d) | (b & c)\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + mixed_W		\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD3(a,b,c,d,e, n) RD3s("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST))
#undef RCONST
#define RCONST 0x8F1BBCDC
	RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4)
	RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9)
	RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14)
	RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19)

#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \
"\n\
	movl	4*"n13"(%esp), %esi	# W[(n+13) & 15]		\n\
	xorl	4*"n8"(%esp), %esi	# ^W[(n+8) & 15]		\n\
	xorl	4*"n2"(%esp), %esi	# ^W[(n+2) & 15]		\n\
	xorl	4*"n"(%esp), %esi	# ^W[n & 15]			\n\
	roll	%esi			#				\n\
	movl	%esi, 4*"n"(%esp)	# store to W[n & 15]		\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	xorl	"b", %edi		# ^b				\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + mixed_W		\n\
	addl	%edi, "e"		# e += (c ^ d ^ b)		\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
"\n\
	movl	4*"n13"(%esp), %esi	# W[(n+13) & 15]		\n\
	xorl	4*"n8"(%esp), %esi	# ^W[(n+8) & 15]		\n\
	xorl	4*"n2"(%esp), %esi	# ^W[(n+2) & 15]		\n\
	xorl	4*"n"(%esp), %esi	# ^W[n & 15]			\n\
	roll	%esi			#				\n\
	##movl	%esi, 4*"n"(%esp)	# store to W[n & 15] elided	\n\
	movl	"c", %edi		# c				\n\
	xorl	"d", %edi		# ^d				\n\
	xorl	"b", %edi		# ^b				\n\
	leal	"RCONST"("e",%esi), "e"	# e += RCONST + mixed_W		\n\
	addl	%edi, "e"		# e += (c ^ d ^ b)		\n\
	movl	"a", %esi		#				\n\
	roll	$5, %esi		# rotl32(a,5)			\n\
	addl	%esi, "e"		# e += rotl32(a,5)		\n\
	rorl	$2, "b"			# b = rotl32(b,30)		\n\
"
#define RD4A(a,b,c,d,e, n) RD4As("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
#define RD4B(a,b,c,d,e, n) RD4Bs("%e"STR(a),"%e"STR(b),"%e"STR(c),"%e"STR(d),"%e"STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST))
#undef RCONST
#define RCONST 0xCA62C1D6
	RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4)
	RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9)
	RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14)
	RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19)

"\n\
	movl	4*16(%esp), %esi	#			\n\
	addl	$4*(16+1), %esp		#			\n\
	addl	%eax, 76(%esi)		# ctx->hash[0] += a	\n\
	addl	%ebx, 80(%esi)		# ctx->hash[1] += b	\n\
	addl	%ecx, 84(%esi)		# ctx->hash[2] += c	\n\
	addl	%edx, 88(%esi)		# ctx->hash[3] += d	\n\
	addl	%ebp, 92(%esi)		# ctx->hash[4] += e	\n\
	popl	%ebx			#			\n\
	popl	%esi			#			\n\
	popl	%edi			#			\n\
	popl	%ebp			#			\n\
"
	); /* asm */
#undef RCONST
}
719# elif defined(__GNUC__) && defined(__x86_64__)
Denys Vlasenko5f681702022-01-01 12:21:01 +0100720
Denys Vlasenko05fd13e2022-01-03 01:57:29 +0100721/* in hash_md5_sha_x86-64.S */
722struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; };
Denys Vlasenko711e20e2022-01-07 00:43:59 +0100723void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx);
Denys Vlasenko5f681702022-01-01 12:21:01 +0100724
Denys Vlasenko5f681702022-01-01 12:21:01 +0100725# else
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100726/* Fast, fully-unrolled SHA1. +3800 bytes of code on x86.
727 * It seems further speedup can be achieved by handling more than
728 * 64 bytes per one function call (coreutils does that).
729 */
730static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
731{
732 static const uint32_t rconsts[] ALIGN4 = {
733 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
734 };
735 uint32_t W[16];
736 uint32_t a, b, c, d, e;
737
738 a = ctx->hash[0];
739 b = ctx->hash[1];
740 c = ctx->hash[2];
741 d = ctx->hash[3];
742 e = ctx->hash[4];
743
Denys Vlasenkof09d0882021-12-31 17:06:00 +0100744/* From kernel source comments:
745 * """
746 * If you have 32 registers or more, the compiler can (and should)
747 * try to change the array[] accesses into registers. However, on
748 * machines with less than ~25 registers, that won't really work,
749 * and at least gcc will make an unholy mess of it.
750 *
751 * So to avoid that mess which just slows things down, we force
752 * the stores to memory to actually happen (we might be better off
753 * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
754 * suggested by Artur Skawina - that will also make gcc unable to
755 * try to do the silly "optimize away loads" part because it won't
756 * see what the value will be).
757 * """
758 */
Denys Vlasenko5c0c5582022-01-02 01:56:35 +0100759#if defined(__GNUC__) && defined(__i386__)
Denys Vlasenkof09d0882021-12-31 17:06:00 +0100760# define DO_NOT_TRY_PROPAGATING(m) asm("":"+m"(m))
761#else
762# define DO_NOT_TRY_PROPAGATING(m) ((void)0)
763#endif
764
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100765#undef OP
766#define OP(A,B,C,D,E, n) \
767 do { \
768 uint32_t work = EXPR(B, C, D); \
769 if (n <= 15) \
Denys Vlasenko0b62a082021-12-30 18:54:02 +0100770 work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100771 if (n >= 16) \
Denys Vlasenko0b62a082021-12-30 18:54:02 +0100772 work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
Denys Vlasenkof09d0882021-12-31 17:06:00 +0100773 DO_NOT_TRY_PROPAGATING(W[n & 15]); \
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100774 E += work + rotl32(A, 5) + rconsts[n / 20]; \
775 B = rotl32(B, 30); \
776 } while (0)
777#define OP20(n) \
778 OP(a,b,c,d,e, (n+ 0)); OP(e,a,b,c,d, (n+ 1)); OP(d,e,a,b,c, (n+ 2)); OP(c,d,e,a,b, (n+ 3)); OP(b,c,d,e,a, (n+ 4)); \
779 OP(a,b,c,d,e, (n+ 5)); OP(e,a,b,c,d, (n+ 6)); OP(d,e,a,b,c, (n+ 7)); OP(c,d,e,a,b, (n+ 8)); OP(b,c,d,e,a, (n+ 9)); \
780 OP(a,b,c,d,e, (n+10)); OP(e,a,b,c,d, (n+11)); OP(d,e,a,b,c, (n+12)); OP(c,d,e,a,b, (n+13)); OP(b,c,d,e,a, (n+14)); \
781 OP(a,b,c,d,e, (n+15)); OP(e,a,b,c,d, (n+16)); OP(d,e,a,b,c, (n+17)); OP(c,d,e,a,b, (n+18)); OP(b,c,d,e,a, (n+19))
782
783 /* 4 rounds of 20 operations each */
784#define EXPR(b,c,d) (((c ^ d) & b) ^ d)
785 OP20(0);
786#undef EXPR
787#define EXPR(b,c,d) (c ^ d ^ b)
788 OP20(20);
789#undef EXPR
790#define EXPR(b,c,d) (((b | c) & d) | (b & c))
791 OP20(40);
792#undef EXPR
793#define EXPR(b,c,d) (c ^ d ^ b)
794 OP20(60);
795
796#undef EXPR
797#undef OP
798#undef OP20
799
800 ctx->hash[0] += a;
801 ctx->hash[1] += b;
802 ctx->hash[2] += c;
803 ctx->hash[3] += d;
804 ctx->hash[4] += e;
805}
Denys Vlasenko5f681702022-01-01 12:21:01 +0100806# endif
Denys Vlasenko0b62a082021-12-30 18:54:02 +0100807#elif CONFIG_SHA1_SMALL == 1
/* Middle-sized version, +300 bytes of code on x86.
 * Hashes one 64-byte block from ctx->wbuffer into ctx->hash[]
 * (SHA-1, FIPS 180-2:6.1.2), with the four 20-op rounds written
 * as separate loops instead of being fully unrolled.
 */
static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
{
	static const uint32_t rconsts[] ALIGN4 = {
		/* Round constants for ops 0-19, 20-39, 40-59, 60-79 */
		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
	};
	int j;
	int n;
	/* Rolling 16-word message schedule; each W[n] is mirrored at
	 * W[n+16] so the W[n+2], W[n+8], W[n+13] reads below never
	 * need "& 15" masking. */
	uint32_t W[16+16];
	uint32_t a, b, c, d, e;

	a = ctx->hash[0];
	b = ctx->hash[1];
	c = ctx->hash[2];
	d = ctx->hash[3];
	e = ctx->hash[4];

	/* 1st round of 20 operations */
	n = 0;
	do {
		/* f() for ops 0-19: Ch(b,c,d) */
		uint32_t work = ((c ^ d) & b) ^ d;
		/* Ops 0-15 consume the (big-endian) input block directly */
		W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]);
		work += W[n];
		work += e + rotl32(a, 5) + rconsts[0];
		/* Rotate by one for next time */
		e = d;
		d = c;
		c = rotl32(b, 30);
		b = a;
		a = work;
		n = (n + 1) & 15;
	} while (n != 0);
	/* Ops 16-19: same f() and constant, but W[] is now expanded */
	do {
		uint32_t work = ((c ^ d) & b) ^ d;
		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
		work += W[n];
		work += e + rotl32(a, 5) + rconsts[0];
		e = d;
		d = c;
		c = rotl32(b, 30);
		b = a;
		a = work;
		n = (n + 1) /* & 15*/;	/* n stays < 16 here, masking unneeded */
	} while (n != 4);
	/* 2nd round of 20 operations */
	j = 19;
	do {
		/* f() for ops 20-39: Parity(b,c,d) */
		uint32_t work = c ^ d ^ b;
		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
		work += W[n];
		work += e + rotl32(a, 5) + rconsts[1];
		e = d;
		d = c;
		c = rotl32(b, 30);
		b = a;
		a = work;
		n = (n + 1) & 15;
	} while (--j >= 0);
	/* 3rd round */
	j = 19;
	do {
		/* f() for ops 40-59: Maj(b,c,d) */
		uint32_t work = ((b | c) & d) | (b & c);
		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
		work += W[n];
		work += e + rotl32(a, 5) + rconsts[2];
		e = d;
		d = c;
		c = rotl32(b, 30);
		b = a;
		a = work;
		n = (n + 1) & 15;
	} while (--j >= 0);
	/* 4th round */
	j = 19;
	do {
		/* f() for ops 60-79: Parity(b,c,d) again */
		uint32_t work = c ^ d ^ b;
		W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
		work += W[n];
		work += e + rotl32(a, 5) + rconsts[3];
		e = d;
		d = c;
		c = rotl32(b, 30);
		b = a;
		a = work;
		n = (n + 1) & 15;
	} while (--j >= 0);

	/* Fold this block's result into the running hash */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
}
901#else
/* Compact version, almost twice as slow as fully unrolled.
 * Hashes one 64-byte block from ctx->wbuffer into ctx->hash[]
 * (SHA-1, FIPS 180-2:6.1.2); all 80 operations share one loop body.
 */
static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
{
	static const uint32_t rconsts[] ALIGN4 = {
		/* Round constants for ops 0-19, 20-39, 40-59, 60-79 */
		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
	};
	int i, j;
	int n;
	/* W[n] mirrored at W[n+16] so the W[n+2/8/13] reads below
	 * never need "& 15" masking */
	uint32_t W[16+16];
	uint32_t a, b, c, d, e;

	/* On-stack work buffer frees up one register in the main loop
	 * which otherwise will be needed to hold ctx pointer.
	 *
	 * The compiler is not smart enough to realize it, though. :(
	 * If __attribute__((optimize("2"))) is added to the function,
	 * only then gcc-9.3.1 spills "ctx" to stack and uses the freed
	 * register (making code 6 bytes smaller, not just faster).
	 */
	for (i = 0; i < 16; i++)
		W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);

	a = ctx->hash[0];
	b = ctx->hash[1];
	c = ctx->hash[2];
	d = ctx->hash[3];
	e = ctx->hash[4];

	/* 4 rounds of 20 operations each */
	n = 0;
	for (i = 0; i < 4; i++) {
		j = 19;
		do {
			uint32_t work;

			work = c ^ d;
			if (i == 0) {
				/* Ops 0-19: Ch(b,c,d) */
				work = (work & b) ^ d;
				if (j <= 3)
					goto ge16;	/* ops 16-19 need W[] expansion too */
			} else {
				if (i == 2)
					/* Ops 40-59: Maj(b,c,d) */
					work = ((b | c) & d) | (b & c);
				else /* i = 1 or 3 */
					/* Ops 20-39 and 60-79: Parity(b,c,d) */
					work ^= b;
 ge16:
				/* Message schedule expansion past op 15 */
				W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
			}
			work += W[n];
			work += e + rotl32(a, 5) + rconsts[i];

			/* Rotate by one for next time */
			e = d;
			d = c;
			c = rotl32(b, 30);
			b = a;
			a = work;
			n = (n + 1) & 15;
		} while (--j >= 0);
	}

	/* Fold this block's result into the running hash */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
}
Denys Vlasenko25aadc82021-12-30 13:07:12 +0100969#endif
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000970
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000971/* Constants for SHA512 from FIPS 180-2:4.2.3.
972 * SHA256 constants from FIPS 180-2:4.2.2
973 * are the most significant half of first 64 elements
974 * of the same array.
975 */
#undef K
#if NEED_SHA512
/* Full 64-bit constants, shared by sha512 (all 80) and sha256 (upper
 * halves of the first 64) */
typedef uint64_t sha_K_int;
# define K(v) v
#else
/* sha512 not built: store only the upper 32-bit halves (= the SHA-256
 * constants), halving the table size */
typedef uint32_t sha_K_int;
# define K(v) (uint32_t)(v >> 32)
#endif
static const sha_K_int sha_K[] ALIGN8 = {
	K(0x428a2f98d728ae22ULL), K(0x7137449123ef65cdULL),
	K(0xb5c0fbcfec4d3b2fULL), K(0xe9b5dba58189dbbcULL),
	K(0x3956c25bf348b538ULL), K(0x59f111f1b605d019ULL),
	K(0x923f82a4af194f9bULL), K(0xab1c5ed5da6d8118ULL),
	K(0xd807aa98a3030242ULL), K(0x12835b0145706fbeULL),
	K(0x243185be4ee4b28cULL), K(0x550c7dc3d5ffb4e2ULL),
	K(0x72be5d74f27b896fULL), K(0x80deb1fe3b1696b1ULL),
	K(0x9bdc06a725c71235ULL), K(0xc19bf174cf692694ULL),
	K(0xe49b69c19ef14ad2ULL), K(0xefbe4786384f25e3ULL),
	K(0x0fc19dc68b8cd5b5ULL), K(0x240ca1cc77ac9c65ULL),
	K(0x2de92c6f592b0275ULL), K(0x4a7484aa6ea6e483ULL),
	K(0x5cb0a9dcbd41fbd4ULL), K(0x76f988da831153b5ULL),
	K(0x983e5152ee66dfabULL), K(0xa831c66d2db43210ULL),
	K(0xb00327c898fb213fULL), K(0xbf597fc7beef0ee4ULL),
	K(0xc6e00bf33da88fc2ULL), K(0xd5a79147930aa725ULL),
	K(0x06ca6351e003826fULL), K(0x142929670a0e6e70ULL),
	K(0x27b70a8546d22ffcULL), K(0x2e1b21385c26c926ULL),
	K(0x4d2c6dfc5ac42aedULL), K(0x53380d139d95b3dfULL),
	K(0x650a73548baf63deULL), K(0x766a0abb3c77b2a8ULL),
	K(0x81c2c92e47edaee6ULL), K(0x92722c851482353bULL),
	K(0xa2bfe8a14cf10364ULL), K(0xa81a664bbc423001ULL),
	K(0xc24b8b70d0f89791ULL), K(0xc76c51a30654be30ULL),
	K(0xd192e819d6ef5218ULL), K(0xd69906245565a910ULL),
	K(0xf40e35855771202aULL), K(0x106aa07032bbd1b8ULL),
	K(0x19a4c116b8d2d0c8ULL), K(0x1e376c085141ab53ULL),
	K(0x2748774cdf8eeb99ULL), K(0x34b0bcb5e19b48a8ULL),
	K(0x391c0cb3c5c95a63ULL), K(0x4ed8aa4ae3418acbULL),
	K(0x5b9cca4f7763e373ULL), K(0x682e6ff3d6b2b8a3ULL),
	K(0x748f82ee5defb2fcULL), K(0x78a5636f43172f60ULL),
	K(0x84c87814a1f0ab72ULL), K(0x8cc702081a6439ecULL),
	K(0x90befffa23631e28ULL), K(0xa4506cebde82bde9ULL),
	K(0xbef9a3f7b2c67915ULL), K(0xc67178f2e372532bULL),
#if NEED_SHA512 /* [64]+ are used for sha512 only */
	K(0xca273eceea26619cULL), K(0xd186b8c721c0c207ULL),
	K(0xeada7dd6cde0eb1eULL), K(0xf57d4f7fee6ed178ULL),
	K(0x06f067aa72176fbaULL), K(0x0a637dc5a2c898a6ULL),
	K(0x113f9804bef90daeULL), K(0x1b710b35131c471bULL),
	K(0x28db77f523047d84ULL), K(0x32caab7b40c72493ULL),
	K(0x3c9ebe0a15c9bebcULL), K(0x431d67c49c100d4cULL),
	K(0x4cc5d4becb3e42b6ULL), K(0x597f299cfc657e2aULL),
	K(0x5fcb6fab3ad6faecULL), K(0x6c44198c4a475817ULL),
#endif
};
#undef K
Denis Vlasenko98c87f72009-03-11 21:15:51 +00001029
Denys Vlasenkofe4ef362009-07-05 20:34:38 +02001030#undef Ch
1031#undef Maj
1032#undef S0
1033#undef S1
1034#undef R0
1035#undef R1
1036
/* Hash one 64-byte block from ctx->wbuffer into ctx->hash[]
 * (SHA-256 compression function, FIPS 180-2:6.2.2) */
static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
{
	unsigned t;
	uint32_t W[64], a, b, c, d, e, f, g, h;
	const uint32_t *words = (uint32_t*) ctx->wbuffer;

	/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))

	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
	for (t = 0; t < 16; ++t)
		W[t] = SWAP_BE32(words[t]);	/* input block is big-endian */
	for (/*t = 16*/; t < 64; ++t)
		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];

	a = ctx->hash[0];
	b = ctx->hash[1];
	c = ctx->hash[2];
	d = ctx->hash[3];
	e = ctx->hash[4];
	f = ctx->hash[5];
	g = ctx->hash[6];
	h = ctx->hash[7];

	/* The actual computation according to FIPS 180-2:6.2.2 step 3. */
	for (t = 0; t < 64; ++t) {
		/* Need to fetch upper half of sha_K[t]
		 * (when sha_K[] is 64-bit - I hope compiler is clever
		 * enough to just fetch the upper half)
		 */
		uint32_t K_t = NEED_SHA512 ? (sha_K[t] >> 32) : sha_K[t];
		uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
		uint32_t T2 = S0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}
#undef Ch
#undef Maj
#undef S0
#undef S1
#undef R0
#undef R1
	/* Add the starting values of the context according to FIPS 180-2:6.2.2
	   step 4. */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
	ctx->hash[5] += f;
	ctx->hash[6] += g;
	ctx->hash[7] += h;
}
Denis Vlasenko823f10b2009-03-15 04:56:51 +00001101
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001102#if NEED_SHA512
/* Hash one 128-byte block from ctx->wbuffer into ctx->hash[]
 * (SHA-512 compression function, FIPS 180-2:6.3.2) */
static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
{
	unsigned t;
	uint64_t W[80];
	/* On i386, having assignments here (not later as sha256 does)
	 * produces 99 bytes smaller code with gcc 4.3.1
	 */
	uint64_t a = ctx->hash[0];
	uint64_t b = ctx->hash[1];
	uint64_t c = ctx->hash[2];
	uint64_t d = ctx->hash[3];
	uint64_t e = ctx->hash[4];
	uint64_t f = ctx->hash[5];
	uint64_t g = ctx->hash[6];
	uint64_t h = ctx->hash[7];
	const uint64_t *words = (uint64_t*) ctx->wbuffer;

	/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))

	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
	for (t = 0; t < 16; ++t)
		W[t] = SWAP_BE64(words[t]);	/* input block is big-endian */
	for (/*t = 16*/; t < 80; ++t)
		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];

	/* The actual computation according to FIPS 180-2:6.3.2 step 3. */
	for (t = 0; t < 80; ++t) {
		uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
		uint64_t T2 = S0(a) + Maj(a, b, c);
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}
#undef Ch
#undef Maj
#undef S0
#undef S1
#undef R0
#undef R1
	/* Add the starting values of the context according to FIPS 180-2:6.3.2
	   step 4. */
	ctx->hash[0] += a;
	ctx->hash[1] += b;
	ctx->hash[2] += c;
	ctx->hash[3] += d;
	ctx->hash[4] += e;
	ctx->hash[5] += f;
	ctx->hash[6] += g;
	ctx->hash[7] += h;
}
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001164#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001165
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +00001166void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001167{
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001168 ctx->hash[0] = 0x67452301;
1169 ctx->hash[1] = 0xefcdab89;
1170 ctx->hash[2] = 0x98badcfe;
1171 ctx->hash[3] = 0x10325476;
1172 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +00001173 ctx->total64 = 0;
1174 ctx->process_block = sha1_process_block64;
Denys Vlasenko711e20e2022-01-07 00:43:59 +01001175#if ENABLE_SHA1_HWACCEL
Denys Vlasenkoa96ccbe2022-01-07 01:32:13 +01001176# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
Denys Vlasenko711e20e2022-01-07 00:43:59 +01001177 {
Denys Vlasenko711e20e2022-01-07 00:43:59 +01001178 if (!shaNI) {
1179 unsigned eax = 7, ebx = ebx, ecx = 0, edx = edx;
1180 cpuid(&eax, &ebx, &ecx, &edx);
Denys Vlasenkoe7ff2942022-01-08 01:25:23 +01001181 shaNI = ((ebx >> 29) << 1) - 1;
Denys Vlasenko711e20e2022-01-07 00:43:59 +01001182 }
1183 if (shaNI > 0)
1184 ctx->process_block = sha1_process_block64_shaNI;
1185 }
1186# endif
1187#endif
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001188}
1189
/* SHA-256 initial hash H(0) (FIPS 180-2:5.3.2); these values are also
 * the upper 32-bit halves of SHA-512's H(0), combined with init512_lo[]
 * in sha512_begin(). The two leading zeros deliberately overlay and
 * clear ctx->total64 when sha256_begin() memcpy's this table. */
static const uint32_t init256[] ALIGN4 = {
	0,
	0,
	0x6a09e667,
	0xbb67ae85,
	0x3c6ef372,
	0xa54ff53a,
	0x510e527f,
	0x9b05688c,
	0x1f83d9ab,
	0x5be0cd19,
};
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001202#if NEED_SHA512
/* Lower 32-bit halves of SHA-512's initial hash H(0) (FIPS 180-2:5.3.3);
 * combined with init256[] (the upper halves) in sha512_begin().
 * Two leading zeros clear the 128-bit byte counter. */
static const uint32_t init512_lo[] ALIGN4 = {
	0,
	0,
	0xf3bcc908,
	0x84caa73b,
	0xfe94f82b,
	0x5f1d36f1,
	0xade682d1,
	0x2b3e6c1f,
	0xfb41bd6b,
	0x137e2179,
};
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001215#endif /* NEED_SHA512 */
Denis Vlasenko823f10b2009-03-15 04:56:51 +00001216
Denys Vlasenkof69f2072018-11-26 13:00:28 +01001217// Note: SHA-384 is identical to SHA-512, except that initial hash values are
1218// 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939,
1219// 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
1220// and the output is constructed by omitting last two 64-bit words of it.
1221
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001222/* Initialize structure containing state of computation.
1223 (FIPS 180-2:5.3.2) */
1224void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
1225{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001226 memcpy(&ctx->total64, init256, sizeof(init256));
1227 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +00001228 ctx->process_block = sha256_process_block64;
Denys Vlasenko6472ac92022-02-03 14:15:20 +01001229#if ENABLE_SHA256_HWACCEL
1230# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
1231 {
1232 if (!shaNI) {
1233 unsigned eax = 7, ebx = ebx, ecx = 0, edx = edx;
1234 cpuid(&eax, &ebx, &ecx, &edx);
1235 shaNI = ((ebx >> 29) << 1) - 1;
1236 }
1237 if (shaNI > 0)
1238 ctx->process_block = sha256_process_block64_shaNI;
1239 }
1240# endif
1241#endif
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001242}
Denis Vlasenko823f10b2009-03-15 04:56:51 +00001243
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001244#if NEED_SHA512
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001245/* Initialize structure containing state of computation.
1246 (FIPS 180-2:5.3.3) */
1247void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
1248{
Denis Vlasenko98c87f72009-03-11 21:15:51 +00001249 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001250 /* Two extra iterations zero out ctx->total64[2] */
1251 uint64_t *tp = ctx->total64;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +02001252 for (i = 0; i < 8 + 2; i++)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001253 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +02001254 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001255}
1256
/* Feed len bytes from buffer into the SHA-512 computation,
 * processing each completed 128-byte block */
void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
{
	/* Bytes already pending in the 128-byte block buffer */
	unsigned bufpos = ctx->total64[0] & 127;
	unsigned remaining;

	/* First increment the byte count. FIPS 180-2 specifies the possible
	   length of the file up to 2^128 _bits_.
	   We compute the number of _bytes_ and convert to bits later. */
	ctx->total64[0] += len;
	if (ctx->total64[0] < len)
		ctx->total64[1]++;	/* carry into the upper 64 bits */

	while (1) {
		remaining = 128 - bufpos;
		if (remaining > len)
			remaining = len;
		/* Copy data into aligned buffer */
		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
		len -= remaining;
		buffer = (const char *)buffer + remaining;
		bufpos += remaining;

		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;"
		 * (bufpos is unsigned: it wraps around unless it was exactly 128) */
		bufpos -= 128;
		if (bufpos != 0)
			break;

		/* Buffer is filled up, process it */
		sha512_process_block128(ctx);
		/*bufpos = 0; - already is */
	}
}
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001289#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001290
Denis Vlasenko823f10b2009-03-15 04:56:51 +00001291/* Used also for sha256 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001292unsigned FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001293{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001294 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001295
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001296 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +02001297 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +00001298
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001299 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +00001300 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +01001301 if (BB_LITTLE_ENDIAN) {
1302 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02001303 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +02001304 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +01001305 }
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001306 hash_size *= sizeof(ctx->hash[0]);
1307 memcpy(resbuf, ctx->hash, hash_size);
1308 return hash_size;
Denis Vlasenko56dceb92008-11-10 13:32:50 +00001309}
1310
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001311#if NEED_SHA512
/* Finalize SHA-512: pad, append the 128-bit length, process the last
 * block(s) and copy the 64-byte digest to resbuf. Returns digest size. */
unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
{
	unsigned bufpos = ctx->total64[0] & 127;

	/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
	ctx->wbuffer[bufpos++] = 0x80;

	while (1) {
		unsigned remaining = 128 - bufpos;
		memset(ctx->wbuffer + bufpos, 0, remaining);
		/* If the 16-byte length field fits, this is the final block;
		 * otherwise this block is all padding and we loop once more */
		if (remaining >= 16) {
			/* Store the 128-bit counter of bits in the buffer in BE format */
			uint64_t t;
			t = ctx->total64[0] << 3;	/* bytes -> bits (low word) */
			t = SWAP_BE64(t);
			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
			t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);	/* high word */
			t = SWAP_BE64(t);
			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
		}
		sha512_process_block128(ctx);
		if (remaining >= 16)
			break;
		bufpos = 0;
	}

	if (BB_LITTLE_ENDIAN) {
		unsigned i;
		for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
			ctx->hash[i] = SWAP_BE64(ctx->hash[i]);	/* digest is big-endian */
	}
	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
	return sizeof(ctx->hash);
}
Denys Vlasenkob8935d02017-01-15 20:16:27 +01001346#endif /* NEED_SHA512 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001347
1348
1349/*
1350 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
1351 * Michael Peeters and Gilles Van Assche. For more information, feedback or
1352 * questions, please refer to our website: http://keccak.noekeon.org/
1353 *
1354 * Implementation by Ronny Van Keer,
1355 * hereby denoted as "the implementer".
1356 *
1357 * To the extent possible under law, the implementer has waived all copyright
1358 * and related or neighboring rights to the source code in this file.
1359 * http://creativecommons.org/publicdomain/zero/1.0/
1360 *
1361 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
1362 */
1363
/* Clamp CONFIG_SHA3_SMALL into the supported 0..1 range */
#if CONFIG_SHA3_SMALL < 0
# define SHA3_SMALL 0
#elif CONFIG_SHA3_SMALL > 1
# define SHA3_SMALL 1
#else
# define SHA3_SMALL CONFIG_SHA3_SMALL
#endif
1371
/* Set to 1 (via the #if block below) to enable the bit-sliced
 * 32-bit Keccak variant implemented by split_halves()/combine_halves() */
#define OPTIMIZE_SHA3_FOR_32 0
/*
 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
 * every 64-bit word of state[] can be split into two 32-bit words
 * by even/odd bits. In this form, all rotations of sha3 round
 * are 32-bit - and there are lots of them.
 * However, it requires either splitting/combining state words
 * before/after sha3 round (code does this now)
 * or shuffling bits before xor'ing them into state and in sha3_end.
 * Without shuffling, bit-slicing results in -130 bytes of code
 * and marginal speedup (but of course it gives wrong result).
 * With shuffling it works, but +260 code bytes, and slower.
 * Disabled for now:
 */
#if 0 /* LONG_MAX == 0x7fffffff */
# undef OPTIMIZE_SHA3_FOR_32
# define OPTIMIZE_SHA3_FOR_32 1
#endif
1390
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001391#if OPTIMIZE_SHA3_FOR_32
/* Split each 64-bit word of the 25-word Keccak state in-place:
 * even-numbered bits are gathered into the first 32-bit half,
 * odd-numbered bits into the second.
 */
static void split_halves(uint64_t *state)
{
	/* Bit-unshuffle (Henry S. Warren, Hacker's Delight, 2002) */
	uint32_t *w = (uint32_t*)state;
	int i = 25;
	do {
		uint32_t lo, hi, m;
		lo = w[0];
		m = (lo ^ (lo >> 1)) & 0x22222222; lo ^= m ^ (m << 1);
		m = (lo ^ (lo >> 2)) & 0x0C0C0C0C; lo ^= m ^ (m << 2);
		m = (lo ^ (lo >> 4)) & 0x00F000F0; lo ^= m ^ (m << 4);
		m = (lo ^ (lo >> 8)) & 0x0000FF00; lo ^= m ^ (m << 8);
		hi = w[1];
		m = (hi ^ (hi >> 1)) & 0x22222222; hi ^= m ^ (m << 1);
		m = (hi ^ (hi >> 2)) & 0x0C0C0C0C; hi ^= m ^ (m << 2);
		m = (hi ^ (hi >> 4)) & 0x00F000F0; hi ^= m ^ (m << 4);
		m = (hi ^ (hi >> 8)) & 0x0000FF00; hi ^= m ^ (m << 8);
		/* Recombine: even-bit halves into word 0, odd into word 1 */
		w[0] = (lo & 0x0000FFFF) | (hi << 16);
		w[1] = (lo >> 16) | (hi & 0xFFFF0000);
		w += 2;
	} while (--i != 0);
}
/* The inverse of split_halves(): re-interleave the even/odd bit
 * halves of every 64-bit state word, in-place. */
static void combine_halves(uint64_t *state)
{
	uint32_t *w = (uint32_t*)state;
	int i = 25;
	do {
		uint32_t a, b, m;
		a = w[0];
		b = w[1];
		/* Undo the final 16-bit cross-exchange first */
		m = (a & 0x0000FFFF) | (b << 16);
		b = (a >> 16) | (b & 0xFFFF0000);
		a = m;
		/* Then reverse the unshuffle, widest groups first */
		m = (a ^ (a >> 8)) & 0x0000FF00; a ^= m ^ (m << 8);
		m = (a ^ (a >> 4)) & 0x00F000F0; a ^= m ^ (m << 4);
		m = (a ^ (a >> 2)) & 0x0C0C0C0C; a ^= m ^ (m << 2);
		m = (a ^ (a >> 1)) & 0x22222222; a ^= m ^ (m << 1);
		w[0] = a;
		m = (b ^ (b >> 8)) & 0x0000FF00; b ^= m ^ (m << 8);
		m = (b ^ (b >> 4)) & 0x00F000F0; b ^= m ^ (m << 4);
		m = (b ^ (b >> 2)) & 0x0C0C0C0C; b ^= m ^ (m << 2);
		m = (b ^ (b >> 1)) & 0x22222222; b ^= m ^ (m << 1);
		w[1] = b;
		w += 2;
	} while (--i != 0);
}
1441#endif
1442
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001443/*
1444 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +01001445 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001446static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001447{
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001448 enum { NROUNDS = 24 };
1449
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001450#if OPTIMIZE_SHA3_FOR_32
1451 /*
Denys Vlasenko965b7952020-11-30 13:03:03 +01001452 static const uint32_t IOTA_CONST_0[NROUNDS] ALIGN4 = {
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001453 0x00000001UL,
1454 0x00000000UL,
1455 0x00000000UL,
1456 0x00000000UL,
1457 0x00000001UL,
1458 0x00000001UL,
1459 0x00000001UL,
1460 0x00000001UL,
1461 0x00000000UL,
1462 0x00000000UL,
1463 0x00000001UL,
1464 0x00000000UL,
1465 0x00000001UL,
1466 0x00000001UL,
1467 0x00000001UL,
1468 0x00000001UL,
1469 0x00000000UL,
1470 0x00000000UL,
1471 0x00000000UL,
1472 0x00000000UL,
1473 0x00000001UL,
1474 0x00000000UL,
1475 0x00000001UL,
1476 0x00000000UL,
1477 };
1478 ** bits are in lsb: 0101 0000 1111 0100 1111 0001
1479 */
1480 uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
Denys Vlasenko965b7952020-11-30 13:03:03 +01001481 static const uint32_t IOTA_CONST_1[NROUNDS] ALIGN4 = {
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001482 0x00000000UL,
1483 0x00000089UL,
1484 0x8000008bUL,
1485 0x80008080UL,
1486 0x0000008bUL,
1487 0x00008000UL,
1488 0x80008088UL,
1489 0x80000082UL,
1490 0x0000000bUL,
1491 0x0000000aUL,
1492 0x00008082UL,
1493 0x00008003UL,
1494 0x0000808bUL,
1495 0x8000000bUL,
1496 0x8000008aUL,
1497 0x80000081UL,
1498 0x80000081UL,
1499 0x80000008UL,
1500 0x00000083UL,
1501 0x80008003UL,
1502 0x80008088UL,
1503 0x80000088UL,
1504 0x00008000UL,
1505 0x80008082UL,
1506 };
1507
1508 uint32_t *const s32 = (uint32_t*)state;
1509 unsigned round;
1510
1511 split_halves(state);
1512
1513 for (round = 0; round < NROUNDS; round++) {
1514 unsigned x;
1515
1516 /* Theta */
1517 {
1518 uint32_t BC[20];
1519 for (x = 0; x < 10; ++x) {
1520 BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1521 }
1522 for (x = 0; x < 10; x += 2) {
1523 uint32_t ta, tb;
1524 ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1525 tb = BC[x+9] ^ BC[x+2];
1526 s32[x+0] ^= ta;
1527 s32[x+1] ^= tb;
1528 s32[x+10] ^= ta;
1529 s32[x+11] ^= tb;
1530 s32[x+20] ^= ta;
1531 s32[x+21] ^= tb;
1532 s32[x+30] ^= ta;
1533 s32[x+31] ^= tb;
1534 s32[x+40] ^= ta;
1535 s32[x+41] ^= tb;
1536 }
1537 }
1538 /* RhoPi */
1539 {
1540 uint32_t t0a,t0b, t1a,t1b;
1541 t1a = s32[1*2+0];
1542 t1b = s32[1*2+1];
1543
1544#define RhoPi(PI_LANE, ROT_CONST) \
1545 t0a = s32[PI_LANE*2+0];\
1546 t0b = s32[PI_LANE*2+1];\
1547 if (ROT_CONST & 1) {\
1548 s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1549 s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1550 } else {\
1551 s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1552 s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1553 }\
1554 t1a = t0a; t1b = t0b;
1555
1556 RhoPi(10, 1)
1557 RhoPi( 7, 3)
1558 RhoPi(11, 6)
1559 RhoPi(17,10)
1560 RhoPi(18,15)
1561 RhoPi( 3,21)
1562 RhoPi( 5,28)
1563 RhoPi(16,36)
1564 RhoPi( 8,45)
1565 RhoPi(21,55)
1566 RhoPi(24, 2)
1567 RhoPi( 4,14)
1568 RhoPi(15,27)
1569 RhoPi(23,41)
1570 RhoPi(19,56)
1571 RhoPi(13, 8)
1572 RhoPi(12,25)
1573 RhoPi( 2,43)
1574 RhoPi(20,62)
1575 RhoPi(14,18)
1576 RhoPi(22,39)
1577 RhoPi( 9,61)
1578 RhoPi( 6,20)
1579 RhoPi( 1,44)
1580#undef RhoPi
1581 }
1582 /* Chi */
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001583 for (x = 0; x <= 40;) {
1584 uint32_t BC0, BC1, BC2, BC3, BC4;
1585 BC0 = s32[x + 0*2];
1586 BC1 = s32[x + 1*2];
1587 BC2 = s32[x + 2*2];
1588 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1589 BC3 = s32[x + 3*2];
1590 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1591 BC4 = s32[x + 4*2];
1592 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1593 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1594 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1595 x++;
1596 BC0 = s32[x + 0*2];
1597 BC1 = s32[x + 1*2];
1598 BC2 = s32[x + 2*2];
1599 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1600 BC3 = s32[x + 3*2];
1601 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1602 BC4 = s32[x + 4*2];
1603 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1604 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1605 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1606 x += 9;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001607 }
1608 /* Iota */
1609 s32[0] ^= IOTA_CONST_0bits & 1;
1610 IOTA_CONST_0bits >>= 1;
1611 s32[1] ^= IOTA_CONST_1[round];
1612 }
1613
1614 combine_halves(state);
1615#else
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001616 /* Native 64-bit algorithm */
Denys Vlasenko965b7952020-11-30 13:03:03 +01001617 static const uint16_t IOTA_CONST[NROUNDS] ALIGN2 = {
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001618 /* Elements should be 64-bit, but top half is always zero
1619 * or 0x80000000. We encode 63rd bits in a separate word below.
1620 * Same is true for 31th bits, which lets us use 16-bit table
1621 * instead of 64-bit. The speed penalty is lost in the noise.
1622 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001623 0x0001,
1624 0x8082,
1625 0x808a,
1626 0x8000,
1627 0x808b,
1628 0x0001,
1629 0x8081,
1630 0x8009,
1631 0x008a,
1632 0x0088,
1633 0x8009,
1634 0x000a,
1635 0x808b,
1636 0x008b,
1637 0x8089,
1638 0x8003,
1639 0x8002,
1640 0x0080,
1641 0x800a,
1642 0x000a,
1643 0x8081,
1644 0x8080,
1645 0x0001,
1646 0x8008,
1647 };
1648 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1649 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1650 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1651 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1652
Denys Vlasenko965b7952020-11-30 13:03:03 +01001653 static const uint8_t ROT_CONST[24] ALIGN1 = {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001654 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1655 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1656 };
Denys Vlasenko965b7952020-11-30 13:03:03 +01001657 static const uint8_t PI_LANE[24] ALIGN1 = {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001658 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1659 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1660 };
Denys Vlasenko965b7952020-11-30 13:03:03 +01001661 /*static const uint8_t MOD5[10] ALIGN1 = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001662
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001663 unsigned x;
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001664 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001665
1666 if (BB_BIG_ENDIAN) {
1667 for (x = 0; x < 25; x++) {
1668 state[x] = SWAP_LE64(state[x]);
1669 }
1670 }
1671
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001672 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001673 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001674 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001675 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001676 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001677 BC[x + 5] = BC[x] = state[x]
1678 ^ state[x + 5] ^ state[x + 10]
1679 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001680 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001681 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001682 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001683 * and the code is a bit _smaller_.
1684 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001685 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001686 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001687 state[x] ^= temp;
1688 state[x + 5] ^= temp;
1689 state[x + 10] ^= temp;
1690 state[x + 15] ^= temp;
1691 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001692 }
1693 }
1694
1695 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001696 if (SHA3_SMALL) {
1697 uint64_t t1 = state[1];
1698 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001699 uint64_t t0 = state[PI_LANE[x]];
1700 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001701 t1 = t0;
1702 }
1703 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001704 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001705 * 64-bit rotations by non-constant usually are SLOW on those.
1706 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001707 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001708 * but generates 300-500 more bytes of code.
1709 */
1710 uint64_t t0;
1711 uint64_t t1 = state[1];
1712#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001713 t0 = state[PI_LANE[x ]]; \
1714 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1715 t1 = state[PI_LANE[x+1]]; \
1716 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001717 RhoPi_twice(0); RhoPi_twice(2);
1718 RhoPi_twice(4); RhoPi_twice(6);
1719 RhoPi_twice(8); RhoPi_twice(10);
1720 RhoPi_twice(12); RhoPi_twice(14);
1721 RhoPi_twice(16); RhoPi_twice(18);
1722 RhoPi_twice(20); RhoPi_twice(22);
1723#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001724 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001725 /* Chi */
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001726# if LONG_MAX > 0x7fffffff
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001727 for (x = 0; x <= 20; x += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001728 uint64_t BC0, BC1, BC2, BC3, BC4;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001729 BC0 = state[x + 0];
1730 BC1 = state[x + 1];
1731 BC2 = state[x + 2];
1732 state[x + 0] = BC0 ^ ((~BC1) & BC2);
1733 BC3 = state[x + 3];
1734 state[x + 1] = BC1 ^ ((~BC2) & BC3);
1735 BC4 = state[x + 4];
1736 state[x + 2] = BC2 ^ ((~BC3) & BC4);
1737 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1738 state[x + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001739 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001740# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001741 /* Reduced register pressure version
1742 * for register-starved 32-bit arches
1743 * (i386: -95 bytes, and it is _faster_)
1744 */
1745 for (x = 0; x <= 40;) {
1746 uint32_t BC0, BC1, BC2, BC3, BC4;
1747 uint32_t *const s32 = (uint32_t*)state;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001748# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001749 do_half:
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001750# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001751 BC0 = s32[x + 0*2];
1752 BC1 = s32[x + 1*2];
1753 BC2 = s32[x + 2*2];
1754 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1755 BC3 = s32[x + 3*2];
1756 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1757 BC4 = s32[x + 4*2];
1758 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1759 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1760 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1761 x++;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001762# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001763 if (x & 1)
1764 goto do_half;
1765 x += 8;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001766# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001767 BC0 = s32[x + 0*2];
1768 BC1 = s32[x + 1*2];
1769 BC2 = s32[x + 2*2];
1770 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1771 BC3 = s32[x + 3*2];
1772 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1773 BC4 = s32[x + 4*2];
1774 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1775 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1776 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1777 x += 9;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001778# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001779 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001780# endif /* long is 32-bit */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001781 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001782 state[0] ^= IOTA_CONST[round]
1783 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1784 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001785 }
1786
1787 if (BB_BIG_ENDIAN) {
1788 for (x = 0; x < 25; x++) {
1789 state[x] = SWAP_LE64(state[x]);
1790 }
1791 }
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001792#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001793}
1794
1795void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1796{
1797 memset(ctx, 0, sizeof(*ctx));
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001798 /* SHA3-512, user can override */
1799 ctx->input_block_bytes = (1600 - 512*2) / 8; /* 72 bytes */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001800}
1801
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001802void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001803{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001804#if SHA3_SMALL
1805 const uint8_t *data = buffer;
1806 unsigned bufpos = ctx->bytes_queued;
1807
1808 while (1) {
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001809 unsigned remaining = ctx->input_block_bytes - bufpos;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001810 if (remaining > len)
1811 remaining = len;
1812 len -= remaining;
1813 /* XOR data into buffer */
1814 while (remaining != 0) {
1815 uint8_t *buf = (uint8_t*)ctx->state;
1816 buf[bufpos] ^= *data++;
1817 bufpos++;
1818 remaining--;
1819 }
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +02001820
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001821 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001822 bufpos -= ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001823 if (bufpos != 0)
1824 break;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +02001825
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001826 /* Buffer is filled up, process it */
1827 sha3_process_block72(ctx->state);
1828 /*bufpos = 0; - already is */
1829 }
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001830 ctx->bytes_queued = bufpos + ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001831#else
1832 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1833 const uint8_t *data = buffer;
1834 unsigned bufpos = ctx->bytes_queued;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001835 unsigned iblk_bytes = ctx->input_block_bytes;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001836
1837 /* If already data in queue, continue queuing first */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001838 if (bufpos != 0) {
1839 while (len != 0) {
1840 uint8_t *buf = (uint8_t*)ctx->state;
1841 buf[bufpos] ^= *data++;
1842 len--;
1843 bufpos++;
1844 if (bufpos == iblk_bytes) {
1845 bufpos = 0;
1846 goto do_block;
1847 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001848 }
1849 }
1850
1851 /* Absorb complete blocks */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001852 while (len >= iblk_bytes) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001853 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001854 * We try to be efficient - operate one word at a time, not byte.
1855 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001856 */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001857 unsigned count = iblk_bytes / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001858 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001859 do {
1860 long v;
1861 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001862 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001863 data += sizeof(long);
1864 } while (--count);
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001865 len -= iblk_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001866 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001867 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001868 }
1869
1870 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001871 while (len != 0) {
1872 uint8_t *buf = (uint8_t*)ctx->state;
1873 buf[bufpos] ^= *data++;
1874 bufpos++;
1875 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001876 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001877
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001878 ctx->bytes_queued = bufpos;
1879#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001880}
1881
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001882unsigned FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001883{
1884 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001885 uint8_t *buf = (uint8_t*)ctx->state;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001886 /*
1887 * Keccak block padding is: add 1 bit after last bit of input,
1888 * then add zero bits until the end of block, and add the last 1 bit
1889 * (the last bit in the block) - the "10*1" pattern.
1890 * SHA3 standard appends additional two bits, 01, before that padding:
1891 *
1892 * SHA3-224(M) = KECCAK[448](M||01, 224)
1893 * SHA3-256(M) = KECCAK[512](M||01, 256)
1894 * SHA3-384(M) = KECCAK[768](M||01, 384)
1895 * SHA3-512(M) = KECCAK[1024](M||01, 512)
1896 * (M is the input, || is bit concatenation)
1897 *
1898 * The 6 below contains 01 "SHA3" bits and the first 1 "Keccak" bit:
1899 */
1900 buf[ctx->bytes_queued] ^= 6; /* bit pattern 00000110 */
1901 buf[ctx->input_block_bytes - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001902
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001903 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001904
1905 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001906 memcpy(resbuf, ctx->state, 64);
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001907 return 64;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001908}