blob: 2a7247430cc30feb492203fc625bc4c177eb45b3 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10#include "libbb.h"
11
Denys Vlasenkob8935d02017-01-15 20:16:27 +010012#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
13
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020014/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
15 * (for rotX32, there is no difference). Why? My guess is that
16 * macro requires clever common subexpression elimination heuristics
17 * in gcc, while inline basically forces it to happen.
18 */
19//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
20static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
21{
22 return (x << n) | (x >> (32 - n));
23}
24//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
25static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
26{
27 return (x >> n) | (x << (32 - n));
28}
29/* rotr64 in needed for sha512 only: */
30//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
31static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
32{
33 return (x >> n) | (x << (64 - n));
34}
35
Lauri Kasanenb8173b62013-01-14 05:20:50 +010036/* rotl64 only used for sha3 currently */
37static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
38{
39 return (x << n) | (x >> (64 - n));
40}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020041
Denys Vlasenko302ad142010-10-19 02:16:12 +020042/* Feed data through a temporary buffer.
43 * The internal buffer remembers previous data until it has 64
44 * bytes worth to pass on.
45 */
46static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020047{
Denys Vlasenko302ad142010-10-19 02:16:12 +020048 unsigned bufpos = ctx->total64 & 63;
49
50 ctx->total64 += len;
51
52 while (1) {
53 unsigned remaining = 64 - bufpos;
54 if (remaining > len)
55 remaining = len;
56 /* Copy data into aligned buffer */
57 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
58 len -= remaining;
59 buffer = (const char *)buffer + remaining;
60 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +010061 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko302ad142010-10-19 02:16:12 +020062 bufpos -= 64;
63 if (bufpos != 0)
64 break;
65 /* Buffer is filled up, process it */
66 ctx->process_block(ctx);
67 /*bufpos = 0; - already is */
68 }
69}
70
71/* Process the remaining bytes in the buffer */
72static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
73{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020074 unsigned bufpos = ctx->total64 & 63;
75 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
76 ctx->wbuffer[bufpos++] = 0x80;
77
78 /* This loop iterates either once or twice, no more, no less */
79 while (1) {
80 unsigned remaining = 64 - bufpos;
81 memset(ctx->wbuffer + bufpos, 0, remaining);
82 /* Do we have enough space for the length count? */
83 if (remaining >= 8) {
84 /* Store the 64-bit counter of bits in the buffer */
85 uint64_t t = ctx->total64 << 3;
86 if (swap_needed)
87 t = bb_bswap_64(t);
88 /* wbuffer is suitably aligned for this */
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +020089 *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020090 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020091 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020092 if (remaining >= 8)
93 break;
94 bufpos = 0;
95 }
96}
97
98
99/*
Denys Vlasenko302ad142010-10-19 02:16:12 +0200100 * Compute MD5 checksum of strings according to the
101 * definition of MD5 in RFC 1321 from April 1992.
102 *
103 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
104 *
105 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
106 * Copyright (C) 2001 Manuel Novoa III
107 * Copyright (C) 2003 Glenn L. McGrath
108 * Copyright (C) 2003 Erik Andersen
109 *
110 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
111 */
112
113/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200114#if CONFIG_MD5_SMALL < 0
115# define MD5_SMALL 0
116#elif CONFIG_MD5_SMALL > 3
117# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200118#else
Denys Vlasenko522041e2011-09-10 13:25:57 +0200119# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +0200120#endif
121
122/* These are the four functions used in the four steps of the MD5 algorithm
123 * and defined in the RFC 1321. The first function is a little bit optimized
124 * (as found in Colin Plumbs public domain implementation).
125 * #define FF(b, c, d) ((b & c) | (~b & d))
126 */
127#undef FF
128#undef FG
129#undef FH
130#undef FI
131#define FF(b, c, d) (d ^ (b & (c ^ d)))
132#define FG(b, c, d) FF(d, b, c)
133#define FH(b, c, d) (b ^ c ^ d)
134#define FI(b, c, d) (c ^ (b | ~d))
135
136/* Hash a single block, 64 bytes long and 4-byte aligned */
137static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
138{
Denys Vlasenko522041e2011-09-10 13:25:57 +0200139#if MD5_SMALL > 0
Denys Vlasenko302ad142010-10-19 02:16:12 +0200140 /* Before we start, one word to the strange constants.
141 They are defined in RFC 1321 as
Denys Vlasenko305958d2015-10-07 19:17:01 +0200142 T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
Denys Vlasenko302ad142010-10-19 02:16:12 +0200143 */
144 static const uint32_t C_array[] = {
145 /* round 1 */
146 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
147 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
148 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
149 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
150 /* round 2 */
151 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
152 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
153 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
154 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
155 /* round 3 */
156 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
157 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
158 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
159 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
160 /* round 4 */
161 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
162 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
163 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
164 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
165 };
166 static const char P_array[] ALIGN1 = {
Denys Vlasenko522041e2011-09-10 13:25:57 +0200167# if MD5_SMALL > 1
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200168 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200169# endif
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200170 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
171 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
172 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200173 };
174#endif
175 uint32_t *words = (void*) ctx->wbuffer;
176 uint32_t A = ctx->hash[0];
177 uint32_t B = ctx->hash[1];
178 uint32_t C = ctx->hash[2];
179 uint32_t D = ctx->hash[3];
180
Denys Vlasenko522041e2011-09-10 13:25:57 +0200181#if MD5_SMALL >= 2 /* 2 or 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200182
183 static const char S_array[] ALIGN1 = {
184 7, 12, 17, 22,
185 5, 9, 14, 20,
186 4, 11, 16, 23,
187 6, 10, 15, 21
188 };
189 const uint32_t *pc;
190 const char *pp;
191 const char *ps;
192 int i;
193 uint32_t temp;
194
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100195 if (BB_BIG_ENDIAN)
196 for (i = 0; i < 16; i++)
197 words[i] = SWAP_LE32(words[i]);
Denys Vlasenko302ad142010-10-19 02:16:12 +0200198
Denys Vlasenko522041e2011-09-10 13:25:57 +0200199# if MD5_SMALL == 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200200 pc = C_array;
201 pp = P_array;
202 ps = S_array - 4;
203
204 for (i = 0; i < 64; i++) {
205 if ((i & 0x0f) == 0)
206 ps += 4;
207 temp = A;
208 switch (i >> 4) {
209 case 0:
210 temp += FF(B, C, D);
211 break;
212 case 1:
213 temp += FG(B, C, D);
214 break;
215 case 2:
216 temp += FH(B, C, D);
217 break;
Denys Vlasenko305958d2015-10-07 19:17:01 +0200218 default: /* case 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200219 temp += FI(B, C, D);
220 }
221 temp += words[(int) (*pp++)] + *pc++;
222 temp = rotl32(temp, ps[i & 3]);
223 temp += B;
224 A = D;
225 D = C;
226 C = B;
227 B = temp;
228 }
Denys Vlasenko522041e2011-09-10 13:25:57 +0200229# else /* MD5_SMALL == 2 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200230 pc = C_array;
231 pp = P_array;
232 ps = S_array;
233
234 for (i = 0; i < 16; i++) {
235 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
236 temp = rotl32(temp, ps[i & 3]);
237 temp += B;
238 A = D;
239 D = C;
240 C = B;
241 B = temp;
242 }
243 ps += 4;
244 for (i = 0; i < 16; i++) {
245 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
246 temp = rotl32(temp, ps[i & 3]);
247 temp += B;
248 A = D;
249 D = C;
250 C = B;
251 B = temp;
252 }
253 ps += 4;
254 for (i = 0; i < 16; i++) {
255 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
256 temp = rotl32(temp, ps[i & 3]);
257 temp += B;
258 A = D;
259 D = C;
260 C = B;
261 B = temp;
262 }
263 ps += 4;
264 for (i = 0; i < 16; i++) {
265 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
266 temp = rotl32(temp, ps[i & 3]);
267 temp += B;
268 A = D;
269 D = C;
270 C = B;
271 B = temp;
272 }
273# endif
274 /* Add checksum to the starting values */
275 ctx->hash[0] += A;
276 ctx->hash[1] += B;
277 ctx->hash[2] += C;
278 ctx->hash[3] += D;
279
Denys Vlasenko522041e2011-09-10 13:25:57 +0200280#else /* MD5_SMALL == 0 or 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200281
Denys Vlasenko522041e2011-09-10 13:25:57 +0200282# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200283 const uint32_t *pc;
284 const char *pp;
285 int i;
286# endif
287
288 /* First round: using the given function, the context and a constant
289 the next context is computed. Because the algorithm's processing
290 unit is a 32-bit word and it is determined to work on words in
291 little endian byte order we perhaps have to change the byte order
292 before the computation. To reduce the work for the next steps
293 we save swapped words in WORDS array. */
294# undef OP
295# define OP(a, b, c, d, s, T) \
296 do { \
297 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
298 words++; \
299 a = rotl32(a, s); \
300 a += b; \
301 } while (0)
302
303 /* Round 1 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200304# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200305 pc = C_array;
306 for (i = 0; i < 4; i++) {
307 OP(A, B, C, D, 7, *pc++);
308 OP(D, A, B, C, 12, *pc++);
309 OP(C, D, A, B, 17, *pc++);
310 OP(B, C, D, A, 22, *pc++);
311 }
312# else
313 OP(A, B, C, D, 7, 0xd76aa478);
314 OP(D, A, B, C, 12, 0xe8c7b756);
315 OP(C, D, A, B, 17, 0x242070db);
316 OP(B, C, D, A, 22, 0xc1bdceee);
317 OP(A, B, C, D, 7, 0xf57c0faf);
318 OP(D, A, B, C, 12, 0x4787c62a);
319 OP(C, D, A, B, 17, 0xa8304613);
320 OP(B, C, D, A, 22, 0xfd469501);
321 OP(A, B, C, D, 7, 0x698098d8);
322 OP(D, A, B, C, 12, 0x8b44f7af);
323 OP(C, D, A, B, 17, 0xffff5bb1);
324 OP(B, C, D, A, 22, 0x895cd7be);
325 OP(A, B, C, D, 7, 0x6b901122);
326 OP(D, A, B, C, 12, 0xfd987193);
327 OP(C, D, A, B, 17, 0xa679438e);
328 OP(B, C, D, A, 22, 0x49b40821);
329# endif
330 words -= 16;
331
332 /* For the second to fourth round we have the possibly swapped words
333 in WORDS. Redefine the macro to take an additional first
334 argument specifying the function to use. */
335# undef OP
336# define OP(f, a, b, c, d, k, s, T) \
337 do { \
338 a += f(b, c, d) + words[k] + T; \
339 a = rotl32(a, s); \
340 a += b; \
341 } while (0)
342
343 /* Round 2 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200344# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200345 pp = P_array;
346 for (i = 0; i < 4; i++) {
347 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
348 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
349 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
350 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
351 }
352# else
353 OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
354 OP(FG, D, A, B, C, 6, 9, 0xc040b340);
355 OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
356 OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
357 OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
358 OP(FG, D, A, B, C, 10, 9, 0x02441453);
359 OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
360 OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
361 OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
362 OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
363 OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
364 OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
365 OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
366 OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
367 OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
368 OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
369# endif
370
371 /* Round 3 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200372# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200373 for (i = 0; i < 4; i++) {
374 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
375 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
376 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
377 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
378 }
379# else
380 OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
381 OP(FH, D, A, B, C, 8, 11, 0x8771f681);
382 OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
383 OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
384 OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
385 OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
386 OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
387 OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
388 OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
389 OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
390 OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
391 OP(FH, B, C, D, A, 6, 23, 0x04881d05);
392 OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
393 OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
394 OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
395 OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
396# endif
397
398 /* Round 4 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200399# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200400 for (i = 0; i < 4; i++) {
401 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
402 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
403 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
404 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
405 }
406# else
407 OP(FI, A, B, C, D, 0, 6, 0xf4292244);
408 OP(FI, D, A, B, C, 7, 10, 0x432aff97);
409 OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
410 OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
411 OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
412 OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
413 OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
414 OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
415 OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
416 OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
417 OP(FI, C, D, A, B, 6, 15, 0xa3014314);
418 OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
419 OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
420 OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
421 OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
422 OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
423# undef OP
424# endif
425 /* Add checksum to the starting values */
Denys Vlasenko305958d2015-10-07 19:17:01 +0200426 ctx->hash[0] += A;
427 ctx->hash[1] += B;
428 ctx->hash[2] += C;
429 ctx->hash[3] += D;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200430#endif
431}
432#undef FF
433#undef FG
434#undef FH
435#undef FI
436
437/* Initialize structure containing state of computation.
438 * (RFC 1321, 3.3: Step 3)
439 */
440void FAST_FUNC md5_begin(md5_ctx_t *ctx)
441{
442 ctx->hash[0] = 0x67452301;
443 ctx->hash[1] = 0xefcdab89;
444 ctx->hash[2] = 0x98badcfe;
445 ctx->hash[3] = 0x10325476;
446 ctx->total64 = 0;
447 ctx->process_block = md5_process_block64;
448}
449
450/* Used also for sha1 and sha256 */
451void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
452{
453 common64_hash(ctx, buffer, len);
454}
455
456/* Process the remaining bytes in the buffer and put result from CTX
457 * in first 16 bytes following RESBUF. The result is always in little
458 * endian byte order, so that a byte-wise output yields to the wanted
459 * ASCII representation of the message digest.
460 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100461unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
Denys Vlasenko302ad142010-10-19 02:16:12 +0200462{
463 /* MD5 stores total in LE, need to swap on BE arches: */
464 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
465
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200466 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100467 if (BB_BIG_ENDIAN) {
468 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
469 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
470 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
471 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
472 }
473
Denys Vlasenko302ad142010-10-19 02:16:12 +0200474 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100475 return sizeof(ctx->hash[0]) * 4;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200476}
477
478
479/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200480 * SHA1 part is:
481 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000482 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200483 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
484 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000485 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200486 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000487 *
488 * ---------------------------------------------------------------------------
489 *
490 * SHA256 and SHA512 parts are:
491 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000492 * Shrank by Denys Vlasenko.
493 *
494 * ---------------------------------------------------------------------------
495 *
496 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
497 * and replace "4096" with something like "2000 + time(NULL) % 2097",
498 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000499 */
500
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000501static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000502{
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200503 static const uint32_t rconsts[] = {
504 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
505 };
506 int i, j;
507 int cnt;
508 uint32_t W[16+16];
509 uint32_t a, b, c, d, e;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000510
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200511 /* On-stack work buffer frees up one register in the main loop
512 * which otherwise will be needed to hold ctx pointer */
513 for (i = 0; i < 16; i++)
514 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000515
516 a = ctx->hash[0];
517 b = ctx->hash[1];
518 c = ctx->hash[2];
519 d = ctx->hash[3];
520 e = ctx->hash[4];
521
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200522 /* 4 rounds of 20 operations each */
523 cnt = 0;
524 for (i = 0; i < 4; i++) {
525 j = 19;
526 do {
527 uint32_t work;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000528
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200529 work = c ^ d;
530 if (i == 0) {
531 work = (work & b) ^ d;
532 if (j <= 3)
533 goto ge16;
534 /* Used to do SWAP_BE32 here, but this
535 * requires ctx (see comment above) */
536 work += W[cnt];
537 } else {
538 if (i == 2)
539 work = ((b | c) & d) | (b & c);
540 else /* i = 1 or 3 */
541 work ^= b;
542 ge16:
543 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
544 work += W[cnt];
545 }
Denys Vlasenko03a5fe32010-10-24 20:51:28 +0200546 work += e + rotl32(a, 5) + rconsts[i];
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000547
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200548 /* Rotate by one for next time */
549 e = d;
550 d = c;
551 c = /* b = */ rotl32(b, 30);
552 b = a;
553 a = work;
554 cnt = (cnt + 1) & 15;
555 } while (--j >= 0);
556 }
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000557
558 ctx->hash[0] += a;
559 ctx->hash[1] += b;
560 ctx->hash[2] += c;
561 ctx->hash[3] += d;
562 ctx->hash[4] += e;
563}
564
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000565/* Constants for SHA512 from FIPS 180-2:4.2.3.
566 * SHA256 constants from FIPS 180-2:4.2.2
567 * are the most significant half of first 64 elements
568 * of the same array.
569 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100570#undef K
571#if NEED_SHA512
572typedef uint64_t sha_K_int;
573# define K(v) v
574#else
575typedef uint32_t sha_K_int;
576# define K(v) (uint32_t)(v >> 32)
577#endif
578static const sha_K_int sha_K[] = {
579 K(0x428a2f98d728ae22ULL), K(0x7137449123ef65cdULL),
580 K(0xb5c0fbcfec4d3b2fULL), K(0xe9b5dba58189dbbcULL),
581 K(0x3956c25bf348b538ULL), K(0x59f111f1b605d019ULL),
582 K(0x923f82a4af194f9bULL), K(0xab1c5ed5da6d8118ULL),
583 K(0xd807aa98a3030242ULL), K(0x12835b0145706fbeULL),
584 K(0x243185be4ee4b28cULL), K(0x550c7dc3d5ffb4e2ULL),
585 K(0x72be5d74f27b896fULL), K(0x80deb1fe3b1696b1ULL),
586 K(0x9bdc06a725c71235ULL), K(0xc19bf174cf692694ULL),
587 K(0xe49b69c19ef14ad2ULL), K(0xefbe4786384f25e3ULL),
588 K(0x0fc19dc68b8cd5b5ULL), K(0x240ca1cc77ac9c65ULL),
589 K(0x2de92c6f592b0275ULL), K(0x4a7484aa6ea6e483ULL),
590 K(0x5cb0a9dcbd41fbd4ULL), K(0x76f988da831153b5ULL),
591 K(0x983e5152ee66dfabULL), K(0xa831c66d2db43210ULL),
592 K(0xb00327c898fb213fULL), K(0xbf597fc7beef0ee4ULL),
593 K(0xc6e00bf33da88fc2ULL), K(0xd5a79147930aa725ULL),
594 K(0x06ca6351e003826fULL), K(0x142929670a0e6e70ULL),
595 K(0x27b70a8546d22ffcULL), K(0x2e1b21385c26c926ULL),
596 K(0x4d2c6dfc5ac42aedULL), K(0x53380d139d95b3dfULL),
597 K(0x650a73548baf63deULL), K(0x766a0abb3c77b2a8ULL),
598 K(0x81c2c92e47edaee6ULL), K(0x92722c851482353bULL),
599 K(0xa2bfe8a14cf10364ULL), K(0xa81a664bbc423001ULL),
600 K(0xc24b8b70d0f89791ULL), K(0xc76c51a30654be30ULL),
601 K(0xd192e819d6ef5218ULL), K(0xd69906245565a910ULL),
602 K(0xf40e35855771202aULL), K(0x106aa07032bbd1b8ULL),
603 K(0x19a4c116b8d2d0c8ULL), K(0x1e376c085141ab53ULL),
604 K(0x2748774cdf8eeb99ULL), K(0x34b0bcb5e19b48a8ULL),
605 K(0x391c0cb3c5c95a63ULL), K(0x4ed8aa4ae3418acbULL),
606 K(0x5b9cca4f7763e373ULL), K(0x682e6ff3d6b2b8a3ULL),
607 K(0x748f82ee5defb2fcULL), K(0x78a5636f43172f60ULL),
608 K(0x84c87814a1f0ab72ULL), K(0x8cc702081a6439ecULL),
609 K(0x90befffa23631e28ULL), K(0xa4506cebde82bde9ULL),
610 K(0xbef9a3f7b2c67915ULL), K(0xc67178f2e372532bULL),
611#if NEED_SHA512 /* [64]+ are used for sha512 only */
612 K(0xca273eceea26619cULL), K(0xd186b8c721c0c207ULL),
613 K(0xeada7dd6cde0eb1eULL), K(0xf57d4f7fee6ed178ULL),
614 K(0x06f067aa72176fbaULL), K(0x0a637dc5a2c898a6ULL),
615 K(0x113f9804bef90daeULL), K(0x1b710b35131c471bULL),
616 K(0x28db77f523047d84ULL), K(0x32caab7b40c72493ULL),
617 K(0x3c9ebe0a15c9bebcULL), K(0x431d67c49c100d4cULL),
618 K(0x4cc5d4becb3e42b6ULL), K(0x597f299cfc657e2aULL),
619 K(0x5fcb6fab3ad6faecULL), K(0x6c44198c4a475817ULL),
620#endif
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000621};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100622#undef K
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000623
Denys Vlasenkofe4ef362009-07-05 20:34:38 +0200624#undef Ch
625#undef Maj
626#undef S0
627#undef S1
628#undef R0
629#undef R1
630
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000631static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000632{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000633 unsigned t;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000634 uint32_t W[64], a, b, c, d, e, f, g, h;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000635 const uint32_t *words = (uint32_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000636
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000637 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000638#define Ch(x, y, z) ((x & y) ^ (~x & z))
639#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
640#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
641#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
642#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
643#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
644
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000645 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200646 for (t = 0; t < 16; ++t)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200647 W[t] = SWAP_BE32(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000648 for (/*t = 16*/; t < 64; ++t)
649 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000650
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000651 a = ctx->hash[0];
652 b = ctx->hash[1];
653 c = ctx->hash[2];
654 d = ctx->hash[3];
655 e = ctx->hash[4];
656 f = ctx->hash[5];
657 g = ctx->hash[6];
658 h = ctx->hash[7];
659
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000660 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
661 for (t = 0; t < 64; ++t) {
Denis Vlasenkoa2333c82009-03-28 19:08:23 +0000662 /* Need to fetch upper half of sha_K[t]
663 * (I hope compiler is clever enough to just fetch
664 * upper half)
665 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100666 uint32_t K_t = NEED_SHA512 ? (sha_K[t] >> 32) : sha_K[t];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000667 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000668 uint32_t T2 = S0(a) + Maj(a, b, c);
669 h = g;
670 g = f;
671 f = e;
672 e = d + T1;
673 d = c;
674 c = b;
675 b = a;
676 a = T1 + T2;
677 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000678#undef Ch
679#undef Maj
680#undef S0
681#undef S1
682#undef R0
683#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000684 /* Add the starting values of the context according to FIPS 180-2:6.2.2
685 step 4. */
686 ctx->hash[0] += a;
687 ctx->hash[1] += b;
688 ctx->hash[2] += c;
689 ctx->hash[3] += d;
690 ctx->hash[4] += e;
691 ctx->hash[5] += f;
692 ctx->hash[6] += g;
693 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000694}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000695
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100696#if NEED_SHA512
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000697static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000698{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000699 unsigned t;
700 uint64_t W[80];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000701 /* On i386, having assignments here (not later as sha256 does)
702 * produces 99 bytes smaller code with gcc 4.3.1
703 */
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000704 uint64_t a = ctx->hash[0];
705 uint64_t b = ctx->hash[1];
706 uint64_t c = ctx->hash[2];
707 uint64_t d = ctx->hash[3];
708 uint64_t e = ctx->hash[4];
709 uint64_t f = ctx->hash[5];
710 uint64_t g = ctx->hash[6];
711 uint64_t h = ctx->hash[7];
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000712 const uint64_t *words = (uint64_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000713
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000714 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000715#define Ch(x, y, z) ((x & y) ^ (~x & z))
716#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
717#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
718#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
719#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
720#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
721
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000722 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200723 for (t = 0; t < 16; ++t)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200724 W[t] = SWAP_BE64(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000725 for (/*t = 16*/; t < 80; ++t)
726 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000727
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000728 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
729 for (t = 0; t < 80; ++t) {
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000730 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000731 uint64_t T2 = S0(a) + Maj(a, b, c);
732 h = g;
733 g = f;
734 f = e;
735 e = d + T1;
736 d = c;
737 c = b;
738 b = a;
739 a = T1 + T2;
740 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000741#undef Ch
742#undef Maj
743#undef S0
744#undef S1
745#undef R0
746#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000747 /* Add the starting values of the context according to FIPS 180-2:6.3.2
748 step 4. */
749 ctx->hash[0] += a;
750 ctx->hash[1] += b;
751 ctx->hash[2] += c;
752 ctx->hash[3] += d;
753 ctx->hash[4] += e;
754 ctx->hash[5] += f;
755 ctx->hash[6] += g;
756 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000757}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100758#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000759
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +0000760void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000761{
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000762 ctx->hash[0] = 0x67452301;
763 ctx->hash[1] = 0xefcdab89;
764 ctx->hash[2] = 0x98badcfe;
765 ctx->hash[3] = 0x10325476;
766 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000767 ctx->total64 = 0;
768 ctx->process_block = sha1_process_block64;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000769}
770
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000771static const uint32_t init256[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200772 0,
773 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000774 0x6a09e667,
775 0xbb67ae85,
776 0x3c6ef372,
777 0xa54ff53a,
778 0x510e527f,
779 0x9b05688c,
780 0x1f83d9ab,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200781 0x5be0cd19,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000782};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100783#if NEED_SHA512
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000784static const uint32_t init512_lo[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200785 0,
786 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000787 0xf3bcc908,
788 0x84caa73b,
789 0xfe94f82b,
790 0x5f1d36f1,
791 0xade682d1,
792 0x2b3e6c1f,
793 0xfb41bd6b,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200794 0x137e2179,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000795};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100796#endif /* NEED_SHA512 */
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000797
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000798/* Initialize structure containing state of computation.
799 (FIPS 180-2:5.3.2) */
800void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
801{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200802 memcpy(&ctx->total64, init256, sizeof(init256));
803 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000804 ctx->process_block = sha256_process_block64;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000805}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000806
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100807#if NEED_SHA512
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000808/* Initialize structure containing state of computation.
809 (FIPS 180-2:5.3.3) */
810void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
811{
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000812 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200813 /* Two extra iterations zero out ctx->total64[2] */
814 uint64_t *tp = ctx->total64;
815 for (i = 0; i < 2+8; i++)
816 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200817 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000818}
819
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200820void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000821{
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200822 unsigned bufpos = ctx->total64[0] & 127;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200823 unsigned remaining;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000824
825 /* First increment the byte count. FIPS 180-2 specifies the possible
826 length of the file up to 2^128 _bits_.
827 We compute the number of _bytes_ and convert to bits later. */
828 ctx->total64[0] += len;
829 if (ctx->total64[0] < len)
830 ctx->total64[1]++;
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100831# if 0
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200832 remaining = 128 - bufpos;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000833
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200834 /* Hash whole blocks */
835 while (len >= remaining) {
836 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
837 buffer = (const char *)buffer + remaining;
838 len -= remaining;
839 remaining = 128;
840 bufpos = 0;
841 sha512_process_block128(ctx);
842 }
843
844 /* Save last, partial blosk */
845 memcpy(ctx->wbuffer + bufpos, buffer, len);
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100846# else
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200847 while (1) {
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200848 remaining = 128 - bufpos;
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200849 if (remaining > len)
850 remaining = len;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200851 /* Copy data into aligned buffer */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200852 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
853 len -= remaining;
854 buffer = (const char *)buffer + remaining;
855 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +0100856 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200857 bufpos -= 128;
858 if (bufpos != 0)
859 break;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200860 /* Buffer is filled up, process it */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000861 sha512_process_block128(ctx);
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200862 /*bufpos = 0; - already is */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000863 }
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100864# endif
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000865}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100866#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000867
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000868/* Used also for sha256 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100869unsigned FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000870{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200871 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000872
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200873 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200874 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000875
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200876 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000877 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100878 if (BB_LITTLE_ENDIAN) {
879 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200880 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200881 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100882 }
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100883 hash_size *= sizeof(ctx->hash[0]);
884 memcpy(resbuf, ctx->hash, hash_size);
885 return hash_size;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000886}
887
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100888#if NEED_SHA512
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100889unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000890{
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200891 unsigned bufpos = ctx->total64[0] & 127;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000892
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200893 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200894 ctx->wbuffer[bufpos++] = 0x80;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000895
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000896 while (1) {
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200897 unsigned remaining = 128 - bufpos;
898 memset(ctx->wbuffer + bufpos, 0, remaining);
899 if (remaining >= 16) {
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000900 /* Store the 128-bit counter of bits in the buffer in BE format */
901 uint64_t t;
902 t = ctx->total64[0] << 3;
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200903 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200904 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000905 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200906 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200907 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000908 }
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000909 sha512_process_block128(ctx);
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200910 if (remaining >= 16)
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000911 break;
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200912 bufpos = 0;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000913 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000914
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100915 if (BB_LITTLE_ENDIAN) {
916 unsigned i;
917 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200918 ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100919 }
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000920 memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100921 return sizeof(ctx->hash);
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000922}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100923#endif /* NEED_SHA512 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100924
925
926/*
927 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
928 * Michael Peeters and Gilles Van Assche. For more information, feedback or
929 * questions, please refer to our website: http://keccak.noekeon.org/
930 *
931 * Implementation by Ronny Van Keer,
932 * hereby denoted as "the implementer".
933 *
934 * To the extent possible under law, the implementer has waived all copyright
935 * and related or neighboring rights to the source code in this file.
936 * http://creativecommons.org/publicdomain/zero/1.0/
937 *
938 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
939 */
940
Denys Vlasenko30a86522013-01-15 01:12:26 +0100941#if CONFIG_SHA3_SMALL < 0
942# define SHA3_SMALL 0
943#elif CONFIG_SHA3_SMALL > 1
944# define SHA3_SMALL 1
945#else
946# define SHA3_SMALL CONFIG_SHA3_SMALL
947#endif
948
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200949#define OPTIMIZE_SHA3_FOR_32 0
950/*
951 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
952 * every 64-bit word of state[] can be split into two 32-bit words
953 * by even/odd bits. In this form, all rotations of sha3 round
954 * are 32-bit - and there are lots of them.
955 * However, it requires either splitting/combining state words
956 * before/after sha3 round (code does this now)
957 * or shuffling bits before xor'ing them into state and in sha3_end.
958 * Without shuffling, bit-slicing results in -130 bytes of code
959 * and marginal speedup (but of course it gives wrong result).
960 * With shuffling it works, but +260 code bytes, and slower.
961 * Disabled for now:
962 */
963#if 0 /* LONG_MAX == 0x7fffffff */
964# undef OPTIMIZE_SHA3_FOR_32
965# define OPTIMIZE_SHA3_FOR_32 1
966#endif
967
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200968#if OPTIMIZE_SHA3_FOR_32
969/* This splits every 64-bit word into a pair of 32-bit words,
970 * even bits go into first word, odd bits go to second one.
971 * The conversion is done in-place.
972 */
973static void split_halves(uint64_t *state)
974{
975 /* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
976 uint32_t *s32 = (uint32_t*)state;
977 uint32_t t, x0, x1;
978 int i;
979 for (i = 24; i >= 0; --i) {
980 x0 = s32[0];
981 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
982 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
983 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
984 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
985 x1 = s32[1];
986 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
987 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
988 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
989 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
990 *s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
991 *s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
992 }
993}
994/* The reverse operation */
995static void combine_halves(uint64_t *state)
996{
997 uint32_t *s32 = (uint32_t*)state;
998 uint32_t t, x0, x1;
999 int i;
1000 for (i = 24; i >= 0; --i) {
1001 x0 = s32[0];
1002 x1 = s32[1];
1003 t = (x0 & 0x0000FFFF) | (x1 << 16);
1004 x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
1005 x0 = t;
1006 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
1007 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
1008 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
1009 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
1010 *s32++ = x0;
1011 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
1012 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
1013 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
1014 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
1015 *s32++ = x1;
1016 }
1017}
1018#endif
1019
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001020/*
1021 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +01001022 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001023static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001024{
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001025 enum { NROUNDS = 24 };
1026
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001027#if OPTIMIZE_SHA3_FOR_32
1028 /*
1029 static const uint32_t IOTA_CONST_0[NROUNDS] = {
1030 0x00000001UL,
1031 0x00000000UL,
1032 0x00000000UL,
1033 0x00000000UL,
1034 0x00000001UL,
1035 0x00000001UL,
1036 0x00000001UL,
1037 0x00000001UL,
1038 0x00000000UL,
1039 0x00000000UL,
1040 0x00000001UL,
1041 0x00000000UL,
1042 0x00000001UL,
1043 0x00000001UL,
1044 0x00000001UL,
1045 0x00000001UL,
1046 0x00000000UL,
1047 0x00000000UL,
1048 0x00000000UL,
1049 0x00000000UL,
1050 0x00000001UL,
1051 0x00000000UL,
1052 0x00000001UL,
1053 0x00000000UL,
1054 };
1055 ** bits are in lsb: 0101 0000 1111 0100 1111 0001
1056 */
1057 uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
1058 static const uint32_t IOTA_CONST_1[NROUNDS] = {
1059 0x00000000UL,
1060 0x00000089UL,
1061 0x8000008bUL,
1062 0x80008080UL,
1063 0x0000008bUL,
1064 0x00008000UL,
1065 0x80008088UL,
1066 0x80000082UL,
1067 0x0000000bUL,
1068 0x0000000aUL,
1069 0x00008082UL,
1070 0x00008003UL,
1071 0x0000808bUL,
1072 0x8000000bUL,
1073 0x8000008aUL,
1074 0x80000081UL,
1075 0x80000081UL,
1076 0x80000008UL,
1077 0x00000083UL,
1078 0x80008003UL,
1079 0x80008088UL,
1080 0x80000088UL,
1081 0x00008000UL,
1082 0x80008082UL,
1083 };
1084
1085 uint32_t *const s32 = (uint32_t*)state;
1086 unsigned round;
1087
1088 split_halves(state);
1089
1090 for (round = 0; round < NROUNDS; round++) {
1091 unsigned x;
1092
1093 /* Theta */
1094 {
1095 uint32_t BC[20];
1096 for (x = 0; x < 10; ++x) {
1097 BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1098 }
1099 for (x = 0; x < 10; x += 2) {
1100 uint32_t ta, tb;
1101 ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1102 tb = BC[x+9] ^ BC[x+2];
1103 s32[x+0] ^= ta;
1104 s32[x+1] ^= tb;
1105 s32[x+10] ^= ta;
1106 s32[x+11] ^= tb;
1107 s32[x+20] ^= ta;
1108 s32[x+21] ^= tb;
1109 s32[x+30] ^= ta;
1110 s32[x+31] ^= tb;
1111 s32[x+40] ^= ta;
1112 s32[x+41] ^= tb;
1113 }
1114 }
1115 /* RhoPi */
1116 {
1117 uint32_t t0a,t0b, t1a,t1b;
1118 t1a = s32[1*2+0];
1119 t1b = s32[1*2+1];
1120
1121#define RhoPi(PI_LANE, ROT_CONST) \
1122 t0a = s32[PI_LANE*2+0];\
1123 t0b = s32[PI_LANE*2+1];\
1124 if (ROT_CONST & 1) {\
1125 s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1126 s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1127 } else {\
1128 s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1129 s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1130 }\
1131 t1a = t0a; t1b = t0b;
1132
1133 RhoPi(10, 1)
1134 RhoPi( 7, 3)
1135 RhoPi(11, 6)
1136 RhoPi(17,10)
1137 RhoPi(18,15)
1138 RhoPi( 3,21)
1139 RhoPi( 5,28)
1140 RhoPi(16,36)
1141 RhoPi( 8,45)
1142 RhoPi(21,55)
1143 RhoPi(24, 2)
1144 RhoPi( 4,14)
1145 RhoPi(15,27)
1146 RhoPi(23,41)
1147 RhoPi(19,56)
1148 RhoPi(13, 8)
1149 RhoPi(12,25)
1150 RhoPi( 2,43)
1151 RhoPi(20,62)
1152 RhoPi(14,18)
1153 RhoPi(22,39)
1154 RhoPi( 9,61)
1155 RhoPi( 6,20)
1156 RhoPi( 1,44)
1157#undef RhoPi
1158 }
1159 /* Chi */
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001160 for (x = 0; x <= 40;) {
1161 uint32_t BC0, BC1, BC2, BC3, BC4;
1162 BC0 = s32[x + 0*2];
1163 BC1 = s32[x + 1*2];
1164 BC2 = s32[x + 2*2];
1165 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1166 BC3 = s32[x + 3*2];
1167 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1168 BC4 = s32[x + 4*2];
1169 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1170 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1171 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1172 x++;
1173 BC0 = s32[x + 0*2];
1174 BC1 = s32[x + 1*2];
1175 BC2 = s32[x + 2*2];
1176 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1177 BC3 = s32[x + 3*2];
1178 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1179 BC4 = s32[x + 4*2];
1180 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1181 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1182 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1183 x += 9;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001184 }
1185 /* Iota */
1186 s32[0] ^= IOTA_CONST_0bits & 1;
1187 IOTA_CONST_0bits >>= 1;
1188 s32[1] ^= IOTA_CONST_1[round];
1189 }
1190
1191 combine_halves(state);
1192#else
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001193 /* Native 64-bit algorithm */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001194 static const uint16_t IOTA_CONST[NROUNDS] = {
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001195 /* Elements should be 64-bit, but top half is always zero
1196 * or 0x80000000. We encode 63rd bits in a separate word below.
1197 * Same is true for 31th bits, which lets us use 16-bit table
1198 * instead of 64-bit. The speed penalty is lost in the noise.
1199 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001200 0x0001,
1201 0x8082,
1202 0x808a,
1203 0x8000,
1204 0x808b,
1205 0x0001,
1206 0x8081,
1207 0x8009,
1208 0x008a,
1209 0x0088,
1210 0x8009,
1211 0x000a,
1212 0x808b,
1213 0x008b,
1214 0x8089,
1215 0x8003,
1216 0x8002,
1217 0x0080,
1218 0x800a,
1219 0x000a,
1220 0x8081,
1221 0x8080,
1222 0x0001,
1223 0x8008,
1224 };
1225 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1226 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1227 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1228 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1229
1230 static const uint8_t ROT_CONST[24] = {
1231 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1232 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1233 };
1234 static const uint8_t PI_LANE[24] = {
1235 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1236 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1237 };
1238 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001239
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001240 unsigned x;
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001241 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001242
1243 if (BB_BIG_ENDIAN) {
1244 for (x = 0; x < 25; x++) {
1245 state[x] = SWAP_LE64(state[x]);
1246 }
1247 }
1248
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001249 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001250 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001251 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001252 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001253 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001254 BC[x + 5] = BC[x] = state[x]
1255 ^ state[x + 5] ^ state[x + 10]
1256 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001257 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001258 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001259 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001260 * and the code is a bit _smaller_.
1261 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001262 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001263 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001264 state[x] ^= temp;
1265 state[x + 5] ^= temp;
1266 state[x + 10] ^= temp;
1267 state[x + 15] ^= temp;
1268 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001269 }
1270 }
1271
1272 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001273 if (SHA3_SMALL) {
1274 uint64_t t1 = state[1];
1275 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001276 uint64_t t0 = state[PI_LANE[x]];
1277 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001278 t1 = t0;
1279 }
1280 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001281 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001282 * 64-bit rotations by non-constant usually are SLOW on those.
1283 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001284 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001285 * but generates 300-500 more bytes of code.
1286 */
1287 uint64_t t0;
1288 uint64_t t1 = state[1];
1289#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001290 t0 = state[PI_LANE[x ]]; \
1291 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1292 t1 = state[PI_LANE[x+1]]; \
1293 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001294 RhoPi_twice(0); RhoPi_twice(2);
1295 RhoPi_twice(4); RhoPi_twice(6);
1296 RhoPi_twice(8); RhoPi_twice(10);
1297 RhoPi_twice(12); RhoPi_twice(14);
1298 RhoPi_twice(16); RhoPi_twice(18);
1299 RhoPi_twice(20); RhoPi_twice(22);
1300#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001301 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001302 /* Chi */
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001303# if LONG_MAX > 0x7fffffff
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001304 for (x = 0; x <= 20; x += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001305 uint64_t BC0, BC1, BC2, BC3, BC4;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001306 BC0 = state[x + 0];
1307 BC1 = state[x + 1];
1308 BC2 = state[x + 2];
1309 state[x + 0] = BC0 ^ ((~BC1) & BC2);
1310 BC3 = state[x + 3];
1311 state[x + 1] = BC1 ^ ((~BC2) & BC3);
1312 BC4 = state[x + 4];
1313 state[x + 2] = BC2 ^ ((~BC3) & BC4);
1314 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1315 state[x + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001316 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001317# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001318 /* Reduced register pressure version
1319 * for register-starved 32-bit arches
1320 * (i386: -95 bytes, and it is _faster_)
1321 */
1322 for (x = 0; x <= 40;) {
1323 uint32_t BC0, BC1, BC2, BC3, BC4;
1324 uint32_t *const s32 = (uint32_t*)state;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001325# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001326 do_half:
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001327# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001328 BC0 = s32[x + 0*2];
1329 BC1 = s32[x + 1*2];
1330 BC2 = s32[x + 2*2];
1331 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1332 BC3 = s32[x + 3*2];
1333 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1334 BC4 = s32[x + 4*2];
1335 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1336 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1337 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1338 x++;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001339# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001340 if (x & 1)
1341 goto do_half;
1342 x += 8;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001343# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001344 BC0 = s32[x + 0*2];
1345 BC1 = s32[x + 1*2];
1346 BC2 = s32[x + 2*2];
1347 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1348 BC3 = s32[x + 3*2];
1349 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1350 BC4 = s32[x + 4*2];
1351 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1352 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1353 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1354 x += 9;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001355# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001356 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001357# endif /* long is 32-bit */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001358 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001359 state[0] ^= IOTA_CONST[round]
1360 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1361 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001362 }
1363
1364 if (BB_BIG_ENDIAN) {
1365 for (x = 0; x < 25; x++) {
1366 state[x] = SWAP_LE64(state[x]);
1367 }
1368 }
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001369#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001370}
1371
1372void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1373{
1374 memset(ctx, 0, sizeof(*ctx));
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001375 /* SHA3-512, user can override */
1376 ctx->input_block_bytes = (1600 - 512*2) / 8; /* 72 bytes */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001377}
1378
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001379void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001380{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001381#if SHA3_SMALL
1382 const uint8_t *data = buffer;
1383 unsigned bufpos = ctx->bytes_queued;
1384
1385 while (1) {
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001386 unsigned remaining = ctx->input_block_bytes - bufpos;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001387 if (remaining > len)
1388 remaining = len;
1389 len -= remaining;
1390 /* XOR data into buffer */
1391 while (remaining != 0) {
1392 uint8_t *buf = (uint8_t*)ctx->state;
1393 buf[bufpos] ^= *data++;
1394 bufpos++;
1395 remaining--;
1396 }
1397 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001398 bufpos -= ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001399 if (bufpos != 0)
1400 break;
1401 /* Buffer is filled up, process it */
1402 sha3_process_block72(ctx->state);
1403 /*bufpos = 0; - already is */
1404 }
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001405 ctx->bytes_queued = bufpos + ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001406#else
1407 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1408 const uint8_t *data = buffer;
1409 unsigned bufpos = ctx->bytes_queued;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001410 unsigned iblk_bytes = ctx->input_block_bytes;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001411
1412 /* If already data in queue, continue queuing first */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001413 if (bufpos != 0) {
1414 while (len != 0) {
1415 uint8_t *buf = (uint8_t*)ctx->state;
1416 buf[bufpos] ^= *data++;
1417 len--;
1418 bufpos++;
1419 if (bufpos == iblk_bytes) {
1420 bufpos = 0;
1421 goto do_block;
1422 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001423 }
1424 }
1425
1426 /* Absorb complete blocks */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001427 while (len >= iblk_bytes) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001428 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001429 * We try to be efficient - operate one word at a time, not byte.
1430 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001431 */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001432 unsigned count = iblk_bytes / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001433 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001434 do {
1435 long v;
1436 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001437 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001438 data += sizeof(long);
1439 } while (--count);
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001440 len -= iblk_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001441 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001442 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001443 }
1444
1445 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001446 while (len != 0) {
1447 uint8_t *buf = (uint8_t*)ctx->state;
1448 buf[bufpos] ^= *data++;
1449 bufpos++;
1450 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001451 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001452
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001453 ctx->bytes_queued = bufpos;
1454#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001455}
1456
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001457unsigned FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001458{
1459 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001460 uint8_t *buf = (uint8_t*)ctx->state;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001461 /*
1462 * Keccak block padding is: add 1 bit after last bit of input,
1463 * then add zero bits until the end of block, and add the last 1 bit
1464 * (the last bit in the block) - the "10*1" pattern.
1465 * SHA3 standard appends additional two bits, 01, before that padding:
1466 *
1467 * SHA3-224(M) = KECCAK[448](M||01, 224)
1468 * SHA3-256(M) = KECCAK[512](M||01, 256)
1469 * SHA3-384(M) = KECCAK[768](M||01, 384)
1470 * SHA3-512(M) = KECCAK[1024](M||01, 512)
1471 * (M is the input, || is bit concatenation)
1472 *
1473 * The 6 below contains 01 "SHA3" bits and the first 1 "Keccak" bit:
1474 */
1475 buf[ctx->bytes_queued] ^= 6; /* bit pattern 00000110 */
1476 buf[ctx->input_block_bytes - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001477
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001478 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001479
1480 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001481 memcpy(resbuf, ctx->state, 64);
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001482 return 64;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001483}