blob: 9db79ea8bedf506fed0e5bff179c1def0b7f9002 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02009#include "libbb.h"
10
Denys Vlasenkob8935d02017-01-15 20:16:27 +010011#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
12
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020013/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
14 * (for rotX32, there is no difference). Why? My guess is that
15 * macro requires clever common subexpression elimination heuristics
16 * in gcc, while inline basically forces it to happen.
17 */
18//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
19static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
20{
21 return (x << n) | (x >> (32 - n));
22}
23//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
24static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
25{
26 return (x >> n) | (x << (32 - n));
27}
28/* rotr64 in needed for sha512 only: */
29//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
30static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
31{
32 return (x >> n) | (x << (64 - n));
33}
34
Lauri Kasanenb8173b62013-01-14 05:20:50 +010035/* rotl64 only used for sha3 currently */
36static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
37{
38 return (x << n) | (x >> (64 - n));
39}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020040
Denys Vlasenko302ad142010-10-19 02:16:12 +020041/* Feed data through a temporary buffer.
42 * The internal buffer remembers previous data until it has 64
43 * bytes worth to pass on.
44 */
45static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020046{
Denys Vlasenko302ad142010-10-19 02:16:12 +020047 unsigned bufpos = ctx->total64 & 63;
48
49 ctx->total64 += len;
50
51 while (1) {
52 unsigned remaining = 64 - bufpos;
53 if (remaining > len)
54 remaining = len;
55 /* Copy data into aligned buffer */
56 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
57 len -= remaining;
58 buffer = (const char *)buffer + remaining;
59 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +010060 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko302ad142010-10-19 02:16:12 +020061 bufpos -= 64;
62 if (bufpos != 0)
63 break;
64 /* Buffer is filled up, process it */
65 ctx->process_block(ctx);
66 /*bufpos = 0; - already is */
67 }
68}
69
70/* Process the remaining bytes in the buffer */
71static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
72{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020073 unsigned bufpos = ctx->total64 & 63;
74 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
75 ctx->wbuffer[bufpos++] = 0x80;
76
77 /* This loop iterates either once or twice, no more, no less */
78 while (1) {
79 unsigned remaining = 64 - bufpos;
80 memset(ctx->wbuffer + bufpos, 0, remaining);
81 /* Do we have enough space for the length count? */
82 if (remaining >= 8) {
83 /* Store the 64-bit counter of bits in the buffer */
84 uint64_t t = ctx->total64 << 3;
85 if (swap_needed)
86 t = bb_bswap_64(t);
87 /* wbuffer is suitably aligned for this */
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +020088 *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020089 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020090 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020091 if (remaining >= 8)
92 break;
93 bufpos = 0;
94 }
95}
96
97
98/*
Denys Vlasenko302ad142010-10-19 02:16:12 +020099 * Compute MD5 checksum of strings according to the
100 * definition of MD5 in RFC 1321 from April 1992.
101 *
102 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
103 *
104 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
105 * Copyright (C) 2001 Manuel Novoa III
106 * Copyright (C) 2003 Glenn L. McGrath
107 * Copyright (C) 2003 Erik Andersen
108 *
109 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
110 */
111
112/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200113#if CONFIG_MD5_SMALL < 0
114# define MD5_SMALL 0
115#elif CONFIG_MD5_SMALL > 3
116# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200117#else
Denys Vlasenko522041e2011-09-10 13:25:57 +0200118# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +0200119#endif
120
121/* These are the four functions used in the four steps of the MD5 algorithm
122 * and defined in the RFC 1321. The first function is a little bit optimized
123 * (as found in Colin Plumbs public domain implementation).
124 * #define FF(b, c, d) ((b & c) | (~b & d))
125 */
126#undef FF
127#undef FG
128#undef FH
129#undef FI
130#define FF(b, c, d) (d ^ (b & (c ^ d)))
131#define FG(b, c, d) FF(d, b, c)
132#define FH(b, c, d) (b ^ c ^ d)
133#define FI(b, c, d) (c ^ (b | ~d))
134
135/* Hash a single block, 64 bytes long and 4-byte aligned */
136static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
137{
Denys Vlasenko522041e2011-09-10 13:25:57 +0200138#if MD5_SMALL > 0
Denys Vlasenko302ad142010-10-19 02:16:12 +0200139 /* Before we start, one word to the strange constants.
140 They are defined in RFC 1321 as
Denys Vlasenko305958d2015-10-07 19:17:01 +0200141 T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
Denys Vlasenko302ad142010-10-19 02:16:12 +0200142 */
143 static const uint32_t C_array[] = {
144 /* round 1 */
145 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
146 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
147 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
148 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
149 /* round 2 */
150 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
151 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
152 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
153 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
154 /* round 3 */
155 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
156 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
157 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
158 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
159 /* round 4 */
160 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
161 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
162 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
163 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
164 };
165 static const char P_array[] ALIGN1 = {
Denys Vlasenko522041e2011-09-10 13:25:57 +0200166# if MD5_SMALL > 1
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200167 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200168# endif
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200169 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
170 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
171 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200172 };
173#endif
174 uint32_t *words = (void*) ctx->wbuffer;
175 uint32_t A = ctx->hash[0];
176 uint32_t B = ctx->hash[1];
177 uint32_t C = ctx->hash[2];
178 uint32_t D = ctx->hash[3];
179
Denys Vlasenko522041e2011-09-10 13:25:57 +0200180#if MD5_SMALL >= 2 /* 2 or 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200181
182 static const char S_array[] ALIGN1 = {
183 7, 12, 17, 22,
184 5, 9, 14, 20,
185 4, 11, 16, 23,
186 6, 10, 15, 21
187 };
188 const uint32_t *pc;
189 const char *pp;
190 const char *ps;
191 int i;
192 uint32_t temp;
193
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100194 if (BB_BIG_ENDIAN)
195 for (i = 0; i < 16; i++)
196 words[i] = SWAP_LE32(words[i]);
Denys Vlasenko302ad142010-10-19 02:16:12 +0200197
Denys Vlasenko522041e2011-09-10 13:25:57 +0200198# if MD5_SMALL == 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200199 pc = C_array;
200 pp = P_array;
201 ps = S_array - 4;
202
203 for (i = 0; i < 64; i++) {
204 if ((i & 0x0f) == 0)
205 ps += 4;
206 temp = A;
207 switch (i >> 4) {
208 case 0:
209 temp += FF(B, C, D);
210 break;
211 case 1:
212 temp += FG(B, C, D);
213 break;
214 case 2:
215 temp += FH(B, C, D);
216 break;
Denys Vlasenko305958d2015-10-07 19:17:01 +0200217 default: /* case 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200218 temp += FI(B, C, D);
219 }
220 temp += words[(int) (*pp++)] + *pc++;
221 temp = rotl32(temp, ps[i & 3]);
222 temp += B;
223 A = D;
224 D = C;
225 C = B;
226 B = temp;
227 }
Denys Vlasenko522041e2011-09-10 13:25:57 +0200228# else /* MD5_SMALL == 2 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200229 pc = C_array;
230 pp = P_array;
231 ps = S_array;
232
233 for (i = 0; i < 16; i++) {
234 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
235 temp = rotl32(temp, ps[i & 3]);
236 temp += B;
237 A = D;
238 D = C;
239 C = B;
240 B = temp;
241 }
242 ps += 4;
243 for (i = 0; i < 16; i++) {
244 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
245 temp = rotl32(temp, ps[i & 3]);
246 temp += B;
247 A = D;
248 D = C;
249 C = B;
250 B = temp;
251 }
252 ps += 4;
253 for (i = 0; i < 16; i++) {
254 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
255 temp = rotl32(temp, ps[i & 3]);
256 temp += B;
257 A = D;
258 D = C;
259 C = B;
260 B = temp;
261 }
262 ps += 4;
263 for (i = 0; i < 16; i++) {
264 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
265 temp = rotl32(temp, ps[i & 3]);
266 temp += B;
267 A = D;
268 D = C;
269 C = B;
270 B = temp;
271 }
272# endif
273 /* Add checksum to the starting values */
274 ctx->hash[0] += A;
275 ctx->hash[1] += B;
276 ctx->hash[2] += C;
277 ctx->hash[3] += D;
278
Denys Vlasenko522041e2011-09-10 13:25:57 +0200279#else /* MD5_SMALL == 0 or 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200280
Denys Vlasenko522041e2011-09-10 13:25:57 +0200281# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200282 const uint32_t *pc;
283 const char *pp;
284 int i;
285# endif
286
287 /* First round: using the given function, the context and a constant
288 the next context is computed. Because the algorithm's processing
289 unit is a 32-bit word and it is determined to work on words in
290 little endian byte order we perhaps have to change the byte order
291 before the computation. To reduce the work for the next steps
292 we save swapped words in WORDS array. */
293# undef OP
294# define OP(a, b, c, d, s, T) \
295 do { \
296 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
297 words++; \
298 a = rotl32(a, s); \
299 a += b; \
300 } while (0)
301
302 /* Round 1 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200303# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200304 pc = C_array;
305 for (i = 0; i < 4; i++) {
306 OP(A, B, C, D, 7, *pc++);
307 OP(D, A, B, C, 12, *pc++);
308 OP(C, D, A, B, 17, *pc++);
309 OP(B, C, D, A, 22, *pc++);
310 }
311# else
312 OP(A, B, C, D, 7, 0xd76aa478);
313 OP(D, A, B, C, 12, 0xe8c7b756);
314 OP(C, D, A, B, 17, 0x242070db);
315 OP(B, C, D, A, 22, 0xc1bdceee);
316 OP(A, B, C, D, 7, 0xf57c0faf);
317 OP(D, A, B, C, 12, 0x4787c62a);
318 OP(C, D, A, B, 17, 0xa8304613);
319 OP(B, C, D, A, 22, 0xfd469501);
320 OP(A, B, C, D, 7, 0x698098d8);
321 OP(D, A, B, C, 12, 0x8b44f7af);
322 OP(C, D, A, B, 17, 0xffff5bb1);
323 OP(B, C, D, A, 22, 0x895cd7be);
324 OP(A, B, C, D, 7, 0x6b901122);
325 OP(D, A, B, C, 12, 0xfd987193);
326 OP(C, D, A, B, 17, 0xa679438e);
327 OP(B, C, D, A, 22, 0x49b40821);
328# endif
329 words -= 16;
330
331 /* For the second to fourth round we have the possibly swapped words
332 in WORDS. Redefine the macro to take an additional first
333 argument specifying the function to use. */
334# undef OP
335# define OP(f, a, b, c, d, k, s, T) \
336 do { \
337 a += f(b, c, d) + words[k] + T; \
338 a = rotl32(a, s); \
339 a += b; \
340 } while (0)
341
342 /* Round 2 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200343# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200344 pp = P_array;
345 for (i = 0; i < 4; i++) {
346 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
347 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
348 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
349 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
350 }
351# else
352 OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
353 OP(FG, D, A, B, C, 6, 9, 0xc040b340);
354 OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
355 OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
356 OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
357 OP(FG, D, A, B, C, 10, 9, 0x02441453);
358 OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
359 OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
360 OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
361 OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
362 OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
363 OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
364 OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
365 OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
366 OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
367 OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
368# endif
369
370 /* Round 3 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200371# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200372 for (i = 0; i < 4; i++) {
373 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
374 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
375 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
376 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
377 }
378# else
379 OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
380 OP(FH, D, A, B, C, 8, 11, 0x8771f681);
381 OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
382 OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
383 OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
384 OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
385 OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
386 OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
387 OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
388 OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
389 OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
390 OP(FH, B, C, D, A, 6, 23, 0x04881d05);
391 OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
392 OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
393 OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
394 OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
395# endif
396
397 /* Round 4 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200398# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200399 for (i = 0; i < 4; i++) {
400 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
401 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
402 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
403 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
404 }
405# else
406 OP(FI, A, B, C, D, 0, 6, 0xf4292244);
407 OP(FI, D, A, B, C, 7, 10, 0x432aff97);
408 OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
409 OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
410 OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
411 OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
412 OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
413 OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
414 OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
415 OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
416 OP(FI, C, D, A, B, 6, 15, 0xa3014314);
417 OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
418 OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
419 OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
420 OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
421 OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
422# undef OP
423# endif
424 /* Add checksum to the starting values */
Denys Vlasenko305958d2015-10-07 19:17:01 +0200425 ctx->hash[0] += A;
426 ctx->hash[1] += B;
427 ctx->hash[2] += C;
428 ctx->hash[3] += D;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200429#endif
430}
431#undef FF
432#undef FG
433#undef FH
434#undef FI
435
436/* Initialize structure containing state of computation.
437 * (RFC 1321, 3.3: Step 3)
438 */
439void FAST_FUNC md5_begin(md5_ctx_t *ctx)
440{
441 ctx->hash[0] = 0x67452301;
442 ctx->hash[1] = 0xefcdab89;
443 ctx->hash[2] = 0x98badcfe;
444 ctx->hash[3] = 0x10325476;
445 ctx->total64 = 0;
446 ctx->process_block = md5_process_block64;
447}
448
449/* Used also for sha1 and sha256 */
450void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
451{
452 common64_hash(ctx, buffer, len);
453}
454
455/* Process the remaining bytes in the buffer and put result from CTX
456 * in first 16 bytes following RESBUF. The result is always in little
457 * endian byte order, so that a byte-wise output yields to the wanted
458 * ASCII representation of the message digest.
459 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100460unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
Denys Vlasenko302ad142010-10-19 02:16:12 +0200461{
462 /* MD5 stores total in LE, need to swap on BE arches: */
463 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
464
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200465 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100466 if (BB_BIG_ENDIAN) {
467 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
468 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
469 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
470 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
471 }
472
Denys Vlasenko302ad142010-10-19 02:16:12 +0200473 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100474 return sizeof(ctx->hash[0]) * 4;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200475}
476
477
478/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200479 * SHA1 part is:
480 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000481 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200482 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
483 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000484 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200485 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000486 *
487 * ---------------------------------------------------------------------------
488 *
489 * SHA256 and SHA512 parts are:
490 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000491 * Shrank by Denys Vlasenko.
492 *
493 * ---------------------------------------------------------------------------
494 *
495 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
496 * and replace "4096" with something like "2000 + time(NULL) % 2097",
497 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000498 */
499
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000500static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000501{
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200502 static const uint32_t rconsts[] = {
503 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
504 };
505 int i, j;
506 int cnt;
507 uint32_t W[16+16];
508 uint32_t a, b, c, d, e;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000509
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200510 /* On-stack work buffer frees up one register in the main loop
511 * which otherwise will be needed to hold ctx pointer */
512 for (i = 0; i < 16; i++)
513 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000514
515 a = ctx->hash[0];
516 b = ctx->hash[1];
517 c = ctx->hash[2];
518 d = ctx->hash[3];
519 e = ctx->hash[4];
520
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200521 /* 4 rounds of 20 operations each */
522 cnt = 0;
523 for (i = 0; i < 4; i++) {
524 j = 19;
525 do {
526 uint32_t work;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000527
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200528 work = c ^ d;
529 if (i == 0) {
530 work = (work & b) ^ d;
531 if (j <= 3)
532 goto ge16;
533 /* Used to do SWAP_BE32 here, but this
534 * requires ctx (see comment above) */
535 work += W[cnt];
536 } else {
537 if (i == 2)
538 work = ((b | c) & d) | (b & c);
539 else /* i = 1 or 3 */
540 work ^= b;
541 ge16:
542 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
543 work += W[cnt];
544 }
Denys Vlasenko03a5fe32010-10-24 20:51:28 +0200545 work += e + rotl32(a, 5) + rconsts[i];
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000546
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200547 /* Rotate by one for next time */
548 e = d;
549 d = c;
550 c = /* b = */ rotl32(b, 30);
551 b = a;
552 a = work;
553 cnt = (cnt + 1) & 15;
554 } while (--j >= 0);
555 }
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000556
557 ctx->hash[0] += a;
558 ctx->hash[1] += b;
559 ctx->hash[2] += c;
560 ctx->hash[3] += d;
561 ctx->hash[4] += e;
562}
563
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000564/* Constants for SHA512 from FIPS 180-2:4.2.3.
565 * SHA256 constants from FIPS 180-2:4.2.2
566 * are the most significant half of first 64 elements
567 * of the same array.
568 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100569#undef K
570#if NEED_SHA512
571typedef uint64_t sha_K_int;
572# define K(v) v
573#else
574typedef uint32_t sha_K_int;
575# define K(v) (uint32_t)(v >> 32)
576#endif
577static const sha_K_int sha_K[] = {
578 K(0x428a2f98d728ae22ULL), K(0x7137449123ef65cdULL),
579 K(0xb5c0fbcfec4d3b2fULL), K(0xe9b5dba58189dbbcULL),
580 K(0x3956c25bf348b538ULL), K(0x59f111f1b605d019ULL),
581 K(0x923f82a4af194f9bULL), K(0xab1c5ed5da6d8118ULL),
582 K(0xd807aa98a3030242ULL), K(0x12835b0145706fbeULL),
583 K(0x243185be4ee4b28cULL), K(0x550c7dc3d5ffb4e2ULL),
584 K(0x72be5d74f27b896fULL), K(0x80deb1fe3b1696b1ULL),
585 K(0x9bdc06a725c71235ULL), K(0xc19bf174cf692694ULL),
586 K(0xe49b69c19ef14ad2ULL), K(0xefbe4786384f25e3ULL),
587 K(0x0fc19dc68b8cd5b5ULL), K(0x240ca1cc77ac9c65ULL),
588 K(0x2de92c6f592b0275ULL), K(0x4a7484aa6ea6e483ULL),
589 K(0x5cb0a9dcbd41fbd4ULL), K(0x76f988da831153b5ULL),
590 K(0x983e5152ee66dfabULL), K(0xa831c66d2db43210ULL),
591 K(0xb00327c898fb213fULL), K(0xbf597fc7beef0ee4ULL),
592 K(0xc6e00bf33da88fc2ULL), K(0xd5a79147930aa725ULL),
593 K(0x06ca6351e003826fULL), K(0x142929670a0e6e70ULL),
594 K(0x27b70a8546d22ffcULL), K(0x2e1b21385c26c926ULL),
595 K(0x4d2c6dfc5ac42aedULL), K(0x53380d139d95b3dfULL),
596 K(0x650a73548baf63deULL), K(0x766a0abb3c77b2a8ULL),
597 K(0x81c2c92e47edaee6ULL), K(0x92722c851482353bULL),
598 K(0xa2bfe8a14cf10364ULL), K(0xa81a664bbc423001ULL),
599 K(0xc24b8b70d0f89791ULL), K(0xc76c51a30654be30ULL),
600 K(0xd192e819d6ef5218ULL), K(0xd69906245565a910ULL),
601 K(0xf40e35855771202aULL), K(0x106aa07032bbd1b8ULL),
602 K(0x19a4c116b8d2d0c8ULL), K(0x1e376c085141ab53ULL),
603 K(0x2748774cdf8eeb99ULL), K(0x34b0bcb5e19b48a8ULL),
604 K(0x391c0cb3c5c95a63ULL), K(0x4ed8aa4ae3418acbULL),
605 K(0x5b9cca4f7763e373ULL), K(0x682e6ff3d6b2b8a3ULL),
606 K(0x748f82ee5defb2fcULL), K(0x78a5636f43172f60ULL),
607 K(0x84c87814a1f0ab72ULL), K(0x8cc702081a6439ecULL),
608 K(0x90befffa23631e28ULL), K(0xa4506cebde82bde9ULL),
609 K(0xbef9a3f7b2c67915ULL), K(0xc67178f2e372532bULL),
610#if NEED_SHA512 /* [64]+ are used for sha512 only */
611 K(0xca273eceea26619cULL), K(0xd186b8c721c0c207ULL),
612 K(0xeada7dd6cde0eb1eULL), K(0xf57d4f7fee6ed178ULL),
613 K(0x06f067aa72176fbaULL), K(0x0a637dc5a2c898a6ULL),
614 K(0x113f9804bef90daeULL), K(0x1b710b35131c471bULL),
615 K(0x28db77f523047d84ULL), K(0x32caab7b40c72493ULL),
616 K(0x3c9ebe0a15c9bebcULL), K(0x431d67c49c100d4cULL),
617 K(0x4cc5d4becb3e42b6ULL), K(0x597f299cfc657e2aULL),
618 K(0x5fcb6fab3ad6faecULL), K(0x6c44198c4a475817ULL),
619#endif
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000620};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100621#undef K
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000622
Denys Vlasenkofe4ef362009-07-05 20:34:38 +0200623#undef Ch
624#undef Maj
625#undef S0
626#undef S1
627#undef R0
628#undef R1
629
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000630static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000631{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000632 unsigned t;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000633 uint32_t W[64], a, b, c, d, e, f, g, h;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000634 const uint32_t *words = (uint32_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000635
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000636 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000637#define Ch(x, y, z) ((x & y) ^ (~x & z))
638#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
639#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
640#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
641#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
642#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
643
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000644 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200645 for (t = 0; t < 16; ++t)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200646 W[t] = SWAP_BE32(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000647 for (/*t = 16*/; t < 64; ++t)
648 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000649
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000650 a = ctx->hash[0];
651 b = ctx->hash[1];
652 c = ctx->hash[2];
653 d = ctx->hash[3];
654 e = ctx->hash[4];
655 f = ctx->hash[5];
656 g = ctx->hash[6];
657 h = ctx->hash[7];
658
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000659 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
660 for (t = 0; t < 64; ++t) {
Denis Vlasenkoa2333c82009-03-28 19:08:23 +0000661 /* Need to fetch upper half of sha_K[t]
662 * (I hope compiler is clever enough to just fetch
663 * upper half)
664 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100665 uint32_t K_t = NEED_SHA512 ? (sha_K[t] >> 32) : sha_K[t];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000666 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000667 uint32_t T2 = S0(a) + Maj(a, b, c);
668 h = g;
669 g = f;
670 f = e;
671 e = d + T1;
672 d = c;
673 c = b;
674 b = a;
675 a = T1 + T2;
676 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000677#undef Ch
678#undef Maj
679#undef S0
680#undef S1
681#undef R0
682#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000683 /* Add the starting values of the context according to FIPS 180-2:6.2.2
684 step 4. */
685 ctx->hash[0] += a;
686 ctx->hash[1] += b;
687 ctx->hash[2] += c;
688 ctx->hash[3] += d;
689 ctx->hash[4] += e;
690 ctx->hash[5] += f;
691 ctx->hash[6] += g;
692 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000693}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000694
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100695#if NEED_SHA512
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000696static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000697{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000698 unsigned t;
699 uint64_t W[80];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000700 /* On i386, having assignments here (not later as sha256 does)
701 * produces 99 bytes smaller code with gcc 4.3.1
702 */
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000703 uint64_t a = ctx->hash[0];
704 uint64_t b = ctx->hash[1];
705 uint64_t c = ctx->hash[2];
706 uint64_t d = ctx->hash[3];
707 uint64_t e = ctx->hash[4];
708 uint64_t f = ctx->hash[5];
709 uint64_t g = ctx->hash[6];
710 uint64_t h = ctx->hash[7];
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000711 const uint64_t *words = (uint64_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000712
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000713 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000714#define Ch(x, y, z) ((x & y) ^ (~x & z))
715#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
716#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
717#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
718#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
719#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
720
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000721 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200722 for (t = 0; t < 16; ++t)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200723 W[t] = SWAP_BE64(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000724 for (/*t = 16*/; t < 80; ++t)
725 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000726
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000727 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
728 for (t = 0; t < 80; ++t) {
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000729 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000730 uint64_t T2 = S0(a) + Maj(a, b, c);
731 h = g;
732 g = f;
733 f = e;
734 e = d + T1;
735 d = c;
736 c = b;
737 b = a;
738 a = T1 + T2;
739 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000740#undef Ch
741#undef Maj
742#undef S0
743#undef S1
744#undef R0
745#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000746 /* Add the starting values of the context according to FIPS 180-2:6.3.2
747 step 4. */
748 ctx->hash[0] += a;
749 ctx->hash[1] += b;
750 ctx->hash[2] += c;
751 ctx->hash[3] += d;
752 ctx->hash[4] += e;
753 ctx->hash[5] += f;
754 ctx->hash[6] += g;
755 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000756}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100757#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000758
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +0000759void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000760{
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000761 ctx->hash[0] = 0x67452301;
762 ctx->hash[1] = 0xefcdab89;
763 ctx->hash[2] = 0x98badcfe;
764 ctx->hash[3] = 0x10325476;
765 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000766 ctx->total64 = 0;
767 ctx->process_block = sha1_process_block64;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000768}
769
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000770static const uint32_t init256[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200771 0,
772 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000773 0x6a09e667,
774 0xbb67ae85,
775 0x3c6ef372,
776 0xa54ff53a,
777 0x510e527f,
778 0x9b05688c,
779 0x1f83d9ab,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200780 0x5be0cd19,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000781};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100782#if NEED_SHA512
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000783static const uint32_t init512_lo[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200784 0,
785 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000786 0xf3bcc908,
787 0x84caa73b,
788 0xfe94f82b,
789 0x5f1d36f1,
790 0xade682d1,
791 0x2b3e6c1f,
792 0xfb41bd6b,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200793 0x137e2179,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000794};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100795#endif /* NEED_SHA512 */
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000796
Denys Vlasenkof69f2072018-11-26 13:00:28 +0100797// Note: SHA-384 is identical to SHA-512, except that initial hash values are
798// 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939,
799// 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
800// and the output is constructed by omitting last two 64-bit words of it.
801
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000802/* Initialize structure containing state of computation.
803 (FIPS 180-2:5.3.2) */
804void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
805{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200806 memcpy(&ctx->total64, init256, sizeof(init256));
807 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000808 ctx->process_block = sha256_process_block64;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000809}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000810
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100811#if NEED_SHA512
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000812/* Initialize structure containing state of computation.
813 (FIPS 180-2:5.3.3) */
814void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
815{
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000816 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200817 /* Two extra iterations zero out ctx->total64[2] */
818 uint64_t *tp = ctx->total64;
819 for (i = 0; i < 2+8; i++)
820 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200821 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000822}
823
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200824void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000825{
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200826 unsigned bufpos = ctx->total64[0] & 127;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200827 unsigned remaining;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000828
829 /* First increment the byte count. FIPS 180-2 specifies the possible
830 length of the file up to 2^128 _bits_.
831 We compute the number of _bytes_ and convert to bits later. */
832 ctx->total64[0] += len;
833 if (ctx->total64[0] < len)
834 ctx->total64[1]++;
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100835# if 0
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200836 remaining = 128 - bufpos;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000837
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200838 /* Hash whole blocks */
839 while (len >= remaining) {
840 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
841 buffer = (const char *)buffer + remaining;
842 len -= remaining;
843 remaining = 128;
844 bufpos = 0;
845 sha512_process_block128(ctx);
846 }
847
848 /* Save last, partial blosk */
849 memcpy(ctx->wbuffer + bufpos, buffer, len);
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100850# else
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200851 while (1) {
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200852 remaining = 128 - bufpos;
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200853 if (remaining > len)
854 remaining = len;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200855 /* Copy data into aligned buffer */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200856 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
857 len -= remaining;
858 buffer = (const char *)buffer + remaining;
859 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +0100860 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200861 bufpos -= 128;
862 if (bufpos != 0)
863 break;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200864 /* Buffer is filled up, process it */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000865 sha512_process_block128(ctx);
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200866 /*bufpos = 0; - already is */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000867 }
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100868# endif
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000869}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100870#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000871
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000872/* Used also for sha256 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100873unsigned FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000874{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200875 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000876
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200877 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200878 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000879
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200880 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000881 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100882 if (BB_LITTLE_ENDIAN) {
883 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200884 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200885 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100886 }
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100887 hash_size *= sizeof(ctx->hash[0]);
888 memcpy(resbuf, ctx->hash, hash_size);
889 return hash_size;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000890}
891
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100892#if NEED_SHA512
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100893unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000894{
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200895 unsigned bufpos = ctx->total64[0] & 127;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000896
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200897 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200898 ctx->wbuffer[bufpos++] = 0x80;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000899
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000900 while (1) {
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200901 unsigned remaining = 128 - bufpos;
902 memset(ctx->wbuffer + bufpos, 0, remaining);
903 if (remaining >= 16) {
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000904 /* Store the 128-bit counter of bits in the buffer in BE format */
905 uint64_t t;
906 t = ctx->total64[0] << 3;
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200907 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200908 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000909 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200910 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200911 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000912 }
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000913 sha512_process_block128(ctx);
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200914 if (remaining >= 16)
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000915 break;
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200916 bufpos = 0;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000917 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000918
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100919 if (BB_LITTLE_ENDIAN) {
920 unsigned i;
921 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200922 ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100923 }
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000924 memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100925 return sizeof(ctx->hash);
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000926}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100927#endif /* NEED_SHA512 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100928
929
930/*
931 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
932 * Michael Peeters and Gilles Van Assche. For more information, feedback or
933 * questions, please refer to our website: http://keccak.noekeon.org/
934 *
935 * Implementation by Ronny Van Keer,
936 * hereby denoted as "the implementer".
937 *
938 * To the extent possible under law, the implementer has waived all copyright
939 * and related or neighboring rights to the source code in this file.
940 * http://creativecommons.org/publicdomain/zero/1.0/
941 *
942 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
943 */
944
Denys Vlasenko30a86522013-01-15 01:12:26 +0100945#if CONFIG_SHA3_SMALL < 0
946# define SHA3_SMALL 0
947#elif CONFIG_SHA3_SMALL > 1
948# define SHA3_SMALL 1
949#else
950# define SHA3_SMALL CONFIG_SHA3_SMALL
951#endif
952
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200953#define OPTIMIZE_SHA3_FOR_32 0
954/*
955 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
956 * every 64-bit word of state[] can be split into two 32-bit words
957 * by even/odd bits. In this form, all rotations of sha3 round
958 * are 32-bit - and there are lots of them.
959 * However, it requires either splitting/combining state words
960 * before/after sha3 round (code does this now)
961 * or shuffling bits before xor'ing them into state and in sha3_end.
962 * Without shuffling, bit-slicing results in -130 bytes of code
963 * and marginal speedup (but of course it gives wrong result).
964 * With shuffling it works, but +260 code bytes, and slower.
965 * Disabled for now:
966 */
967#if 0 /* LONG_MAX == 0x7fffffff */
968# undef OPTIMIZE_SHA3_FOR_32
969# define OPTIMIZE_SHA3_FOR_32 1
970#endif
971
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200972#if OPTIMIZE_SHA3_FOR_32
973/* This splits every 64-bit word into a pair of 32-bit words,
974 * even bits go into first word, odd bits go to second one.
975 * The conversion is done in-place.
976 */
977static void split_halves(uint64_t *state)
978{
979 /* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
980 uint32_t *s32 = (uint32_t*)state;
981 uint32_t t, x0, x1;
982 int i;
983 for (i = 24; i >= 0; --i) {
984 x0 = s32[0];
985 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
986 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
987 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
988 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
989 x1 = s32[1];
990 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
991 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
992 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
993 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
994 *s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
995 *s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
996 }
997}
998/* The reverse operation */
999static void combine_halves(uint64_t *state)
1000{
1001 uint32_t *s32 = (uint32_t*)state;
1002 uint32_t t, x0, x1;
1003 int i;
1004 for (i = 24; i >= 0; --i) {
1005 x0 = s32[0];
1006 x1 = s32[1];
1007 t = (x0 & 0x0000FFFF) | (x1 << 16);
1008 x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
1009 x0 = t;
1010 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
1011 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
1012 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
1013 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
1014 *s32++ = x0;
1015 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
1016 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
1017 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
1018 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
1019 *s32++ = x1;
1020 }
1021}
1022#endif
1023
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001024/*
1025 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +01001026 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001027static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001028{
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001029 enum { NROUNDS = 24 };
1030
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001031#if OPTIMIZE_SHA3_FOR_32
1032 /*
1033 static const uint32_t IOTA_CONST_0[NROUNDS] = {
1034 0x00000001UL,
1035 0x00000000UL,
1036 0x00000000UL,
1037 0x00000000UL,
1038 0x00000001UL,
1039 0x00000001UL,
1040 0x00000001UL,
1041 0x00000001UL,
1042 0x00000000UL,
1043 0x00000000UL,
1044 0x00000001UL,
1045 0x00000000UL,
1046 0x00000001UL,
1047 0x00000001UL,
1048 0x00000001UL,
1049 0x00000001UL,
1050 0x00000000UL,
1051 0x00000000UL,
1052 0x00000000UL,
1053 0x00000000UL,
1054 0x00000001UL,
1055 0x00000000UL,
1056 0x00000001UL,
1057 0x00000000UL,
1058 };
1059 ** bits are in lsb: 0101 0000 1111 0100 1111 0001
1060 */
1061 uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
1062 static const uint32_t IOTA_CONST_1[NROUNDS] = {
1063 0x00000000UL,
1064 0x00000089UL,
1065 0x8000008bUL,
1066 0x80008080UL,
1067 0x0000008bUL,
1068 0x00008000UL,
1069 0x80008088UL,
1070 0x80000082UL,
1071 0x0000000bUL,
1072 0x0000000aUL,
1073 0x00008082UL,
1074 0x00008003UL,
1075 0x0000808bUL,
1076 0x8000000bUL,
1077 0x8000008aUL,
1078 0x80000081UL,
1079 0x80000081UL,
1080 0x80000008UL,
1081 0x00000083UL,
1082 0x80008003UL,
1083 0x80008088UL,
1084 0x80000088UL,
1085 0x00008000UL,
1086 0x80008082UL,
1087 };
1088
1089 uint32_t *const s32 = (uint32_t*)state;
1090 unsigned round;
1091
1092 split_halves(state);
1093
1094 for (round = 0; round < NROUNDS; round++) {
1095 unsigned x;
1096
1097 /* Theta */
1098 {
1099 uint32_t BC[20];
1100 for (x = 0; x < 10; ++x) {
1101 BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1102 }
1103 for (x = 0; x < 10; x += 2) {
1104 uint32_t ta, tb;
1105 ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1106 tb = BC[x+9] ^ BC[x+2];
1107 s32[x+0] ^= ta;
1108 s32[x+1] ^= tb;
1109 s32[x+10] ^= ta;
1110 s32[x+11] ^= tb;
1111 s32[x+20] ^= ta;
1112 s32[x+21] ^= tb;
1113 s32[x+30] ^= ta;
1114 s32[x+31] ^= tb;
1115 s32[x+40] ^= ta;
1116 s32[x+41] ^= tb;
1117 }
1118 }
1119 /* RhoPi */
1120 {
1121 uint32_t t0a,t0b, t1a,t1b;
1122 t1a = s32[1*2+0];
1123 t1b = s32[1*2+1];
1124
1125#define RhoPi(PI_LANE, ROT_CONST) \
1126 t0a = s32[PI_LANE*2+0];\
1127 t0b = s32[PI_LANE*2+1];\
1128 if (ROT_CONST & 1) {\
1129 s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1130 s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1131 } else {\
1132 s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1133 s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1134 }\
1135 t1a = t0a; t1b = t0b;
1136
1137 RhoPi(10, 1)
1138 RhoPi( 7, 3)
1139 RhoPi(11, 6)
1140 RhoPi(17,10)
1141 RhoPi(18,15)
1142 RhoPi( 3,21)
1143 RhoPi( 5,28)
1144 RhoPi(16,36)
1145 RhoPi( 8,45)
1146 RhoPi(21,55)
1147 RhoPi(24, 2)
1148 RhoPi( 4,14)
1149 RhoPi(15,27)
1150 RhoPi(23,41)
1151 RhoPi(19,56)
1152 RhoPi(13, 8)
1153 RhoPi(12,25)
1154 RhoPi( 2,43)
1155 RhoPi(20,62)
1156 RhoPi(14,18)
1157 RhoPi(22,39)
1158 RhoPi( 9,61)
1159 RhoPi( 6,20)
1160 RhoPi( 1,44)
1161#undef RhoPi
1162 }
1163 /* Chi */
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001164 for (x = 0; x <= 40;) {
1165 uint32_t BC0, BC1, BC2, BC3, BC4;
1166 BC0 = s32[x + 0*2];
1167 BC1 = s32[x + 1*2];
1168 BC2 = s32[x + 2*2];
1169 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1170 BC3 = s32[x + 3*2];
1171 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1172 BC4 = s32[x + 4*2];
1173 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1174 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1175 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1176 x++;
1177 BC0 = s32[x + 0*2];
1178 BC1 = s32[x + 1*2];
1179 BC2 = s32[x + 2*2];
1180 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1181 BC3 = s32[x + 3*2];
1182 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1183 BC4 = s32[x + 4*2];
1184 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1185 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1186 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1187 x += 9;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001188 }
1189 /* Iota */
1190 s32[0] ^= IOTA_CONST_0bits & 1;
1191 IOTA_CONST_0bits >>= 1;
1192 s32[1] ^= IOTA_CONST_1[round];
1193 }
1194
1195 combine_halves(state);
1196#else
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001197 /* Native 64-bit algorithm */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001198 static const uint16_t IOTA_CONST[NROUNDS] = {
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001199 /* Elements should be 64-bit, but top half is always zero
1200 * or 0x80000000. We encode 63rd bits in a separate word below.
1201 * Same is true for 31th bits, which lets us use 16-bit table
1202 * instead of 64-bit. The speed penalty is lost in the noise.
1203 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001204 0x0001,
1205 0x8082,
1206 0x808a,
1207 0x8000,
1208 0x808b,
1209 0x0001,
1210 0x8081,
1211 0x8009,
1212 0x008a,
1213 0x0088,
1214 0x8009,
1215 0x000a,
1216 0x808b,
1217 0x008b,
1218 0x8089,
1219 0x8003,
1220 0x8002,
1221 0x0080,
1222 0x800a,
1223 0x000a,
1224 0x8081,
1225 0x8080,
1226 0x0001,
1227 0x8008,
1228 };
1229 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1230 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1231 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1232 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1233
1234 static const uint8_t ROT_CONST[24] = {
1235 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1236 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1237 };
1238 static const uint8_t PI_LANE[24] = {
1239 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1240 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1241 };
1242 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001243
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001244 unsigned x;
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001245 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001246
1247 if (BB_BIG_ENDIAN) {
1248 for (x = 0; x < 25; x++) {
1249 state[x] = SWAP_LE64(state[x]);
1250 }
1251 }
1252
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001253 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001254 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001255 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001256 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001257 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001258 BC[x + 5] = BC[x] = state[x]
1259 ^ state[x + 5] ^ state[x + 10]
1260 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001261 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001262 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001263 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001264 * and the code is a bit _smaller_.
1265 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001266 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001267 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001268 state[x] ^= temp;
1269 state[x + 5] ^= temp;
1270 state[x + 10] ^= temp;
1271 state[x + 15] ^= temp;
1272 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001273 }
1274 }
1275
1276 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001277 if (SHA3_SMALL) {
1278 uint64_t t1 = state[1];
1279 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001280 uint64_t t0 = state[PI_LANE[x]];
1281 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001282 t1 = t0;
1283 }
1284 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001285 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001286 * 64-bit rotations by non-constant usually are SLOW on those.
1287 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001288 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001289 * but generates 300-500 more bytes of code.
1290 */
1291 uint64_t t0;
1292 uint64_t t1 = state[1];
1293#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001294 t0 = state[PI_LANE[x ]]; \
1295 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1296 t1 = state[PI_LANE[x+1]]; \
1297 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001298 RhoPi_twice(0); RhoPi_twice(2);
1299 RhoPi_twice(4); RhoPi_twice(6);
1300 RhoPi_twice(8); RhoPi_twice(10);
1301 RhoPi_twice(12); RhoPi_twice(14);
1302 RhoPi_twice(16); RhoPi_twice(18);
1303 RhoPi_twice(20); RhoPi_twice(22);
1304#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001305 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001306 /* Chi */
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001307# if LONG_MAX > 0x7fffffff
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001308 for (x = 0; x <= 20; x += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001309 uint64_t BC0, BC1, BC2, BC3, BC4;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001310 BC0 = state[x + 0];
1311 BC1 = state[x + 1];
1312 BC2 = state[x + 2];
1313 state[x + 0] = BC0 ^ ((~BC1) & BC2);
1314 BC3 = state[x + 3];
1315 state[x + 1] = BC1 ^ ((~BC2) & BC3);
1316 BC4 = state[x + 4];
1317 state[x + 2] = BC2 ^ ((~BC3) & BC4);
1318 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1319 state[x + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001320 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001321# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001322 /* Reduced register pressure version
1323 * for register-starved 32-bit arches
1324 * (i386: -95 bytes, and it is _faster_)
1325 */
1326 for (x = 0; x <= 40;) {
1327 uint32_t BC0, BC1, BC2, BC3, BC4;
1328 uint32_t *const s32 = (uint32_t*)state;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001329# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001330 do_half:
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001331# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001332 BC0 = s32[x + 0*2];
1333 BC1 = s32[x + 1*2];
1334 BC2 = s32[x + 2*2];
1335 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1336 BC3 = s32[x + 3*2];
1337 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1338 BC4 = s32[x + 4*2];
1339 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1340 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1341 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1342 x++;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001343# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001344 if (x & 1)
1345 goto do_half;
1346 x += 8;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001347# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001348 BC0 = s32[x + 0*2];
1349 BC1 = s32[x + 1*2];
1350 BC2 = s32[x + 2*2];
1351 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1352 BC3 = s32[x + 3*2];
1353 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1354 BC4 = s32[x + 4*2];
1355 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1356 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1357 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1358 x += 9;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001359# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001360 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001361# endif /* long is 32-bit */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001362 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001363 state[0] ^= IOTA_CONST[round]
1364 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1365 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001366 }
1367
1368 if (BB_BIG_ENDIAN) {
1369 for (x = 0; x < 25; x++) {
1370 state[x] = SWAP_LE64(state[x]);
1371 }
1372 }
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001373#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001374}
1375
1376void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1377{
1378 memset(ctx, 0, sizeof(*ctx));
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001379 /* SHA3-512, user can override */
1380 ctx->input_block_bytes = (1600 - 512*2) / 8; /* 72 bytes */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001381}
1382
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001383void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001384{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001385#if SHA3_SMALL
1386 const uint8_t *data = buffer;
1387 unsigned bufpos = ctx->bytes_queued;
1388
1389 while (1) {
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001390 unsigned remaining = ctx->input_block_bytes - bufpos;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001391 if (remaining > len)
1392 remaining = len;
1393 len -= remaining;
1394 /* XOR data into buffer */
1395 while (remaining != 0) {
1396 uint8_t *buf = (uint8_t*)ctx->state;
1397 buf[bufpos] ^= *data++;
1398 bufpos++;
1399 remaining--;
1400 }
1401 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001402 bufpos -= ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001403 if (bufpos != 0)
1404 break;
1405 /* Buffer is filled up, process it */
1406 sha3_process_block72(ctx->state);
1407 /*bufpos = 0; - already is */
1408 }
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001409 ctx->bytes_queued = bufpos + ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001410#else
1411 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1412 const uint8_t *data = buffer;
1413 unsigned bufpos = ctx->bytes_queued;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001414 unsigned iblk_bytes = ctx->input_block_bytes;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001415
1416 /* If already data in queue, continue queuing first */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001417 if (bufpos != 0) {
1418 while (len != 0) {
1419 uint8_t *buf = (uint8_t*)ctx->state;
1420 buf[bufpos] ^= *data++;
1421 len--;
1422 bufpos++;
1423 if (bufpos == iblk_bytes) {
1424 bufpos = 0;
1425 goto do_block;
1426 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001427 }
1428 }
1429
1430 /* Absorb complete blocks */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001431 while (len >= iblk_bytes) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001432 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001433 * We try to be efficient - operate one word at a time, not byte.
1434 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001435 */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001436 unsigned count = iblk_bytes / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001437 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001438 do {
1439 long v;
1440 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001441 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001442 data += sizeof(long);
1443 } while (--count);
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001444 len -= iblk_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001445 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001446 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001447 }
1448
1449 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001450 while (len != 0) {
1451 uint8_t *buf = (uint8_t*)ctx->state;
1452 buf[bufpos] ^= *data++;
1453 bufpos++;
1454 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001455 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001456
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001457 ctx->bytes_queued = bufpos;
1458#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001459}
1460
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001461unsigned FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001462{
1463 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001464 uint8_t *buf = (uint8_t*)ctx->state;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001465 /*
1466 * Keccak block padding is: add 1 bit after last bit of input,
1467 * then add zero bits until the end of block, and add the last 1 bit
1468 * (the last bit in the block) - the "10*1" pattern.
1469 * SHA3 standard appends additional two bits, 01, before that padding:
1470 *
1471 * SHA3-224(M) = KECCAK[448](M||01, 224)
1472 * SHA3-256(M) = KECCAK[512](M||01, 256)
1473 * SHA3-384(M) = KECCAK[768](M||01, 384)
1474 * SHA3-512(M) = KECCAK[1024](M||01, 512)
1475 * (M is the input, || is bit concatenation)
1476 *
1477 * The 6 below contains 01 "SHA3" bits and the first 1 "Keccak" bit:
1478 */
1479 buf[ctx->bytes_queued] ^= 6; /* bit pattern 00000110 */
1480 buf[ctx->input_block_bytes - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001481
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001482 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001483
1484 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001485 memcpy(resbuf, ctx->state, 64);
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001486 return 64;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001487}