blob: d08c6b2f74d26983481d9aed6dc68ef2c97b3e04 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10#include "libbb.h"
11
12/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
13 * (for rotX32, there is no difference). Why? My guess is that
14 * macro requires clever common subexpression elimination heuristics
15 * in gcc, while inline basically forces it to happen.
16 */
17//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
18static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
19{
20 return (x << n) | (x >> (32 - n));
21}
22//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
23static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
24{
25 return (x >> n) | (x << (32 - n));
26}
27/* rotr64 in needed for sha512 only: */
28//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
29static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
30{
31 return (x >> n) | (x << (64 - n));
32}
33
Lauri Kasanenb8173b62013-01-14 05:20:50 +010034/* rotl64 only used for sha3 currently */
35static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
36{
37 return (x << n) | (x >> (64 - n));
38}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020039
Denys Vlasenko302ad142010-10-19 02:16:12 +020040/* Feed data through a temporary buffer.
41 * The internal buffer remembers previous data until it has 64
42 * bytes worth to pass on.
43 */
44static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020045{
Denys Vlasenko302ad142010-10-19 02:16:12 +020046 unsigned bufpos = ctx->total64 & 63;
47
48 ctx->total64 += len;
49
50 while (1) {
51 unsigned remaining = 64 - bufpos;
52 if (remaining > len)
53 remaining = len;
54 /* Copy data into aligned buffer */
55 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
56 len -= remaining;
57 buffer = (const char *)buffer + remaining;
58 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +010059 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko302ad142010-10-19 02:16:12 +020060 bufpos -= 64;
61 if (bufpos != 0)
62 break;
63 /* Buffer is filled up, process it */
64 ctx->process_block(ctx);
65 /*bufpos = 0; - already is */
66 }
67}
68
69/* Process the remaining bytes in the buffer */
70static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
71{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020072 unsigned bufpos = ctx->total64 & 63;
73 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
74 ctx->wbuffer[bufpos++] = 0x80;
75
76 /* This loop iterates either once or twice, no more, no less */
77 while (1) {
78 unsigned remaining = 64 - bufpos;
79 memset(ctx->wbuffer + bufpos, 0, remaining);
80 /* Do we have enough space for the length count? */
81 if (remaining >= 8) {
82 /* Store the 64-bit counter of bits in the buffer */
83 uint64_t t = ctx->total64 << 3;
84 if (swap_needed)
85 t = bb_bswap_64(t);
86 /* wbuffer is suitably aligned for this */
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +020087 *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020088 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020089 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020090 if (remaining >= 8)
91 break;
92 bufpos = 0;
93 }
94}
95
96
97/*
Denys Vlasenko302ad142010-10-19 02:16:12 +020098 * Compute MD5 checksum of strings according to the
99 * definition of MD5 in RFC 1321 from April 1992.
100 *
101 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
102 *
103 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
104 * Copyright (C) 2001 Manuel Novoa III
105 * Copyright (C) 2003 Glenn L. McGrath
106 * Copyright (C) 2003 Erik Andersen
107 *
108 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
109 */
110
111/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200112#if CONFIG_MD5_SMALL < 0
113# define MD5_SMALL 0
114#elif CONFIG_MD5_SMALL > 3
115# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200116#else
Denys Vlasenko522041e2011-09-10 13:25:57 +0200117# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +0200118#endif
119
120/* These are the four functions used in the four steps of the MD5 algorithm
121 * and defined in the RFC 1321. The first function is a little bit optimized
122 * (as found in Colin Plumbs public domain implementation).
123 * #define FF(b, c, d) ((b & c) | (~b & d))
124 */
125#undef FF
126#undef FG
127#undef FH
128#undef FI
129#define FF(b, c, d) (d ^ (b & (c ^ d)))
130#define FG(b, c, d) FF(d, b, c)
131#define FH(b, c, d) (b ^ c ^ d)
132#define FI(b, c, d) (c ^ (b | ~d))
133
134/* Hash a single block, 64 bytes long and 4-byte aligned */
135static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
136{
Denys Vlasenko522041e2011-09-10 13:25:57 +0200137#if MD5_SMALL > 0
Denys Vlasenko302ad142010-10-19 02:16:12 +0200138 /* Before we start, one word to the strange constants.
139 They are defined in RFC 1321 as
Denys Vlasenko305958d2015-10-07 19:17:01 +0200140 T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
Denys Vlasenko302ad142010-10-19 02:16:12 +0200141 */
142 static const uint32_t C_array[] = {
143 /* round 1 */
144 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
145 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
146 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
147 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
148 /* round 2 */
149 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
150 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
151 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
152 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
153 /* round 3 */
154 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
155 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
156 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
157 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
158 /* round 4 */
159 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
160 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
161 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
162 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
163 };
164 static const char P_array[] ALIGN1 = {
Denys Vlasenko522041e2011-09-10 13:25:57 +0200165# if MD5_SMALL > 1
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200166 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200167# endif
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200168 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
169 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
170 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200171 };
172#endif
173 uint32_t *words = (void*) ctx->wbuffer;
174 uint32_t A = ctx->hash[0];
175 uint32_t B = ctx->hash[1];
176 uint32_t C = ctx->hash[2];
177 uint32_t D = ctx->hash[3];
178
Denys Vlasenko522041e2011-09-10 13:25:57 +0200179#if MD5_SMALL >= 2 /* 2 or 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200180
181 static const char S_array[] ALIGN1 = {
182 7, 12, 17, 22,
183 5, 9, 14, 20,
184 4, 11, 16, 23,
185 6, 10, 15, 21
186 };
187 const uint32_t *pc;
188 const char *pp;
189 const char *ps;
190 int i;
191 uint32_t temp;
192
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100193 if (BB_BIG_ENDIAN)
194 for (i = 0; i < 16; i++)
195 words[i] = SWAP_LE32(words[i]);
Denys Vlasenko302ad142010-10-19 02:16:12 +0200196
Denys Vlasenko522041e2011-09-10 13:25:57 +0200197# if MD5_SMALL == 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200198 pc = C_array;
199 pp = P_array;
200 ps = S_array - 4;
201
202 for (i = 0; i < 64; i++) {
203 if ((i & 0x0f) == 0)
204 ps += 4;
205 temp = A;
206 switch (i >> 4) {
207 case 0:
208 temp += FF(B, C, D);
209 break;
210 case 1:
211 temp += FG(B, C, D);
212 break;
213 case 2:
214 temp += FH(B, C, D);
215 break;
Denys Vlasenko305958d2015-10-07 19:17:01 +0200216 default: /* case 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200217 temp += FI(B, C, D);
218 }
219 temp += words[(int) (*pp++)] + *pc++;
220 temp = rotl32(temp, ps[i & 3]);
221 temp += B;
222 A = D;
223 D = C;
224 C = B;
225 B = temp;
226 }
Denys Vlasenko522041e2011-09-10 13:25:57 +0200227# else /* MD5_SMALL == 2 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200228 pc = C_array;
229 pp = P_array;
230 ps = S_array;
231
232 for (i = 0; i < 16; i++) {
233 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
234 temp = rotl32(temp, ps[i & 3]);
235 temp += B;
236 A = D;
237 D = C;
238 C = B;
239 B = temp;
240 }
241 ps += 4;
242 for (i = 0; i < 16; i++) {
243 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
244 temp = rotl32(temp, ps[i & 3]);
245 temp += B;
246 A = D;
247 D = C;
248 C = B;
249 B = temp;
250 }
251 ps += 4;
252 for (i = 0; i < 16; i++) {
253 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
254 temp = rotl32(temp, ps[i & 3]);
255 temp += B;
256 A = D;
257 D = C;
258 C = B;
259 B = temp;
260 }
261 ps += 4;
262 for (i = 0; i < 16; i++) {
263 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
264 temp = rotl32(temp, ps[i & 3]);
265 temp += B;
266 A = D;
267 D = C;
268 C = B;
269 B = temp;
270 }
271# endif
272 /* Add checksum to the starting values */
273 ctx->hash[0] += A;
274 ctx->hash[1] += B;
275 ctx->hash[2] += C;
276 ctx->hash[3] += D;
277
Denys Vlasenko522041e2011-09-10 13:25:57 +0200278#else /* MD5_SMALL == 0 or 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200279
Denys Vlasenko522041e2011-09-10 13:25:57 +0200280# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200281 const uint32_t *pc;
282 const char *pp;
283 int i;
284# endif
285
286 /* First round: using the given function, the context and a constant
287 the next context is computed. Because the algorithm's processing
288 unit is a 32-bit word and it is determined to work on words in
289 little endian byte order we perhaps have to change the byte order
290 before the computation. To reduce the work for the next steps
291 we save swapped words in WORDS array. */
292# undef OP
293# define OP(a, b, c, d, s, T) \
294 do { \
295 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
296 words++; \
297 a = rotl32(a, s); \
298 a += b; \
299 } while (0)
300
301 /* Round 1 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200302# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200303 pc = C_array;
304 for (i = 0; i < 4; i++) {
305 OP(A, B, C, D, 7, *pc++);
306 OP(D, A, B, C, 12, *pc++);
307 OP(C, D, A, B, 17, *pc++);
308 OP(B, C, D, A, 22, *pc++);
309 }
310# else
311 OP(A, B, C, D, 7, 0xd76aa478);
312 OP(D, A, B, C, 12, 0xe8c7b756);
313 OP(C, D, A, B, 17, 0x242070db);
314 OP(B, C, D, A, 22, 0xc1bdceee);
315 OP(A, B, C, D, 7, 0xf57c0faf);
316 OP(D, A, B, C, 12, 0x4787c62a);
317 OP(C, D, A, B, 17, 0xa8304613);
318 OP(B, C, D, A, 22, 0xfd469501);
319 OP(A, B, C, D, 7, 0x698098d8);
320 OP(D, A, B, C, 12, 0x8b44f7af);
321 OP(C, D, A, B, 17, 0xffff5bb1);
322 OP(B, C, D, A, 22, 0x895cd7be);
323 OP(A, B, C, D, 7, 0x6b901122);
324 OP(D, A, B, C, 12, 0xfd987193);
325 OP(C, D, A, B, 17, 0xa679438e);
326 OP(B, C, D, A, 22, 0x49b40821);
327# endif
328 words -= 16;
329
330 /* For the second to fourth round we have the possibly swapped words
331 in WORDS. Redefine the macro to take an additional first
332 argument specifying the function to use. */
333# undef OP
334# define OP(f, a, b, c, d, k, s, T) \
335 do { \
336 a += f(b, c, d) + words[k] + T; \
337 a = rotl32(a, s); \
338 a += b; \
339 } while (0)
340
341 /* Round 2 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200342# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200343 pp = P_array;
344 for (i = 0; i < 4; i++) {
345 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
346 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
347 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
348 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
349 }
350# else
351 OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
352 OP(FG, D, A, B, C, 6, 9, 0xc040b340);
353 OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
354 OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
355 OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
356 OP(FG, D, A, B, C, 10, 9, 0x02441453);
357 OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
358 OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
359 OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
360 OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
361 OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
362 OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
363 OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
364 OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
365 OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
366 OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
367# endif
368
369 /* Round 3 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200370# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200371 for (i = 0; i < 4; i++) {
372 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
373 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
374 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
375 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
376 }
377# else
378 OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
379 OP(FH, D, A, B, C, 8, 11, 0x8771f681);
380 OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
381 OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
382 OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
383 OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
384 OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
385 OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
386 OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
387 OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
388 OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
389 OP(FH, B, C, D, A, 6, 23, 0x04881d05);
390 OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
391 OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
392 OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
393 OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
394# endif
395
396 /* Round 4 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200397# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200398 for (i = 0; i < 4; i++) {
399 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
400 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
401 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
402 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
403 }
404# else
405 OP(FI, A, B, C, D, 0, 6, 0xf4292244);
406 OP(FI, D, A, B, C, 7, 10, 0x432aff97);
407 OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
408 OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
409 OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
410 OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
411 OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
412 OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
413 OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
414 OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
415 OP(FI, C, D, A, B, 6, 15, 0xa3014314);
416 OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
417 OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
418 OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
419 OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
420 OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
421# undef OP
422# endif
423 /* Add checksum to the starting values */
Denys Vlasenko305958d2015-10-07 19:17:01 +0200424 ctx->hash[0] += A;
425 ctx->hash[1] += B;
426 ctx->hash[2] += C;
427 ctx->hash[3] += D;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200428#endif
429}
430#undef FF
431#undef FG
432#undef FH
433#undef FI
434
435/* Initialize structure containing state of computation.
436 * (RFC 1321, 3.3: Step 3)
437 */
438void FAST_FUNC md5_begin(md5_ctx_t *ctx)
439{
440 ctx->hash[0] = 0x67452301;
441 ctx->hash[1] = 0xefcdab89;
442 ctx->hash[2] = 0x98badcfe;
443 ctx->hash[3] = 0x10325476;
444 ctx->total64 = 0;
445 ctx->process_block = md5_process_block64;
446}
447
448/* Used also for sha1 and sha256 */
449void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
450{
451 common64_hash(ctx, buffer, len);
452}
453
454/* Process the remaining bytes in the buffer and put result from CTX
455 * in first 16 bytes following RESBUF. The result is always in little
456 * endian byte order, so that a byte-wise output yields to the wanted
457 * ASCII representation of the message digest.
458 */
459void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
460{
461 /* MD5 stores total in LE, need to swap on BE arches: */
462 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
463
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200464 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100465 if (BB_BIG_ENDIAN) {
466 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
467 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
468 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
469 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
470 }
471
Denys Vlasenko302ad142010-10-19 02:16:12 +0200472 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
473}
474
475
476/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200477 * SHA1 part is:
478 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000479 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200480 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
481 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000482 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200483 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000484 *
485 * ---------------------------------------------------------------------------
486 *
487 * SHA256 and SHA512 parts are:
488 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000489 * Shrank by Denys Vlasenko.
490 *
491 * ---------------------------------------------------------------------------
492 *
493 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
494 * and replace "4096" with something like "2000 + time(NULL) % 2097",
495 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000496 */
497
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000498static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000499{
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200500 static const uint32_t rconsts[] = {
501 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
502 };
503 int i, j;
504 int cnt;
505 uint32_t W[16+16];
506 uint32_t a, b, c, d, e;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000507
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200508 /* On-stack work buffer frees up one register in the main loop
509 * which otherwise will be needed to hold ctx pointer */
510 for (i = 0; i < 16; i++)
511 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000512
513 a = ctx->hash[0];
514 b = ctx->hash[1];
515 c = ctx->hash[2];
516 d = ctx->hash[3];
517 e = ctx->hash[4];
518
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200519 /* 4 rounds of 20 operations each */
520 cnt = 0;
521 for (i = 0; i < 4; i++) {
522 j = 19;
523 do {
524 uint32_t work;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000525
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200526 work = c ^ d;
527 if (i == 0) {
528 work = (work & b) ^ d;
529 if (j <= 3)
530 goto ge16;
531 /* Used to do SWAP_BE32 here, but this
532 * requires ctx (see comment above) */
533 work += W[cnt];
534 } else {
535 if (i == 2)
536 work = ((b | c) & d) | (b & c);
537 else /* i = 1 or 3 */
538 work ^= b;
539 ge16:
540 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
541 work += W[cnt];
542 }
Denys Vlasenko03a5fe32010-10-24 20:51:28 +0200543 work += e + rotl32(a, 5) + rconsts[i];
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000544
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200545 /* Rotate by one for next time */
546 e = d;
547 d = c;
548 c = /* b = */ rotl32(b, 30);
549 b = a;
550 a = work;
551 cnt = (cnt + 1) & 15;
552 } while (--j >= 0);
553 }
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000554
555 ctx->hash[0] += a;
556 ctx->hash[1] += b;
557 ctx->hash[2] += c;
558 ctx->hash[3] += d;
559 ctx->hash[4] += e;
560}
561
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000562/* Constants for SHA512 from FIPS 180-2:4.2.3.
563 * SHA256 constants from FIPS 180-2:4.2.2
564 * are the most significant half of first 64 elements
565 * of the same array.
566 */
567static const uint64_t sha_K[80] = {
568 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
569 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
570 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
571 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
572 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
573 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
574 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
575 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
576 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
577 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
578 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
579 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
580 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
581 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
582 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
583 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
584 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
585 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
586 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
587 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
588 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
589 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
590 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
591 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
592 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
593 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
594 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
595 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
596 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
597 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
598 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
599 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
600 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
601 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
602 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
603 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
604 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
605 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
606 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
607 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000608};
609
Denys Vlasenkofe4ef362009-07-05 20:34:38 +0200610#undef Ch
611#undef Maj
612#undef S0
613#undef S1
614#undef R0
615#undef R1
616
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000617static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000618{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000619 unsigned t;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000620 uint32_t W[64], a, b, c, d, e, f, g, h;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000621 const uint32_t *words = (uint32_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000622
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000623 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000624#define Ch(x, y, z) ((x & y) ^ (~x & z))
625#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
626#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
627#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
628#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
629#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
630
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000631 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200632 for (t = 0; t < 16; ++t)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200633 W[t] = SWAP_BE32(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000634 for (/*t = 16*/; t < 64; ++t)
635 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000636
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000637 a = ctx->hash[0];
638 b = ctx->hash[1];
639 c = ctx->hash[2];
640 d = ctx->hash[3];
641 e = ctx->hash[4];
642 f = ctx->hash[5];
643 g = ctx->hash[6];
644 h = ctx->hash[7];
645
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000646 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
647 for (t = 0; t < 64; ++t) {
Denis Vlasenkoa2333c82009-03-28 19:08:23 +0000648 /* Need to fetch upper half of sha_K[t]
649 * (I hope compiler is clever enough to just fetch
650 * upper half)
651 */
652 uint32_t K_t = sha_K[t] >> 32;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000653 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000654 uint32_t T2 = S0(a) + Maj(a, b, c);
655 h = g;
656 g = f;
657 f = e;
658 e = d + T1;
659 d = c;
660 c = b;
661 b = a;
662 a = T1 + T2;
663 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000664#undef Ch
665#undef Maj
666#undef S0
667#undef S1
668#undef R0
669#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000670 /* Add the starting values of the context according to FIPS 180-2:6.2.2
671 step 4. */
672 ctx->hash[0] += a;
673 ctx->hash[1] += b;
674 ctx->hash[2] += c;
675 ctx->hash[3] += d;
676 ctx->hash[4] += e;
677 ctx->hash[5] += f;
678 ctx->hash[6] += g;
679 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000680}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000681
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000682static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000683{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000684 unsigned t;
685 uint64_t W[80];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000686 /* On i386, having assignments here (not later as sha256 does)
687 * produces 99 bytes smaller code with gcc 4.3.1
688 */
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000689 uint64_t a = ctx->hash[0];
690 uint64_t b = ctx->hash[1];
691 uint64_t c = ctx->hash[2];
692 uint64_t d = ctx->hash[3];
693 uint64_t e = ctx->hash[4];
694 uint64_t f = ctx->hash[5];
695 uint64_t g = ctx->hash[6];
696 uint64_t h = ctx->hash[7];
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000697 const uint64_t *words = (uint64_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000698
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000699 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000700#define Ch(x, y, z) ((x & y) ^ (~x & z))
701#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
702#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
703#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
704#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
705#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
706
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000707 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200708 for (t = 0; t < 16; ++t)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200709 W[t] = SWAP_BE64(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000710 for (/*t = 16*/; t < 80; ++t)
711 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000712
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000713 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
714 for (t = 0; t < 80; ++t) {
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000715 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000716 uint64_t T2 = S0(a) + Maj(a, b, c);
717 h = g;
718 g = f;
719 f = e;
720 e = d + T1;
721 d = c;
722 c = b;
723 b = a;
724 a = T1 + T2;
725 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000726#undef Ch
727#undef Maj
728#undef S0
729#undef S1
730#undef R0
731#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000732 /* Add the starting values of the context according to FIPS 180-2:6.3.2
733 step 4. */
734 ctx->hash[0] += a;
735 ctx->hash[1] += b;
736 ctx->hash[2] += c;
737 ctx->hash[3] += d;
738 ctx->hash[4] += e;
739 ctx->hash[5] += f;
740 ctx->hash[6] += g;
741 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000742}
743
744
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +0000745void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000746{
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000747 ctx->hash[0] = 0x67452301;
748 ctx->hash[1] = 0xefcdab89;
749 ctx->hash[2] = 0x98badcfe;
750 ctx->hash[3] = 0x10325476;
751 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000752 ctx->total64 = 0;
753 ctx->process_block = sha1_process_block64;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000754}
755
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000756static const uint32_t init256[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200757 0,
758 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000759 0x6a09e667,
760 0xbb67ae85,
761 0x3c6ef372,
762 0xa54ff53a,
763 0x510e527f,
764 0x9b05688c,
765 0x1f83d9ab,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200766 0x5be0cd19,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000767};
768static const uint32_t init512_lo[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200769 0,
770 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000771 0xf3bcc908,
772 0x84caa73b,
773 0xfe94f82b,
774 0x5f1d36f1,
775 0xade682d1,
776 0x2b3e6c1f,
777 0xfb41bd6b,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200778 0x137e2179,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000779};
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000780
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000781/* Initialize structure containing state of computation.
782 (FIPS 180-2:5.3.2) */
783void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
784{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200785 memcpy(&ctx->total64, init256, sizeof(init256));
786 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000787 ctx->process_block = sha256_process_block64;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000788}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000789
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000790/* Initialize structure containing state of computation.
791 (FIPS 180-2:5.3.3) */
792void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
793{
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000794 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200795 /* Two extra iterations zero out ctx->total64[2] */
796 uint64_t *tp = ctx->total64;
797 for (i = 0; i < 2+8; i++)
798 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200799 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000800}
801
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200802void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000803{
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200804 unsigned bufpos = ctx->total64[0] & 127;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200805 unsigned remaining;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000806
807 /* First increment the byte count. FIPS 180-2 specifies the possible
808 length of the file up to 2^128 _bits_.
809 We compute the number of _bytes_ and convert to bits later. */
810 ctx->total64[0] += len;
811 if (ctx->total64[0] < len)
812 ctx->total64[1]++;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200813#if 0
814 remaining = 128 - bufpos;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000815
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200816 /* Hash whole blocks */
817 while (len >= remaining) {
818 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
819 buffer = (const char *)buffer + remaining;
820 len -= remaining;
821 remaining = 128;
822 bufpos = 0;
823 sha512_process_block128(ctx);
824 }
825
826 /* Save last, partial blosk */
827 memcpy(ctx->wbuffer + bufpos, buffer, len);
828#else
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200829 while (1) {
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200830 remaining = 128 - bufpos;
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200831 if (remaining > len)
832 remaining = len;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200833 /* Copy data into aligned buffer */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200834 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
835 len -= remaining;
836 buffer = (const char *)buffer + remaining;
837 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +0100838 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200839 bufpos -= 128;
840 if (bufpos != 0)
841 break;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200842 /* Buffer is filled up, process it */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000843 sha512_process_block128(ctx);
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200844 /*bufpos = 0; - already is */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000845 }
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200846#endif
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000847}
848
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000849/* Used also for sha256 */
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200850void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000851{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200852 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000853
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200854 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200855 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000856
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200857 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000858 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100859 if (BB_LITTLE_ENDIAN) {
860 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200861 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200862 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100863 }
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200864 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000865}
866
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200867void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000868{
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200869 unsigned bufpos = ctx->total64[0] & 127;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000870
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200871 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200872 ctx->wbuffer[bufpos++] = 0x80;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000873
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000874 while (1) {
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200875 unsigned remaining = 128 - bufpos;
876 memset(ctx->wbuffer + bufpos, 0, remaining);
877 if (remaining >= 16) {
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000878 /* Store the 128-bit counter of bits in the buffer in BE format */
879 uint64_t t;
880 t = ctx->total64[0] << 3;
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200881 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200882 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000883 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200884 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200885 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000886 }
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000887 sha512_process_block128(ctx);
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200888 if (remaining >= 16)
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000889 break;
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200890 bufpos = 0;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000891 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000892
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100893 if (BB_LITTLE_ENDIAN) {
894 unsigned i;
895 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200896 ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100897 }
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000898 memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000899}
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100900
901
902/*
903 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
904 * Michael Peeters and Gilles Van Assche. For more information, feedback or
905 * questions, please refer to our website: http://keccak.noekeon.org/
906 *
907 * Implementation by Ronny Van Keer,
908 * hereby denoted as "the implementer".
909 *
910 * To the extent possible under law, the implementer has waived all copyright
911 * and related or neighboring rights to the source code in this file.
912 * http://creativecommons.org/publicdomain/zero/1.0/
913 *
914 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
915 */
916
Denys Vlasenko30a86522013-01-15 01:12:26 +0100917#if CONFIG_SHA3_SMALL < 0
918# define SHA3_SMALL 0
919#elif CONFIG_SHA3_SMALL > 1
920# define SHA3_SMALL 1
921#else
922# define SHA3_SMALL CONFIG_SHA3_SMALL
923#endif
924
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200925#define OPTIMIZE_SHA3_FOR_32 0
926/*
927 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
928 * every 64-bit word of state[] can be split into two 32-bit words
929 * by even/odd bits. In this form, all rotations of sha3 round
930 * are 32-bit - and there are lots of them.
931 * However, it requires either splitting/combining state words
932 * before/after sha3 round (code does this now)
933 * or shuffling bits before xor'ing them into state and in sha3_end.
934 * Without shuffling, bit-slicing results in -130 bytes of code
935 * and marginal speedup (but of course it gives wrong result).
936 * With shuffling it works, but +260 code bytes, and slower.
937 * Disabled for now:
938 */
939#if 0 /* LONG_MAX == 0x7fffffff */
940# undef OPTIMIZE_SHA3_FOR_32
941# define OPTIMIZE_SHA3_FOR_32 1
942#endif
943
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100944enum {
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100945 SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100946};
947
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200948#if OPTIMIZE_SHA3_FOR_32
949/* This splits every 64-bit word into a pair of 32-bit words,
950 * even bits go into first word, odd bits go to second one.
951 * The conversion is done in-place.
952 */
953static void split_halves(uint64_t *state)
954{
955 /* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
956 uint32_t *s32 = (uint32_t*)state;
957 uint32_t t, x0, x1;
958 int i;
959 for (i = 24; i >= 0; --i) {
960 x0 = s32[0];
961 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
962 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
963 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
964 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
965 x1 = s32[1];
966 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
967 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
968 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
969 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
970 *s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
971 *s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
972 }
973}
974/* The reverse operation */
975static void combine_halves(uint64_t *state)
976{
977 uint32_t *s32 = (uint32_t*)state;
978 uint32_t t, x0, x1;
979 int i;
980 for (i = 24; i >= 0; --i) {
981 x0 = s32[0];
982 x1 = s32[1];
983 t = (x0 & 0x0000FFFF) | (x1 << 16);
984 x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
985 x0 = t;
986 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
987 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
988 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
989 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
990 *s32++ = x0;
991 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
992 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
993 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
994 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
995 *s32++ = x1;
996 }
997}
998#endif
999
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001000/*
1001 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +01001002 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001003static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001004{
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001005 enum { NROUNDS = 24 };
1006
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001007#if OPTIMIZE_SHA3_FOR_32
1008 /*
1009 static const uint32_t IOTA_CONST_0[NROUNDS] = {
1010 0x00000001UL,
1011 0x00000000UL,
1012 0x00000000UL,
1013 0x00000000UL,
1014 0x00000001UL,
1015 0x00000001UL,
1016 0x00000001UL,
1017 0x00000001UL,
1018 0x00000000UL,
1019 0x00000000UL,
1020 0x00000001UL,
1021 0x00000000UL,
1022 0x00000001UL,
1023 0x00000001UL,
1024 0x00000001UL,
1025 0x00000001UL,
1026 0x00000000UL,
1027 0x00000000UL,
1028 0x00000000UL,
1029 0x00000000UL,
1030 0x00000001UL,
1031 0x00000000UL,
1032 0x00000001UL,
1033 0x00000000UL,
1034 };
1035 ** bits are in lsb: 0101 0000 1111 0100 1111 0001
1036 */
1037 uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
1038 static const uint32_t IOTA_CONST_1[NROUNDS] = {
1039 0x00000000UL,
1040 0x00000089UL,
1041 0x8000008bUL,
1042 0x80008080UL,
1043 0x0000008bUL,
1044 0x00008000UL,
1045 0x80008088UL,
1046 0x80000082UL,
1047 0x0000000bUL,
1048 0x0000000aUL,
1049 0x00008082UL,
1050 0x00008003UL,
1051 0x0000808bUL,
1052 0x8000000bUL,
1053 0x8000008aUL,
1054 0x80000081UL,
1055 0x80000081UL,
1056 0x80000008UL,
1057 0x00000083UL,
1058 0x80008003UL,
1059 0x80008088UL,
1060 0x80000088UL,
1061 0x00008000UL,
1062 0x80008082UL,
1063 };
1064
1065 uint32_t *const s32 = (uint32_t*)state;
1066 unsigned round;
1067
1068 split_halves(state);
1069
1070 for (round = 0; round < NROUNDS; round++) {
1071 unsigned x;
1072
1073 /* Theta */
1074 {
1075 uint32_t BC[20];
1076 for (x = 0; x < 10; ++x) {
1077 BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1078 }
1079 for (x = 0; x < 10; x += 2) {
1080 uint32_t ta, tb;
1081 ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1082 tb = BC[x+9] ^ BC[x+2];
1083 s32[x+0] ^= ta;
1084 s32[x+1] ^= tb;
1085 s32[x+10] ^= ta;
1086 s32[x+11] ^= tb;
1087 s32[x+20] ^= ta;
1088 s32[x+21] ^= tb;
1089 s32[x+30] ^= ta;
1090 s32[x+31] ^= tb;
1091 s32[x+40] ^= ta;
1092 s32[x+41] ^= tb;
1093 }
1094 }
1095 /* RhoPi */
1096 {
1097 uint32_t t0a,t0b, t1a,t1b;
1098 t1a = s32[1*2+0];
1099 t1b = s32[1*2+1];
1100
1101#define RhoPi(PI_LANE, ROT_CONST) \
1102 t0a = s32[PI_LANE*2+0];\
1103 t0b = s32[PI_LANE*2+1];\
1104 if (ROT_CONST & 1) {\
1105 s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1106 s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1107 } else {\
1108 s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1109 s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1110 }\
1111 t1a = t0a; t1b = t0b;
1112
1113 RhoPi(10, 1)
1114 RhoPi( 7, 3)
1115 RhoPi(11, 6)
1116 RhoPi(17,10)
1117 RhoPi(18,15)
1118 RhoPi( 3,21)
1119 RhoPi( 5,28)
1120 RhoPi(16,36)
1121 RhoPi( 8,45)
1122 RhoPi(21,55)
1123 RhoPi(24, 2)
1124 RhoPi( 4,14)
1125 RhoPi(15,27)
1126 RhoPi(23,41)
1127 RhoPi(19,56)
1128 RhoPi(13, 8)
1129 RhoPi(12,25)
1130 RhoPi( 2,43)
1131 RhoPi(20,62)
1132 RhoPi(14,18)
1133 RhoPi(22,39)
1134 RhoPi( 9,61)
1135 RhoPi( 6,20)
1136 RhoPi( 1,44)
1137#undef RhoPi
1138 }
1139 /* Chi */
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001140 for (x = 0; x <= 40;) {
1141 uint32_t BC0, BC1, BC2, BC3, BC4;
1142 BC0 = s32[x + 0*2];
1143 BC1 = s32[x + 1*2];
1144 BC2 = s32[x + 2*2];
1145 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1146 BC3 = s32[x + 3*2];
1147 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1148 BC4 = s32[x + 4*2];
1149 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1150 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1151 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1152 x++;
1153 BC0 = s32[x + 0*2];
1154 BC1 = s32[x + 1*2];
1155 BC2 = s32[x + 2*2];
1156 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1157 BC3 = s32[x + 3*2];
1158 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1159 BC4 = s32[x + 4*2];
1160 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1161 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1162 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1163 x += 9;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001164 }
1165 /* Iota */
1166 s32[0] ^= IOTA_CONST_0bits & 1;
1167 IOTA_CONST_0bits >>= 1;
1168 s32[1] ^= IOTA_CONST_1[round];
1169 }
1170
1171 combine_halves(state);
1172#else
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001173 /* Native 64-bit algorithm */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001174 static const uint16_t IOTA_CONST[NROUNDS] = {
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001175 /* Elements should be 64-bit, but top half is always zero
1176 * or 0x80000000. We encode 63rd bits in a separate word below.
1177 * Same is true for 31th bits, which lets us use 16-bit table
1178 * instead of 64-bit. The speed penalty is lost in the noise.
1179 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001180 0x0001,
1181 0x8082,
1182 0x808a,
1183 0x8000,
1184 0x808b,
1185 0x0001,
1186 0x8081,
1187 0x8009,
1188 0x008a,
1189 0x0088,
1190 0x8009,
1191 0x000a,
1192 0x808b,
1193 0x008b,
1194 0x8089,
1195 0x8003,
1196 0x8002,
1197 0x0080,
1198 0x800a,
1199 0x000a,
1200 0x8081,
1201 0x8080,
1202 0x0001,
1203 0x8008,
1204 };
1205 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1206 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1207 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1208 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1209
1210 static const uint8_t ROT_CONST[24] = {
1211 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1212 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1213 };
1214 static const uint8_t PI_LANE[24] = {
1215 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1216 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1217 };
1218 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001219
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001220 unsigned x;
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001221 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001222
1223 if (BB_BIG_ENDIAN) {
1224 for (x = 0; x < 25; x++) {
1225 state[x] = SWAP_LE64(state[x]);
1226 }
1227 }
1228
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001229 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001230 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001231 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001232 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001233 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001234 BC[x + 5] = BC[x] = state[x]
1235 ^ state[x + 5] ^ state[x + 10]
1236 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001237 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001238 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001239 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001240 * and the code is a bit _smaller_.
1241 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001242 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001243 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001244 state[x] ^= temp;
1245 state[x + 5] ^= temp;
1246 state[x + 10] ^= temp;
1247 state[x + 15] ^= temp;
1248 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001249 }
1250 }
1251
1252 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001253 if (SHA3_SMALL) {
1254 uint64_t t1 = state[1];
1255 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001256 uint64_t t0 = state[PI_LANE[x]];
1257 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001258 t1 = t0;
1259 }
1260 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001261 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001262 * 64-bit rotations by non-constant usually are SLOW on those.
1263 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001264 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001265 * but generates 300-500 more bytes of code.
1266 */
1267 uint64_t t0;
1268 uint64_t t1 = state[1];
1269#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001270 t0 = state[PI_LANE[x ]]; \
1271 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1272 t1 = state[PI_LANE[x+1]]; \
1273 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001274 RhoPi_twice(0); RhoPi_twice(2);
1275 RhoPi_twice(4); RhoPi_twice(6);
1276 RhoPi_twice(8); RhoPi_twice(10);
1277 RhoPi_twice(12); RhoPi_twice(14);
1278 RhoPi_twice(16); RhoPi_twice(18);
1279 RhoPi_twice(20); RhoPi_twice(22);
1280#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001281 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001282 /* Chi */
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001283# if LONG_MAX > 0x7fffffff
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001284 for (x = 0; x <= 20; x += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001285 uint64_t BC0, BC1, BC2, BC3, BC4;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001286 BC0 = state[x + 0];
1287 BC1 = state[x + 1];
1288 BC2 = state[x + 2];
1289 state[x + 0] = BC0 ^ ((~BC1) & BC2);
1290 BC3 = state[x + 3];
1291 state[x + 1] = BC1 ^ ((~BC2) & BC3);
1292 BC4 = state[x + 4];
1293 state[x + 2] = BC2 ^ ((~BC3) & BC4);
1294 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1295 state[x + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001296 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001297# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001298 /* Reduced register pressure version
1299 * for register-starved 32-bit arches
1300 * (i386: -95 bytes, and it is _faster_)
1301 */
1302 for (x = 0; x <= 40;) {
1303 uint32_t BC0, BC1, BC2, BC3, BC4;
1304 uint32_t *const s32 = (uint32_t*)state;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001305# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001306 do_half:
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001307# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001308 BC0 = s32[x + 0*2];
1309 BC1 = s32[x + 1*2];
1310 BC2 = s32[x + 2*2];
1311 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1312 BC3 = s32[x + 3*2];
1313 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1314 BC4 = s32[x + 4*2];
1315 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1316 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1317 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1318 x++;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001319# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001320 if (x & 1)
1321 goto do_half;
1322 x += 8;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001323# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001324 BC0 = s32[x + 0*2];
1325 BC1 = s32[x + 1*2];
1326 BC2 = s32[x + 2*2];
1327 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1328 BC3 = s32[x + 3*2];
1329 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1330 BC4 = s32[x + 4*2];
1331 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1332 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1333 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1334 x += 9;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001335# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001336 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001337# endif /* long is 32-bit */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001338 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001339 state[0] ^= IOTA_CONST[round]
1340 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1341 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001342 }
1343
1344 if (BB_BIG_ENDIAN) {
1345 for (x = 0; x < 25; x++) {
1346 state[x] = SWAP_LE64(state[x]);
1347 }
1348 }
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001349#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001350}
1351
1352void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1353{
1354 memset(ctx, 0, sizeof(*ctx));
1355}
1356
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001357void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001358{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001359#if SHA3_SMALL
1360 const uint8_t *data = buffer;
1361 unsigned bufpos = ctx->bytes_queued;
1362
1363 while (1) {
1364 unsigned remaining = SHA3_IBLK_BYTES - bufpos;
1365 if (remaining > len)
1366 remaining = len;
1367 len -= remaining;
1368 /* XOR data into buffer */
1369 while (remaining != 0) {
1370 uint8_t *buf = (uint8_t*)ctx->state;
1371 buf[bufpos] ^= *data++;
1372 bufpos++;
1373 remaining--;
1374 }
1375 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
1376 bufpos -= SHA3_IBLK_BYTES;
1377 if (bufpos != 0)
1378 break;
1379 /* Buffer is filled up, process it */
1380 sha3_process_block72(ctx->state);
1381 /*bufpos = 0; - already is */
1382 }
1383 ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
1384#else
1385 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1386 const uint8_t *data = buffer;
1387 unsigned bufpos = ctx->bytes_queued;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001388
1389 /* If already data in queue, continue queuing first */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001390 while (len != 0 && bufpos != 0) {
1391 uint8_t *buf = (uint8_t*)ctx->state;
1392 buf[bufpos] ^= *data++;
1393 len--;
1394 bufpos++;
1395 if (bufpos == SHA3_IBLK_BYTES) {
1396 bufpos = 0;
1397 goto do_block;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001398 }
1399 }
1400
1401 /* Absorb complete blocks */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001402 while (len >= SHA3_IBLK_BYTES) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001403 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001404 * We try to be efficient - operate one word at a time, not byte.
1405 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001406 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001407 unsigned count = SHA3_IBLK_BYTES / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001408 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001409 do {
1410 long v;
1411 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001412 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001413 data += sizeof(long);
1414 } while (--count);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001415 len -= SHA3_IBLK_BYTES;
1416 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001417 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001418 }
1419
1420 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001421 while (len != 0) {
1422 uint8_t *buf = (uint8_t*)ctx->state;
1423 buf[bufpos] ^= *data++;
1424 bufpos++;
1425 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001426 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001427
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001428 ctx->bytes_queued = bufpos;
1429#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001430}
1431
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001432void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001433{
1434 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001435 uint8_t *buf = (uint8_t*)ctx->state;
1436 buf[ctx->bytes_queued] ^= 1;
1437 buf[SHA3_IBLK_BYTES - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001438
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001439 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001440
1441 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001442 memcpy(resbuf, ctx->state, 64);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001443}