blob: b4d955e5a3a887b0296dab1b0d17dec546815925 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10#include "libbb.h"
11
12/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
13 * (for rotX32, there is no difference). Why? My guess is that
14 * macro requires clever common subexpression elimination heuristics
15 * in gcc, while inline basically forces it to happen.
16 */
17//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
18static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
19{
20 return (x << n) | (x >> (32 - n));
21}
22//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
23static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
24{
25 return (x >> n) | (x << (32 - n));
26}
27/* rotr64 in needed for sha512 only: */
28//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
29static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
30{
31 return (x >> n) | (x << (64 - n));
32}
33
Lauri Kasanenb8173b62013-01-14 05:20:50 +010034/* rotl64 only used for sha3 currently */
35static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
36{
37 return (x << n) | (x >> (64 - n));
38}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020039
Denys Vlasenko302ad142010-10-19 02:16:12 +020040/* Feed data through a temporary buffer.
41 * The internal buffer remembers previous data until it has 64
42 * bytes worth to pass on.
43 */
44static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020045{
Denys Vlasenko302ad142010-10-19 02:16:12 +020046 unsigned bufpos = ctx->total64 & 63;
47
48 ctx->total64 += len;
49
50 while (1) {
51 unsigned remaining = 64 - bufpos;
52 if (remaining > len)
53 remaining = len;
54 /* Copy data into aligned buffer */
55 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
56 len -= remaining;
57 buffer = (const char *)buffer + remaining;
58 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +010059 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko302ad142010-10-19 02:16:12 +020060 bufpos -= 64;
61 if (bufpos != 0)
62 break;
63 /* Buffer is filled up, process it */
64 ctx->process_block(ctx);
65 /*bufpos = 0; - already is */
66 }
67}
68
69/* Process the remaining bytes in the buffer */
70static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
71{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020072 unsigned bufpos = ctx->total64 & 63;
73 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
74 ctx->wbuffer[bufpos++] = 0x80;
75
76 /* This loop iterates either once or twice, no more, no less */
77 while (1) {
78 unsigned remaining = 64 - bufpos;
79 memset(ctx->wbuffer + bufpos, 0, remaining);
80 /* Do we have enough space for the length count? */
81 if (remaining >= 8) {
82 /* Store the 64-bit counter of bits in the buffer */
83 uint64_t t = ctx->total64 << 3;
84 if (swap_needed)
85 t = bb_bswap_64(t);
86 /* wbuffer is suitably aligned for this */
87 *(uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
88 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020089 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020090 if (remaining >= 8)
91 break;
92 bufpos = 0;
93 }
94}
95
96
97/*
Denys Vlasenko302ad142010-10-19 02:16:12 +020098 * Compute MD5 checksum of strings according to the
99 * definition of MD5 in RFC 1321 from April 1992.
100 *
101 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
102 *
103 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
104 * Copyright (C) 2001 Manuel Novoa III
105 * Copyright (C) 2003 Glenn L. McGrath
106 * Copyright (C) 2003 Erik Andersen
107 *
108 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
109 */
110
111/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200112#if CONFIG_MD5_SMALL < 0
113# define MD5_SMALL 0
114#elif CONFIG_MD5_SMALL > 3
115# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200116#else
Denys Vlasenko522041e2011-09-10 13:25:57 +0200117# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +0200118#endif
119
120/* These are the four functions used in the four steps of the MD5 algorithm
121 * and defined in the RFC 1321. The first function is a little bit optimized
122 * (as found in Colin Plumbs public domain implementation).
123 * #define FF(b, c, d) ((b & c) | (~b & d))
124 */
125#undef FF
126#undef FG
127#undef FH
128#undef FI
129#define FF(b, c, d) (d ^ (b & (c ^ d)))
130#define FG(b, c, d) FF(d, b, c)
131#define FH(b, c, d) (b ^ c ^ d)
132#define FI(b, c, d) (c ^ (b | ~d))
133
134/* Hash a single block, 64 bytes long and 4-byte aligned */
135static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
136{
Denys Vlasenko522041e2011-09-10 13:25:57 +0200137#if MD5_SMALL > 0
Denys Vlasenko302ad142010-10-19 02:16:12 +0200138 /* Before we start, one word to the strange constants.
139 They are defined in RFC 1321 as
140 T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
141 */
142 static const uint32_t C_array[] = {
143 /* round 1 */
144 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
145 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
146 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
147 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
148 /* round 2 */
149 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
150 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
151 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
152 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
153 /* round 3 */
154 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
155 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
156 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
157 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
158 /* round 4 */
159 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
160 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
161 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
162 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
163 };
164 static const char P_array[] ALIGN1 = {
Denys Vlasenko522041e2011-09-10 13:25:57 +0200165# if MD5_SMALL > 1
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200166 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200167# endif
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200168 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
169 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
170 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200171 };
172#endif
173 uint32_t *words = (void*) ctx->wbuffer;
174 uint32_t A = ctx->hash[0];
175 uint32_t B = ctx->hash[1];
176 uint32_t C = ctx->hash[2];
177 uint32_t D = ctx->hash[3];
178
Denys Vlasenko522041e2011-09-10 13:25:57 +0200179#if MD5_SMALL >= 2 /* 2 or 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200180
181 static const char S_array[] ALIGN1 = {
182 7, 12, 17, 22,
183 5, 9, 14, 20,
184 4, 11, 16, 23,
185 6, 10, 15, 21
186 };
187 const uint32_t *pc;
188 const char *pp;
189 const char *ps;
190 int i;
191 uint32_t temp;
192
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100193 if (BB_BIG_ENDIAN)
194 for (i = 0; i < 16; i++)
195 words[i] = SWAP_LE32(words[i]);
Denys Vlasenko302ad142010-10-19 02:16:12 +0200196
Denys Vlasenko522041e2011-09-10 13:25:57 +0200197# if MD5_SMALL == 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200198 pc = C_array;
199 pp = P_array;
200 ps = S_array - 4;
201
202 for (i = 0; i < 64; i++) {
203 if ((i & 0x0f) == 0)
204 ps += 4;
205 temp = A;
206 switch (i >> 4) {
207 case 0:
208 temp += FF(B, C, D);
209 break;
210 case 1:
211 temp += FG(B, C, D);
212 break;
213 case 2:
214 temp += FH(B, C, D);
215 break;
216 case 3:
217 temp += FI(B, C, D);
218 }
219 temp += words[(int) (*pp++)] + *pc++;
220 temp = rotl32(temp, ps[i & 3]);
221 temp += B;
222 A = D;
223 D = C;
224 C = B;
225 B = temp;
226 }
Denys Vlasenko522041e2011-09-10 13:25:57 +0200227# else /* MD5_SMALL == 2 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200228 pc = C_array;
229 pp = P_array;
230 ps = S_array;
231
232 for (i = 0; i < 16; i++) {
233 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
234 temp = rotl32(temp, ps[i & 3]);
235 temp += B;
236 A = D;
237 D = C;
238 C = B;
239 B = temp;
240 }
241 ps += 4;
242 for (i = 0; i < 16; i++) {
243 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
244 temp = rotl32(temp, ps[i & 3]);
245 temp += B;
246 A = D;
247 D = C;
248 C = B;
249 B = temp;
250 }
251 ps += 4;
252 for (i = 0; i < 16; i++) {
253 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
254 temp = rotl32(temp, ps[i & 3]);
255 temp += B;
256 A = D;
257 D = C;
258 C = B;
259 B = temp;
260 }
261 ps += 4;
262 for (i = 0; i < 16; i++) {
263 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
264 temp = rotl32(temp, ps[i & 3]);
265 temp += B;
266 A = D;
267 D = C;
268 C = B;
269 B = temp;
270 }
271# endif
272 /* Add checksum to the starting values */
273 ctx->hash[0] += A;
274 ctx->hash[1] += B;
275 ctx->hash[2] += C;
276 ctx->hash[3] += D;
277
Denys Vlasenko522041e2011-09-10 13:25:57 +0200278#else /* MD5_SMALL == 0 or 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200279
280 uint32_t A_save = A;
281 uint32_t B_save = B;
282 uint32_t C_save = C;
283 uint32_t D_save = D;
Denys Vlasenko522041e2011-09-10 13:25:57 +0200284# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200285 const uint32_t *pc;
286 const char *pp;
287 int i;
288# endif
289
290 /* First round: using the given function, the context and a constant
291 the next context is computed. Because the algorithm's processing
292 unit is a 32-bit word and it is determined to work on words in
293 little endian byte order we perhaps have to change the byte order
294 before the computation. To reduce the work for the next steps
295 we save swapped words in WORDS array. */
296# undef OP
297# define OP(a, b, c, d, s, T) \
298 do { \
299 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
300 words++; \
301 a = rotl32(a, s); \
302 a += b; \
303 } while (0)
304
305 /* Round 1 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200306# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200307 pc = C_array;
308 for (i = 0; i < 4; i++) {
309 OP(A, B, C, D, 7, *pc++);
310 OP(D, A, B, C, 12, *pc++);
311 OP(C, D, A, B, 17, *pc++);
312 OP(B, C, D, A, 22, *pc++);
313 }
314# else
315 OP(A, B, C, D, 7, 0xd76aa478);
316 OP(D, A, B, C, 12, 0xe8c7b756);
317 OP(C, D, A, B, 17, 0x242070db);
318 OP(B, C, D, A, 22, 0xc1bdceee);
319 OP(A, B, C, D, 7, 0xf57c0faf);
320 OP(D, A, B, C, 12, 0x4787c62a);
321 OP(C, D, A, B, 17, 0xa8304613);
322 OP(B, C, D, A, 22, 0xfd469501);
323 OP(A, B, C, D, 7, 0x698098d8);
324 OP(D, A, B, C, 12, 0x8b44f7af);
325 OP(C, D, A, B, 17, 0xffff5bb1);
326 OP(B, C, D, A, 22, 0x895cd7be);
327 OP(A, B, C, D, 7, 0x6b901122);
328 OP(D, A, B, C, 12, 0xfd987193);
329 OP(C, D, A, B, 17, 0xa679438e);
330 OP(B, C, D, A, 22, 0x49b40821);
331# endif
332 words -= 16;
333
334 /* For the second to fourth round we have the possibly swapped words
335 in WORDS. Redefine the macro to take an additional first
336 argument specifying the function to use. */
337# undef OP
338# define OP(f, a, b, c, d, k, s, T) \
339 do { \
340 a += f(b, c, d) + words[k] + T; \
341 a = rotl32(a, s); \
342 a += b; \
343 } while (0)
344
345 /* Round 2 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200346# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200347 pp = P_array;
348 for (i = 0; i < 4; i++) {
349 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
350 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
351 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
352 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
353 }
354# else
355 OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
356 OP(FG, D, A, B, C, 6, 9, 0xc040b340);
357 OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
358 OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
359 OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
360 OP(FG, D, A, B, C, 10, 9, 0x02441453);
361 OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
362 OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
363 OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
364 OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
365 OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
366 OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
367 OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
368 OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
369 OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
370 OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
371# endif
372
373 /* Round 3 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200374# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200375 for (i = 0; i < 4; i++) {
376 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
377 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
378 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
379 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
380 }
381# else
382 OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
383 OP(FH, D, A, B, C, 8, 11, 0x8771f681);
384 OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
385 OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
386 OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
387 OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
388 OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
389 OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
390 OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
391 OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
392 OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
393 OP(FH, B, C, D, A, 6, 23, 0x04881d05);
394 OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
395 OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
396 OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
397 OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
398# endif
399
400 /* Round 4 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200401# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200402 for (i = 0; i < 4; i++) {
403 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
404 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
405 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
406 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
407 }
408# else
409 OP(FI, A, B, C, D, 0, 6, 0xf4292244);
410 OP(FI, D, A, B, C, 7, 10, 0x432aff97);
411 OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
412 OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
413 OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
414 OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
415 OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
416 OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
417 OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
418 OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
419 OP(FI, C, D, A, B, 6, 15, 0xa3014314);
420 OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
421 OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
422 OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
423 OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
424 OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
425# undef OP
426# endif
427 /* Add checksum to the starting values */
428 ctx->hash[0] = A_save + A;
429 ctx->hash[1] = B_save + B;
430 ctx->hash[2] = C_save + C;
431 ctx->hash[3] = D_save + D;
432#endif
433}
434#undef FF
435#undef FG
436#undef FH
437#undef FI
438
439/* Initialize structure containing state of computation.
440 * (RFC 1321, 3.3: Step 3)
441 */
442void FAST_FUNC md5_begin(md5_ctx_t *ctx)
443{
444 ctx->hash[0] = 0x67452301;
445 ctx->hash[1] = 0xefcdab89;
446 ctx->hash[2] = 0x98badcfe;
447 ctx->hash[3] = 0x10325476;
448 ctx->total64 = 0;
449 ctx->process_block = md5_process_block64;
450}
451
452/* Used also for sha1 and sha256 */
453void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
454{
455 common64_hash(ctx, buffer, len);
456}
457
458/* Process the remaining bytes in the buffer and put result from CTX
459 * in first 16 bytes following RESBUF. The result is always in little
460 * endian byte order, so that a byte-wise output yields to the wanted
461 * ASCII representation of the message digest.
462 */
463void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
464{
465 /* MD5 stores total in LE, need to swap on BE arches: */
466 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
467
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200468 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100469 if (BB_BIG_ENDIAN) {
470 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
471 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
472 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
473 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
474 }
475
Denys Vlasenko302ad142010-10-19 02:16:12 +0200476 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
477}
478
479
480/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200481 * SHA1 part is:
482 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000483 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200484 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
485 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000486 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200487 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000488 *
489 * ---------------------------------------------------------------------------
490 *
491 * SHA256 and SHA512 parts are:
492 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000493 * Shrank by Denys Vlasenko.
494 *
495 * ---------------------------------------------------------------------------
496 *
497 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
498 * and replace "4096" with something like "2000 + time(NULL) % 2097",
499 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000500 */
501
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000502static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000503{
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200504 static const uint32_t rconsts[] = {
505 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
506 };
507 int i, j;
508 int cnt;
509 uint32_t W[16+16];
510 uint32_t a, b, c, d, e;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000511
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200512 /* On-stack work buffer frees up one register in the main loop
513 * which otherwise will be needed to hold ctx pointer */
514 for (i = 0; i < 16; i++)
515 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000516
517 a = ctx->hash[0];
518 b = ctx->hash[1];
519 c = ctx->hash[2];
520 d = ctx->hash[3];
521 e = ctx->hash[4];
522
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200523 /* 4 rounds of 20 operations each */
524 cnt = 0;
525 for (i = 0; i < 4; i++) {
526 j = 19;
527 do {
528 uint32_t work;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000529
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200530 work = c ^ d;
531 if (i == 0) {
532 work = (work & b) ^ d;
533 if (j <= 3)
534 goto ge16;
535 /* Used to do SWAP_BE32 here, but this
536 * requires ctx (see comment above) */
537 work += W[cnt];
538 } else {
539 if (i == 2)
540 work = ((b | c) & d) | (b & c);
541 else /* i = 1 or 3 */
542 work ^= b;
543 ge16:
544 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
545 work += W[cnt];
546 }
Denys Vlasenko03a5fe32010-10-24 20:51:28 +0200547 work += e + rotl32(a, 5) + rconsts[i];
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000548
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200549 /* Rotate by one for next time */
550 e = d;
551 d = c;
552 c = /* b = */ rotl32(b, 30);
553 b = a;
554 a = work;
555 cnt = (cnt + 1) & 15;
556 } while (--j >= 0);
557 }
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000558
559 ctx->hash[0] += a;
560 ctx->hash[1] += b;
561 ctx->hash[2] += c;
562 ctx->hash[3] += d;
563 ctx->hash[4] += e;
564}
565
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000566/* Constants for SHA512 from FIPS 180-2:4.2.3.
567 * SHA256 constants from FIPS 180-2:4.2.2
568 * are the most significant half of first 64 elements
569 * of the same array.
570 */
571static const uint64_t sha_K[80] = {
572 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
573 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
574 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
575 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
576 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
577 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
578 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
579 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
580 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
581 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
582 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
583 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
584 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
585 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
586 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
587 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
588 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
589 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
590 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
591 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
592 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
593 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
594 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
595 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
596 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
597 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
598 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
599 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
600 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
601 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
602 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
603 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
604 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
605 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
606 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
607 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
608 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
609 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
610 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
611 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000612};
613
Denys Vlasenkofe4ef362009-07-05 20:34:38 +0200614#undef Ch
615#undef Maj
616#undef S0
617#undef S1
618#undef R0
619#undef R1
620
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000621static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000622{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000623 unsigned t;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000624 uint32_t W[64], a, b, c, d, e, f, g, h;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000625 const uint32_t *words = (uint32_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000626
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000627 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000628#define Ch(x, y, z) ((x & y) ^ (~x & z))
629#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
630#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
631#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
632#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
633#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
634
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000635 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200636 for (t = 0; t < 16; ++t)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200637 W[t] = SWAP_BE32(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000638 for (/*t = 16*/; t < 64; ++t)
639 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000640
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000641 a = ctx->hash[0];
642 b = ctx->hash[1];
643 c = ctx->hash[2];
644 d = ctx->hash[3];
645 e = ctx->hash[4];
646 f = ctx->hash[5];
647 g = ctx->hash[6];
648 h = ctx->hash[7];
649
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000650 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
651 for (t = 0; t < 64; ++t) {
Denis Vlasenkoa2333c82009-03-28 19:08:23 +0000652 /* Need to fetch upper half of sha_K[t]
653 * (I hope compiler is clever enough to just fetch
654 * upper half)
655 */
656 uint32_t K_t = sha_K[t] >> 32;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000657 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000658 uint32_t T2 = S0(a) + Maj(a, b, c);
659 h = g;
660 g = f;
661 f = e;
662 e = d + T1;
663 d = c;
664 c = b;
665 b = a;
666 a = T1 + T2;
667 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000668#undef Ch
669#undef Maj
670#undef S0
671#undef S1
672#undef R0
673#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000674 /* Add the starting values of the context according to FIPS 180-2:6.2.2
675 step 4. */
676 ctx->hash[0] += a;
677 ctx->hash[1] += b;
678 ctx->hash[2] += c;
679 ctx->hash[3] += d;
680 ctx->hash[4] += e;
681 ctx->hash[5] += f;
682 ctx->hash[6] += g;
683 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000684}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000685
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000686static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000687{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000688 unsigned t;
689 uint64_t W[80];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000690 /* On i386, having assignments here (not later as sha256 does)
691 * produces 99 bytes smaller code with gcc 4.3.1
692 */
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000693 uint64_t a = ctx->hash[0];
694 uint64_t b = ctx->hash[1];
695 uint64_t c = ctx->hash[2];
696 uint64_t d = ctx->hash[3];
697 uint64_t e = ctx->hash[4];
698 uint64_t f = ctx->hash[5];
699 uint64_t g = ctx->hash[6];
700 uint64_t h = ctx->hash[7];
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000701 const uint64_t *words = (uint64_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000702
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000703 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000704#define Ch(x, y, z) ((x & y) ^ (~x & z))
705#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
706#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
707#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
708#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
709#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
710
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000711 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200712 for (t = 0; t < 16; ++t)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200713 W[t] = SWAP_BE64(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000714 for (/*t = 16*/; t < 80; ++t)
715 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000716
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000717 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
718 for (t = 0; t < 80; ++t) {
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000719 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000720 uint64_t T2 = S0(a) + Maj(a, b, c);
721 h = g;
722 g = f;
723 f = e;
724 e = d + T1;
725 d = c;
726 c = b;
727 b = a;
728 a = T1 + T2;
729 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000730#undef Ch
731#undef Maj
732#undef S0
733#undef S1
734#undef R0
735#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000736 /* Add the starting values of the context according to FIPS 180-2:6.3.2
737 step 4. */
738 ctx->hash[0] += a;
739 ctx->hash[1] += b;
740 ctx->hash[2] += c;
741 ctx->hash[3] += d;
742 ctx->hash[4] += e;
743 ctx->hash[5] += f;
744 ctx->hash[6] += g;
745 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000746}
747
748
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +0000749void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000750{
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000751 ctx->hash[0] = 0x67452301;
752 ctx->hash[1] = 0xefcdab89;
753 ctx->hash[2] = 0x98badcfe;
754 ctx->hash[3] = 0x10325476;
755 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000756 ctx->total64 = 0;
757 ctx->process_block = sha1_process_block64;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000758}
759
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000760static const uint32_t init256[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200761 0,
762 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000763 0x6a09e667,
764 0xbb67ae85,
765 0x3c6ef372,
766 0xa54ff53a,
767 0x510e527f,
768 0x9b05688c,
769 0x1f83d9ab,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200770 0x5be0cd19,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000771};
772static const uint32_t init512_lo[] = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200773 0,
774 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000775 0xf3bcc908,
776 0x84caa73b,
777 0xfe94f82b,
778 0x5f1d36f1,
779 0xade682d1,
780 0x2b3e6c1f,
781 0xfb41bd6b,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200782 0x137e2179,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000783};
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000784
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000785/* Initialize structure containing state of computation.
786 (FIPS 180-2:5.3.2) */
787void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
788{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200789 memcpy(&ctx->total64, init256, sizeof(init256));
790 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000791 ctx->process_block = sha256_process_block64;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000792}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000793
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000794/* Initialize structure containing state of computation.
795 (FIPS 180-2:5.3.3) */
796void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
797{
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000798 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200799 /* Two extra iterations zero out ctx->total64[2] */
800 uint64_t *tp = ctx->total64;
801 for (i = 0; i < 2+8; i++)
802 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200803 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000804}
805
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200806void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000807{
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200808 unsigned bufpos = ctx->total64[0] & 127;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200809 unsigned remaining;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000810
811 /* First increment the byte count. FIPS 180-2 specifies the possible
812 length of the file up to 2^128 _bits_.
813 We compute the number of _bytes_ and convert to bits later. */
814 ctx->total64[0] += len;
815 if (ctx->total64[0] < len)
816 ctx->total64[1]++;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200817#if 0
818 remaining = 128 - bufpos;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000819
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200820 /* Hash whole blocks */
821 while (len >= remaining) {
822 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
823 buffer = (const char *)buffer + remaining;
824 len -= remaining;
825 remaining = 128;
826 bufpos = 0;
827 sha512_process_block128(ctx);
828 }
829
830 /* Save last, partial blosk */
831 memcpy(ctx->wbuffer + bufpos, buffer, len);
832#else
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200833 while (1) {
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200834 remaining = 128 - bufpos;
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200835 if (remaining > len)
836 remaining = len;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200837 /* Copy data into aligned buffer */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200838 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
839 len -= remaining;
840 buffer = (const char *)buffer + remaining;
841 bufpos += remaining;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +0100842 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200843 bufpos -= 128;
844 if (bufpos != 0)
845 break;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200846 /* Buffer is filled up, process it */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000847 sha512_process_block128(ctx);
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200848 /*bufpos = 0; - already is */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000849 }
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200850#endif
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000851}
852
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000853/* Used also for sha256 */
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200854void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000855{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200856 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000857
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200858 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200859 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000860
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200861 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000862 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100863 if (BB_LITTLE_ENDIAN) {
864 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200865 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200866 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100867 }
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200868 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000869}
870
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200871void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000872{
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200873 unsigned bufpos = ctx->total64[0] & 127;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000874
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200875 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200876 ctx->wbuffer[bufpos++] = 0x80;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000877
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000878 while (1) {
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200879 unsigned remaining = 128 - bufpos;
880 memset(ctx->wbuffer + bufpos, 0, remaining);
881 if (remaining >= 16) {
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000882 /* Store the 128-bit counter of bits in the buffer in BE format */
883 uint64_t t;
884 t = ctx->total64[0] << 3;
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200885 t = SWAP_BE64(t);
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000886 *(uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
887 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200888 t = SWAP_BE64(t);
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000889 *(uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
890 }
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000891 sha512_process_block128(ctx);
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200892 if (remaining >= 16)
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000893 break;
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200894 bufpos = 0;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000895 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000896
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100897 if (BB_LITTLE_ENDIAN) {
898 unsigned i;
899 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200900 ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100901 }
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000902 memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000903}
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100904
905
906/*
907 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
908 * Michael Peeters and Gilles Van Assche. For more information, feedback or
909 * questions, please refer to our website: http://keccak.noekeon.org/
910 *
911 * Implementation by Ronny Van Keer,
912 * hereby denoted as "the implementer".
913 *
914 * To the extent possible under law, the implementer has waived all copyright
915 * and related or neighboring rights to the source code in this file.
916 * http://creativecommons.org/publicdomain/zero/1.0/
917 *
918 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
919 */
920
Denys Vlasenko30a86522013-01-15 01:12:26 +0100921#if CONFIG_SHA3_SMALL < 0
922# define SHA3_SMALL 0
923#elif CONFIG_SHA3_SMALL > 1
924# define SHA3_SMALL 1
925#else
926# define SHA3_SMALL CONFIG_SHA3_SMALL
927#endif
928
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100929enum {
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100930 SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100931};
932
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100933/*
934 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +0100935 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +0100936static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100937{
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100938 enum { NROUNDS = 24 };
939
940 /* Elements should be 64-bit, but top half is always zero or 0x80000000.
941 * We encode 63rd bits in a separate word below.
942 * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit.
943 * The speed penalty is lost in the noise.
944 */
945 static const uint16_t IOTA_CONST[NROUNDS] = {
946 0x0001,
947 0x8082,
948 0x808a,
949 0x8000,
950 0x808b,
951 0x0001,
952 0x8081,
953 0x8009,
954 0x008a,
955 0x0088,
956 0x8009,
957 0x000a,
958 0x808b,
959 0x008b,
960 0x8089,
961 0x8003,
962 0x8002,
963 0x0080,
964 0x800a,
965 0x000a,
966 0x8081,
967 0x8080,
968 0x0001,
969 0x8008,
970 };
971 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
972 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
973 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
974 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
975
976 static const uint8_t ROT_CONST[24] = {
977 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
978 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
979 };
980 static const uint8_t PI_LANE[24] = {
981 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
982 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
983 };
984 /*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +0100985
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +0100986 unsigned x, y;
987 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100988
989 if (BB_BIG_ENDIAN) {
990 for (x = 0; x < 25; x++) {
991 state[x] = SWAP_LE64(state[x]);
992 }
993 }
994
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100995 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100996 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +0100997 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +0100998 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +0100999 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001000 BC[x + 5] = BC[x] = state[x]
1001 ^ state[x + 5] ^ state[x + 10]
1002 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001003 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001004 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001005 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001006 * and the code is a bit _smaller_.
1007 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001008 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001009 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001010 state[x] ^= temp;
1011 state[x + 5] ^= temp;
1012 state[x + 10] ^= temp;
1013 state[x + 15] ^= temp;
1014 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001015 }
1016 }
1017
1018 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001019 if (SHA3_SMALL) {
1020 uint64_t t1 = state[1];
1021 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001022 uint64_t t0 = state[PI_LANE[x]];
1023 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001024 t1 = t0;
1025 }
1026 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001027 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001028 * 64-bit rotations by non-constant usually are SLOW on those.
1029 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001030 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001031 * but generates 300-500 more bytes of code.
1032 */
1033 uint64_t t0;
1034 uint64_t t1 = state[1];
1035#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001036 t0 = state[PI_LANE[x ]]; \
1037 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1038 t1 = state[PI_LANE[x+1]]; \
1039 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001040 RhoPi_twice(0); RhoPi_twice(2);
1041 RhoPi_twice(4); RhoPi_twice(6);
1042 RhoPi_twice(8); RhoPi_twice(10);
1043 RhoPi_twice(12); RhoPi_twice(14);
1044 RhoPi_twice(16); RhoPi_twice(18);
1045 RhoPi_twice(20); RhoPi_twice(22);
1046#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001047 }
1048
1049 /* Chi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001050 for (y = 0; y <= 20; y += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001051 uint64_t BC0, BC1, BC2, BC3, BC4;
1052 BC0 = state[y + 0];
1053 BC1 = state[y + 1];
1054 BC2 = state[y + 2];
1055 state[y + 0] = BC0 ^ ((~BC1) & BC2);
1056 BC3 = state[y + 3];
1057 state[y + 1] = BC1 ^ ((~BC2) & BC3);
1058 BC4 = state[y + 4];
1059 state[y + 2] = BC2 ^ ((~BC3) & BC4);
1060 state[y + 3] = BC3 ^ ((~BC4) & BC0);
1061 state[y + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001062 }
1063
1064 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001065 state[0] ^= IOTA_CONST[round]
1066 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1067 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001068 }
1069
1070 if (BB_BIG_ENDIAN) {
1071 for (x = 0; x < 25; x++) {
1072 state[x] = SWAP_LE64(state[x]);
1073 }
1074 }
1075}
1076
1077void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1078{
1079 memset(ctx, 0, sizeof(*ctx));
1080}
1081
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001082void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001083{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001084#if SHA3_SMALL
1085 const uint8_t *data = buffer;
1086 unsigned bufpos = ctx->bytes_queued;
1087
1088 while (1) {
1089 unsigned remaining = SHA3_IBLK_BYTES - bufpos;
1090 if (remaining > len)
1091 remaining = len;
1092 len -= remaining;
1093 /* XOR data into buffer */
1094 while (remaining != 0) {
1095 uint8_t *buf = (uint8_t*)ctx->state;
1096 buf[bufpos] ^= *data++;
1097 bufpos++;
1098 remaining--;
1099 }
1100 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
1101 bufpos -= SHA3_IBLK_BYTES;
1102 if (bufpos != 0)
1103 break;
1104 /* Buffer is filled up, process it */
1105 sha3_process_block72(ctx->state);
1106 /*bufpos = 0; - already is */
1107 }
1108 ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
1109#else
1110 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1111 const uint8_t *data = buffer;
1112 unsigned bufpos = ctx->bytes_queued;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001113
1114 /* If already data in queue, continue queuing first */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001115 while (len != 0 && bufpos != 0) {
1116 uint8_t *buf = (uint8_t*)ctx->state;
1117 buf[bufpos] ^= *data++;
1118 len--;
1119 bufpos++;
1120 if (bufpos == SHA3_IBLK_BYTES) {
1121 bufpos = 0;
1122 goto do_block;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001123 }
1124 }
1125
1126 /* Absorb complete blocks */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001127 while (len >= SHA3_IBLK_BYTES) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001128 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001129 * We try to be efficient - operate one word at a time, not byte.
1130 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001131 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001132 unsigned count = SHA3_IBLK_BYTES / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001133 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001134 do {
1135 long v;
1136 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001137 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001138 data += sizeof(long);
1139 } while (--count);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001140 len -= SHA3_IBLK_BYTES;
1141 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001142 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001143 }
1144
1145 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001146 while (len != 0) {
1147 uint8_t *buf = (uint8_t*)ctx->state;
1148 buf[bufpos] ^= *data++;
1149 bufpos++;
1150 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001151 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001152
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001153 ctx->bytes_queued = bufpos;
1154#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001155}
1156
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001157void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001158{
1159 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001160 uint8_t *buf = (uint8_t*)ctx->state;
1161 buf[ctx->bytes_queued] ^= 1;
1162 buf[SHA3_IBLK_BYTES - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001163
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001164 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001165
1166 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001167 memcpy(resbuf, ctx->state, 64);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001168}