blob: e0db8ce6735df202fecf60726b9949f545aad62e [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Rob Landley5cf7c2d2006-02-21 06:44:43 +00002/*
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02003 * Utility routines.
4 *
5 * Copyright (C) 2010 Denys Vlasenko
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +02009#include "libbb.h"
10
Denys Vlasenkob8935d02017-01-15 20:16:27 +010011#define NEED_SHA512 (ENABLE_SHA512SUM || ENABLE_USE_BB_CRYPT_SHA)
12
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020013/* gcc 4.2.1 optimizes rotr64 better with inline than with macro
14 * (for rotX32, there is no difference). Why? My guess is that
15 * macro requires clever common subexpression elimination heuristics
16 * in gcc, while inline basically forces it to happen.
17 */
18//#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
19static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
20{
21 return (x << n) | (x >> (32 - n));
22}
23//#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
24static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
25{
26 return (x >> n) | (x << (32 - n));
27}
28/* rotr64 in needed for sha512 only: */
29//#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
30static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
31{
32 return (x >> n) | (x << (64 - n));
33}
34
Lauri Kasanenb8173b62013-01-14 05:20:50 +010035/* rotl64 only used for sha3 currently */
36static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
37{
38 return (x << n) | (x >> (64 - n));
39}
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020040
Denys Vlasenko302ad142010-10-19 02:16:12 +020041/* Process the remaining bytes in the buffer */
42static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
43{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020044 unsigned bufpos = ctx->total64 & 63;
45 /* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
46 ctx->wbuffer[bufpos++] = 0x80;
47
48 /* This loop iterates either once or twice, no more, no less */
49 while (1) {
50 unsigned remaining = 64 - bufpos;
51 memset(ctx->wbuffer + bufpos, 0, remaining);
52 /* Do we have enough space for the length count? */
53 if (remaining >= 8) {
54 /* Store the 64-bit counter of bits in the buffer */
55 uint64_t t = ctx->total64 << 3;
56 if (swap_needed)
57 t = bb_bswap_64(t);
58 /* wbuffer is suitably aligned for this */
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +020059 *(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020060 }
Denys Vlasenko302ad142010-10-19 02:16:12 +020061 ctx->process_block(ctx);
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +020062 if (remaining >= 8)
63 break;
64 bufpos = 0;
65 }
66}
67
68
69/*
Denys Vlasenko302ad142010-10-19 02:16:12 +020070 * Compute MD5 checksum of strings according to the
71 * definition of MD5 in RFC 1321 from April 1992.
72 *
73 * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
74 *
75 * Copyright (C) 1995-1999 Free Software Foundation, Inc.
76 * Copyright (C) 2001 Manuel Novoa III
77 * Copyright (C) 2003 Glenn L. McGrath
78 * Copyright (C) 2003 Erik Andersen
79 *
80 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
81 */
82
83/* 0: fastest, 3: smallest */
Denys Vlasenko522041e2011-09-10 13:25:57 +020084#if CONFIG_MD5_SMALL < 0
85# define MD5_SMALL 0
86#elif CONFIG_MD5_SMALL > 3
87# define MD5_SMALL 3
Denys Vlasenko302ad142010-10-19 02:16:12 +020088#else
Denys Vlasenko522041e2011-09-10 13:25:57 +020089# define MD5_SMALL CONFIG_MD5_SMALL
Denys Vlasenko302ad142010-10-19 02:16:12 +020090#endif
91
92/* These are the four functions used in the four steps of the MD5 algorithm
93 * and defined in the RFC 1321. The first function is a little bit optimized
94 * (as found in Colin Plumbs public domain implementation).
95 * #define FF(b, c, d) ((b & c) | (~b & d))
96 */
97#undef FF
98#undef FG
99#undef FH
100#undef FI
101#define FF(b, c, d) (d ^ (b & (c ^ d)))
102#define FG(b, c, d) FF(d, b, c)
103#define FH(b, c, d) (b ^ c ^ d)
104#define FI(b, c, d) (c ^ (b | ~d))
105
106/* Hash a single block, 64 bytes long and 4-byte aligned */
107static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
108{
Denys Vlasenko522041e2011-09-10 13:25:57 +0200109#if MD5_SMALL > 0
Denys Vlasenko302ad142010-10-19 02:16:12 +0200110 /* Before we start, one word to the strange constants.
111 They are defined in RFC 1321 as
Denys Vlasenko305958d2015-10-07 19:17:01 +0200112 T[i] = (int)(2^32 * fabs(sin(i))), i=1..64
Denys Vlasenko302ad142010-10-19 02:16:12 +0200113 */
Denys Vlasenko965b7952020-11-30 13:03:03 +0100114 static const uint32_t C_array[] ALIGN4 = {
Denys Vlasenko302ad142010-10-19 02:16:12 +0200115 /* round 1 */
116 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
117 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
118 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
119 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
120 /* round 2 */
121 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
122 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
123 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
124 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
125 /* round 3 */
126 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
127 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
128 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
129 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
130 /* round 4 */
131 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
132 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
133 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
134 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
135 };
136 static const char P_array[] ALIGN1 = {
Denys Vlasenko522041e2011-09-10 13:25:57 +0200137# if MD5_SMALL > 1
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200138 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200139# endif
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200140 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
141 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
142 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200143 };
144#endif
145 uint32_t *words = (void*) ctx->wbuffer;
146 uint32_t A = ctx->hash[0];
147 uint32_t B = ctx->hash[1];
148 uint32_t C = ctx->hash[2];
149 uint32_t D = ctx->hash[3];
150
Denys Vlasenko522041e2011-09-10 13:25:57 +0200151#if MD5_SMALL >= 2 /* 2 or 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200152
153 static const char S_array[] ALIGN1 = {
154 7, 12, 17, 22,
155 5, 9, 14, 20,
156 4, 11, 16, 23,
157 6, 10, 15, 21
158 };
159 const uint32_t *pc;
160 const char *pp;
161 const char *ps;
162 int i;
163 uint32_t temp;
164
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100165 if (BB_BIG_ENDIAN)
166 for (i = 0; i < 16; i++)
167 words[i] = SWAP_LE32(words[i]);
Denys Vlasenko302ad142010-10-19 02:16:12 +0200168
Denys Vlasenko522041e2011-09-10 13:25:57 +0200169# if MD5_SMALL == 3
Denys Vlasenko302ad142010-10-19 02:16:12 +0200170 pc = C_array;
171 pp = P_array;
172 ps = S_array - 4;
173
174 for (i = 0; i < 64; i++) {
175 if ((i & 0x0f) == 0)
176 ps += 4;
177 temp = A;
178 switch (i >> 4) {
179 case 0:
180 temp += FF(B, C, D);
181 break;
182 case 1:
183 temp += FG(B, C, D);
184 break;
185 case 2:
186 temp += FH(B, C, D);
187 break;
Denys Vlasenko305958d2015-10-07 19:17:01 +0200188 default: /* case 3 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200189 temp += FI(B, C, D);
190 }
191 temp += words[(int) (*pp++)] + *pc++;
192 temp = rotl32(temp, ps[i & 3]);
193 temp += B;
194 A = D;
195 D = C;
196 C = B;
197 B = temp;
198 }
Denys Vlasenko522041e2011-09-10 13:25:57 +0200199# else /* MD5_SMALL == 2 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200200 pc = C_array;
201 pp = P_array;
202 ps = S_array;
203
204 for (i = 0; i < 16; i++) {
205 temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
206 temp = rotl32(temp, ps[i & 3]);
207 temp += B;
208 A = D;
209 D = C;
210 C = B;
211 B = temp;
212 }
213 ps += 4;
214 for (i = 0; i < 16; i++) {
215 temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
216 temp = rotl32(temp, ps[i & 3]);
217 temp += B;
218 A = D;
219 D = C;
220 C = B;
221 B = temp;
222 }
223 ps += 4;
224 for (i = 0; i < 16; i++) {
225 temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
226 temp = rotl32(temp, ps[i & 3]);
227 temp += B;
228 A = D;
229 D = C;
230 C = B;
231 B = temp;
232 }
233 ps += 4;
234 for (i = 0; i < 16; i++) {
235 temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
236 temp = rotl32(temp, ps[i & 3]);
237 temp += B;
238 A = D;
239 D = C;
240 C = B;
241 B = temp;
242 }
243# endif
244 /* Add checksum to the starting values */
245 ctx->hash[0] += A;
246 ctx->hash[1] += B;
247 ctx->hash[2] += C;
248 ctx->hash[3] += D;
249
Denys Vlasenko522041e2011-09-10 13:25:57 +0200250#else /* MD5_SMALL == 0 or 1 */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200251
Denys Vlasenko522041e2011-09-10 13:25:57 +0200252# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200253 const uint32_t *pc;
254 const char *pp;
255 int i;
256# endif
257
258 /* First round: using the given function, the context and a constant
259 the next context is computed. Because the algorithm's processing
260 unit is a 32-bit word and it is determined to work on words in
261 little endian byte order we perhaps have to change the byte order
262 before the computation. To reduce the work for the next steps
263 we save swapped words in WORDS array. */
264# undef OP
265# define OP(a, b, c, d, s, T) \
266 do { \
267 a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
268 words++; \
269 a = rotl32(a, s); \
270 a += b; \
271 } while (0)
272
273 /* Round 1 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200274# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200275 pc = C_array;
276 for (i = 0; i < 4; i++) {
277 OP(A, B, C, D, 7, *pc++);
278 OP(D, A, B, C, 12, *pc++);
279 OP(C, D, A, B, 17, *pc++);
280 OP(B, C, D, A, 22, *pc++);
281 }
282# else
283 OP(A, B, C, D, 7, 0xd76aa478);
284 OP(D, A, B, C, 12, 0xe8c7b756);
285 OP(C, D, A, B, 17, 0x242070db);
286 OP(B, C, D, A, 22, 0xc1bdceee);
287 OP(A, B, C, D, 7, 0xf57c0faf);
288 OP(D, A, B, C, 12, 0x4787c62a);
289 OP(C, D, A, B, 17, 0xa8304613);
290 OP(B, C, D, A, 22, 0xfd469501);
291 OP(A, B, C, D, 7, 0x698098d8);
292 OP(D, A, B, C, 12, 0x8b44f7af);
293 OP(C, D, A, B, 17, 0xffff5bb1);
294 OP(B, C, D, A, 22, 0x895cd7be);
295 OP(A, B, C, D, 7, 0x6b901122);
296 OP(D, A, B, C, 12, 0xfd987193);
297 OP(C, D, A, B, 17, 0xa679438e);
298 OP(B, C, D, A, 22, 0x49b40821);
299# endif
300 words -= 16;
301
302 /* For the second to fourth round we have the possibly swapped words
303 in WORDS. Redefine the macro to take an additional first
304 argument specifying the function to use. */
305# undef OP
306# define OP(f, a, b, c, d, k, s, T) \
307 do { \
308 a += f(b, c, d) + words[k] + T; \
309 a = rotl32(a, s); \
310 a += b; \
311 } while (0)
312
313 /* Round 2 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200314# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200315 pp = P_array;
316 for (i = 0; i < 4; i++) {
317 OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
318 OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
319 OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
320 OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
321 }
322# else
323 OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
324 OP(FG, D, A, B, C, 6, 9, 0xc040b340);
325 OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
326 OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
327 OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
328 OP(FG, D, A, B, C, 10, 9, 0x02441453);
329 OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
330 OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
331 OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
332 OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
333 OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
334 OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
335 OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
336 OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
337 OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
338 OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
339# endif
340
341 /* Round 3 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200342# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200343 for (i = 0; i < 4; i++) {
344 OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
345 OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
346 OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
347 OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
348 }
349# else
350 OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
351 OP(FH, D, A, B, C, 8, 11, 0x8771f681);
352 OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
353 OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
354 OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
355 OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
356 OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
357 OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
358 OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
359 OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
360 OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
361 OP(FH, B, C, D, A, 6, 23, 0x04881d05);
362 OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
363 OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
364 OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
365 OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
366# endif
367
368 /* Round 4 */
Denys Vlasenko522041e2011-09-10 13:25:57 +0200369# if MD5_SMALL == 1
Denys Vlasenko302ad142010-10-19 02:16:12 +0200370 for (i = 0; i < 4; i++) {
371 OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
372 OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
373 OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
374 OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
375 }
376# else
377 OP(FI, A, B, C, D, 0, 6, 0xf4292244);
378 OP(FI, D, A, B, C, 7, 10, 0x432aff97);
379 OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
380 OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
381 OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
382 OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
383 OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
384 OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
385 OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
386 OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
387 OP(FI, C, D, A, B, 6, 15, 0xa3014314);
388 OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
389 OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
390 OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
391 OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
392 OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
393# undef OP
394# endif
395 /* Add checksum to the starting values */
Denys Vlasenko305958d2015-10-07 19:17:01 +0200396 ctx->hash[0] += A;
397 ctx->hash[1] += B;
398 ctx->hash[2] += C;
399 ctx->hash[3] += D;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200400#endif
401}
402#undef FF
403#undef FG
404#undef FH
405#undef FI
406
407/* Initialize structure containing state of computation.
408 * (RFC 1321, 3.3: Step 3)
409 */
410void FAST_FUNC md5_begin(md5_ctx_t *ctx)
411{
412 ctx->hash[0] = 0x67452301;
413 ctx->hash[1] = 0xefcdab89;
414 ctx->hash[2] = 0x98badcfe;
415 ctx->hash[3] = 0x10325476;
416 ctx->total64 = 0;
417 ctx->process_block = md5_process_block64;
418}
419
420/* Used also for sha1 and sha256 */
421void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
422{
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +0200423 unsigned bufpos = ctx->total64 & 63;
424
425 ctx->total64 += len;
426
427 while (1) {
428 unsigned remaining = 64 - bufpos;
429 if (remaining > len)
430 remaining = len;
431 /* Copy data into aligned buffer */
432 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
433 len -= remaining;
434 buffer = (const char *)buffer + remaining;
435 bufpos += remaining;
436
437 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
438 bufpos -= 64;
439 if (bufpos != 0)
440 break;
441
442 /* Buffer is filled up, process it */
443 ctx->process_block(ctx);
444 /*bufpos = 0; - already is */
445 }
Denys Vlasenko302ad142010-10-19 02:16:12 +0200446}
447
448/* Process the remaining bytes in the buffer and put result from CTX
449 * in first 16 bytes following RESBUF. The result is always in little
450 * endian byte order, so that a byte-wise output yields to the wanted
451 * ASCII representation of the message digest.
452 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100453unsigned FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
Denys Vlasenko302ad142010-10-19 02:16:12 +0200454{
455 /* MD5 stores total in LE, need to swap on BE arches: */
456 common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
457
Denys Vlasenko7ab94ca2010-10-19 02:33:39 +0200458 /* The MD5 result is in little endian byte order */
Denys Vlasenko5368fe52013-01-16 02:20:31 +0100459 if (BB_BIG_ENDIAN) {
460 ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
461 ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
462 ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
463 ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
464 }
465
Denys Vlasenko302ad142010-10-19 02:16:12 +0200466 memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100467 return sizeof(ctx->hash[0]) * 4;
Denys Vlasenko302ad142010-10-19 02:16:12 +0200468}
469
470
471/*
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200472 * SHA1 part is:
473 * Copyright 2007 Rob Landley <rob@landley.net>
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000474 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200475 * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
476 * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
Denis Vlasenko9213a9e2006-09-17 16:28:10 +0000477 *
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200478 * Licensed under GPLv2, see file LICENSE in this source tree.
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000479 *
480 * ---------------------------------------------------------------------------
481 *
482 * SHA256 and SHA512 parts are:
483 * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
Denis Vlasenkoddb1b852009-03-12 16:05:02 +0000484 * Shrank by Denys Vlasenko.
485 *
486 * ---------------------------------------------------------------------------
487 *
488 * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
489 * and replace "4096" with something like "2000 + time(NULL) % 2097",
490 * then rebuild and compare "shaNNNsum bigfile" results.
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000491 */
492
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000493static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000494{
Denys Vlasenko965b7952020-11-30 13:03:03 +0100495 static const uint32_t rconsts[] ALIGN4 = {
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200496 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
497 };
498 int i, j;
499 int cnt;
500 uint32_t W[16+16];
501 uint32_t a, b, c, d, e;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000502
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200503 /* On-stack work buffer frees up one register in the main loop
504 * which otherwise will be needed to hold ctx pointer */
505 for (i = 0; i < 16; i++)
506 W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000507
508 a = ctx->hash[0];
509 b = ctx->hash[1];
510 c = ctx->hash[2];
511 d = ctx->hash[3];
512 e = ctx->hash[4];
513
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200514 /* 4 rounds of 20 operations each */
515 cnt = 0;
516 for (i = 0; i < 4; i++) {
517 j = 19;
518 do {
519 uint32_t work;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000520
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200521 work = c ^ d;
522 if (i == 0) {
523 work = (work & b) ^ d;
524 if (j <= 3)
525 goto ge16;
526 /* Used to do SWAP_BE32 here, but this
527 * requires ctx (see comment above) */
528 work += W[cnt];
529 } else {
530 if (i == 2)
531 work = ((b | c) & d) | (b & c);
532 else /* i = 1 or 3 */
533 work ^= b;
534 ge16:
535 W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
536 work += W[cnt];
537 }
Denys Vlasenko03a5fe32010-10-24 20:51:28 +0200538 work += e + rotl32(a, 5) + rconsts[i];
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000539
Denys Vlasenkof4c93ab2010-10-24 19:27:30 +0200540 /* Rotate by one for next time */
541 e = d;
542 d = c;
543 c = /* b = */ rotl32(b, 30);
544 b = a;
545 a = work;
546 cnt = (cnt + 1) & 15;
547 } while (--j >= 0);
548 }
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000549
550 ctx->hash[0] += a;
551 ctx->hash[1] += b;
552 ctx->hash[2] += c;
553 ctx->hash[3] += d;
554 ctx->hash[4] += e;
555}
556
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000557/* Constants for SHA512 from FIPS 180-2:4.2.3.
558 * SHA256 constants from FIPS 180-2:4.2.2
559 * are the most significant half of first 64 elements
560 * of the same array.
561 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100562#undef K
563#if NEED_SHA512
564typedef uint64_t sha_K_int;
565# define K(v) v
566#else
567typedef uint32_t sha_K_int;
568# define K(v) (uint32_t)(v >> 32)
569#endif
Denys Vlasenko965b7952020-11-30 13:03:03 +0100570static const sha_K_int sha_K[] ALIGN8 = {
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100571 K(0x428a2f98d728ae22ULL), K(0x7137449123ef65cdULL),
572 K(0xb5c0fbcfec4d3b2fULL), K(0xe9b5dba58189dbbcULL),
573 K(0x3956c25bf348b538ULL), K(0x59f111f1b605d019ULL),
574 K(0x923f82a4af194f9bULL), K(0xab1c5ed5da6d8118ULL),
575 K(0xd807aa98a3030242ULL), K(0x12835b0145706fbeULL),
576 K(0x243185be4ee4b28cULL), K(0x550c7dc3d5ffb4e2ULL),
577 K(0x72be5d74f27b896fULL), K(0x80deb1fe3b1696b1ULL),
578 K(0x9bdc06a725c71235ULL), K(0xc19bf174cf692694ULL),
579 K(0xe49b69c19ef14ad2ULL), K(0xefbe4786384f25e3ULL),
580 K(0x0fc19dc68b8cd5b5ULL), K(0x240ca1cc77ac9c65ULL),
581 K(0x2de92c6f592b0275ULL), K(0x4a7484aa6ea6e483ULL),
582 K(0x5cb0a9dcbd41fbd4ULL), K(0x76f988da831153b5ULL),
583 K(0x983e5152ee66dfabULL), K(0xa831c66d2db43210ULL),
584 K(0xb00327c898fb213fULL), K(0xbf597fc7beef0ee4ULL),
585 K(0xc6e00bf33da88fc2ULL), K(0xd5a79147930aa725ULL),
586 K(0x06ca6351e003826fULL), K(0x142929670a0e6e70ULL),
587 K(0x27b70a8546d22ffcULL), K(0x2e1b21385c26c926ULL),
588 K(0x4d2c6dfc5ac42aedULL), K(0x53380d139d95b3dfULL),
589 K(0x650a73548baf63deULL), K(0x766a0abb3c77b2a8ULL),
590 K(0x81c2c92e47edaee6ULL), K(0x92722c851482353bULL),
591 K(0xa2bfe8a14cf10364ULL), K(0xa81a664bbc423001ULL),
592 K(0xc24b8b70d0f89791ULL), K(0xc76c51a30654be30ULL),
593 K(0xd192e819d6ef5218ULL), K(0xd69906245565a910ULL),
594 K(0xf40e35855771202aULL), K(0x106aa07032bbd1b8ULL),
595 K(0x19a4c116b8d2d0c8ULL), K(0x1e376c085141ab53ULL),
596 K(0x2748774cdf8eeb99ULL), K(0x34b0bcb5e19b48a8ULL),
597 K(0x391c0cb3c5c95a63ULL), K(0x4ed8aa4ae3418acbULL),
598 K(0x5b9cca4f7763e373ULL), K(0x682e6ff3d6b2b8a3ULL),
599 K(0x748f82ee5defb2fcULL), K(0x78a5636f43172f60ULL),
600 K(0x84c87814a1f0ab72ULL), K(0x8cc702081a6439ecULL),
601 K(0x90befffa23631e28ULL), K(0xa4506cebde82bde9ULL),
602 K(0xbef9a3f7b2c67915ULL), K(0xc67178f2e372532bULL),
603#if NEED_SHA512 /* [64]+ are used for sha512 only */
604 K(0xca273eceea26619cULL), K(0xd186b8c721c0c207ULL),
605 K(0xeada7dd6cde0eb1eULL), K(0xf57d4f7fee6ed178ULL),
606 K(0x06f067aa72176fbaULL), K(0x0a637dc5a2c898a6ULL),
607 K(0x113f9804bef90daeULL), K(0x1b710b35131c471bULL),
608 K(0x28db77f523047d84ULL), K(0x32caab7b40c72493ULL),
609 K(0x3c9ebe0a15c9bebcULL), K(0x431d67c49c100d4cULL),
610 K(0x4cc5d4becb3e42b6ULL), K(0x597f299cfc657e2aULL),
611 K(0x5fcb6fab3ad6faecULL), K(0x6c44198c4a475817ULL),
612#endif
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000613};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100614#undef K
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000615
Denys Vlasenkofe4ef362009-07-05 20:34:38 +0200616#undef Ch
617#undef Maj
618#undef S0
619#undef S1
620#undef R0
621#undef R1
622
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000623static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000624{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000625 unsigned t;
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000626 uint32_t W[64], a, b, c, d, e, f, g, h;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000627 const uint32_t *words = (uint32_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000628
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000629 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000630#define Ch(x, y, z) ((x & y) ^ (~x & z))
631#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
632#define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
633#define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
634#define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
635#define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
636
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000637 /* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200638 for (t = 0; t < 16; ++t)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200639 W[t] = SWAP_BE32(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000640 for (/*t = 16*/; t < 64; ++t)
641 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000642
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000643 a = ctx->hash[0];
644 b = ctx->hash[1];
645 c = ctx->hash[2];
646 d = ctx->hash[3];
647 e = ctx->hash[4];
648 f = ctx->hash[5];
649 g = ctx->hash[6];
650 h = ctx->hash[7];
651
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000652 /* The actual computation according to FIPS 180-2:6.2.2 step 3. */
653 for (t = 0; t < 64; ++t) {
Denis Vlasenkoa2333c82009-03-28 19:08:23 +0000654 /* Need to fetch upper half of sha_K[t]
655 * (I hope compiler is clever enough to just fetch
656 * upper half)
657 */
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100658 uint32_t K_t = NEED_SHA512 ? (sha_K[t] >> 32) : sha_K[t];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000659 uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000660 uint32_t T2 = S0(a) + Maj(a, b, c);
661 h = g;
662 g = f;
663 f = e;
664 e = d + T1;
665 d = c;
666 c = b;
667 b = a;
668 a = T1 + T2;
669 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000670#undef Ch
671#undef Maj
672#undef S0
673#undef S1
674#undef R0
675#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000676 /* Add the starting values of the context according to FIPS 180-2:6.2.2
677 step 4. */
678 ctx->hash[0] += a;
679 ctx->hash[1] += b;
680 ctx->hash[2] += c;
681 ctx->hash[3] += d;
682 ctx->hash[4] += e;
683 ctx->hash[5] += f;
684 ctx->hash[6] += g;
685 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000686}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000687
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100688#if NEED_SHA512
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000689static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000690{
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000691 unsigned t;
692 uint64_t W[80];
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000693 /* On i386, having assignments here (not later as sha256 does)
694 * produces 99 bytes smaller code with gcc 4.3.1
695 */
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000696 uint64_t a = ctx->hash[0];
697 uint64_t b = ctx->hash[1];
698 uint64_t c = ctx->hash[2];
699 uint64_t d = ctx->hash[3];
700 uint64_t e = ctx->hash[4];
701 uint64_t f = ctx->hash[5];
702 uint64_t g = ctx->hash[6];
703 uint64_t h = ctx->hash[7];
Denis Vlasenko8ec8d5e2009-03-15 02:56:00 +0000704 const uint64_t *words = (uint64_t*) ctx->wbuffer;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000705
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000706 /* Operators defined in FIPS 180-2:4.1.2. */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000707#define Ch(x, y, z) ((x & y) ^ (~x & z))
708#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
709#define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
710#define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
711#define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
712#define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
713
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000714 /* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200715 for (t = 0; t < 16; ++t)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200716 W[t] = SWAP_BE64(words[t]);
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000717 for (/*t = 16*/; t < 80; ++t)
718 W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000719
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000720 /* The actual computation according to FIPS 180-2:6.3.2 step 3. */
721 for (t = 0; t < 80; ++t) {
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000722 uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000723 uint64_t T2 = S0(a) + Maj(a, b, c);
724 h = g;
725 g = f;
726 f = e;
727 e = d + T1;
728 d = c;
729 c = b;
730 b = a;
731 a = T1 + T2;
732 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000733#undef Ch
734#undef Maj
735#undef S0
736#undef S1
737#undef R0
738#undef R1
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000739 /* Add the starting values of the context according to FIPS 180-2:6.3.2
740 step 4. */
741 ctx->hash[0] += a;
742 ctx->hash[1] += b;
743 ctx->hash[2] += c;
744 ctx->hash[3] += d;
745 ctx->hash[4] += e;
746 ctx->hash[5] += f;
747 ctx->hash[6] += g;
748 ctx->hash[7] += h;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000749}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100750#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000751
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +0000752void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000753{
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000754 ctx->hash[0] = 0x67452301;
755 ctx->hash[1] = 0xefcdab89;
756 ctx->hash[2] = 0x98badcfe;
757 ctx->hash[3] = 0x10325476;
758 ctx->hash[4] = 0xc3d2e1f0;
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000759 ctx->total64 = 0;
760 ctx->process_block = sha1_process_block64;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000761}
762
Denys Vlasenko965b7952020-11-30 13:03:03 +0100763static const uint32_t init256[] ALIGN4 = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200764 0,
765 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000766 0x6a09e667,
767 0xbb67ae85,
768 0x3c6ef372,
769 0xa54ff53a,
770 0x510e527f,
771 0x9b05688c,
772 0x1f83d9ab,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200773 0x5be0cd19,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000774};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100775#if NEED_SHA512
Denys Vlasenko965b7952020-11-30 13:03:03 +0100776static const uint32_t init512_lo[] ALIGN4 = {
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200777 0,
778 0,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000779 0xf3bcc908,
780 0x84caa73b,
781 0xfe94f82b,
782 0x5f1d36f1,
783 0xade682d1,
784 0x2b3e6c1f,
785 0xfb41bd6b,
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200786 0x137e2179,
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000787};
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100788#endif /* NEED_SHA512 */
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000789
Denys Vlasenkof69f2072018-11-26 13:00:28 +0100790// Note: SHA-384 is identical to SHA-512, except that initial hash values are
791// 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939,
792// 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
793// and the output is constructed by omitting last two 64-bit words of it.
794
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000795/* Initialize structure containing state of computation.
796 (FIPS 180-2:5.3.2) */
797void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
798{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200799 memcpy(&ctx->total64, init256, sizeof(init256));
800 /*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000801 ctx->process_block = sha256_process_block64;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000802}
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000803
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100804#if NEED_SHA512
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000805/* Initialize structure containing state of computation.
806 (FIPS 180-2:5.3.3) */
807void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
808{
Denis Vlasenko98c87f72009-03-11 21:15:51 +0000809 int i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200810 /* Two extra iterations zero out ctx->total64[2] */
811 uint64_t *tp = ctx->total64;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +0200812 for (i = 0; i < 8 + 2; i++)
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200813 tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
Denys Vlasenkoa971a192010-10-17 01:35:16 +0200814 /*ctx->total64[0] = ctx->total64[1] = 0; - already done */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000815}
816
Denys Vlasenkoc0683ac2010-10-16 20:45:27 +0200817void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000818{
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200819 unsigned bufpos = ctx->total64[0] & 127;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200820 unsigned remaining;
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000821
822 /* First increment the byte count. FIPS 180-2 specifies the possible
823 length of the file up to 2^128 _bits_.
824 We compute the number of _bytes_ and convert to bits later. */
825 ctx->total64[0] += len;
826 if (ctx->total64[0] < len)
827 ctx->total64[1]++;
828
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200829 while (1) {
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200830 remaining = 128 - bufpos;
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200831 if (remaining > len)
832 remaining = len;
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200833 /* Copy data into aligned buffer */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200834 memcpy(ctx->wbuffer + bufpos, buffer, remaining);
835 len -= remaining;
836 buffer = (const char *)buffer + remaining;
837 bufpos += remaining;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +0200838
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +0100839 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200840 bufpos -= 128;
841 if (bufpos != 0)
842 break;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +0200843
Denys Vlasenko4bc3b852010-10-16 23:31:15 +0200844 /* Buffer is filled up, process it */
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000845 sha512_process_block128(ctx);
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200846 /*bufpos = 0; - already is */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000847 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000848}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100849#endif /* NEED_SHA512 */
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000850
Denis Vlasenko823f10b2009-03-15 04:56:51 +0000851/* Used also for sha256 */
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100852unsigned FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000853{
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200854 unsigned hash_size;
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000855
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200856 /* SHA stores total in BE, need to swap on LE arches: */
Denys Vlasenko302ad142010-10-19 02:16:12 +0200857 common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
Rob Landley5cf7c2d2006-02-21 06:44:43 +0000858
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200859 hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000860 /* This way we do not impose alignment constraints on resbuf: */
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100861 if (BB_LITTLE_ENDIAN) {
862 unsigned i;
Denys Vlasenkoc48a5c62010-10-18 14:48:30 +0200863 for (i = 0; i < hash_size; ++i)
Denys Vlasenkob102e122010-10-18 11:39:47 +0200864 ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100865 }
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100866 hash_size *= sizeof(ctx->hash[0]);
867 memcpy(resbuf, ctx->hash, hash_size);
868 return hash_size;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000869}
870
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100871#if NEED_SHA512
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100872unsigned FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000873{
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200874 unsigned bufpos = ctx->total64[0] & 127;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000875
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200876 /* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
Denys Vlasenko273abcb2010-10-16 22:43:34 +0200877 ctx->wbuffer[bufpos++] = 0x80;
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000878
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000879 while (1) {
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200880 unsigned remaining = 128 - bufpos;
881 memset(ctx->wbuffer + bufpos, 0, remaining);
882 if (remaining >= 16) {
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000883 /* Store the 128-bit counter of bits in the buffer in BE format */
884 uint64_t t;
885 t = ctx->total64[0] << 3;
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200886 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200887 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000888 t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200889 t = SWAP_BE64(t);
Denys Vlasenko1f5e81f2013-06-27 01:03:19 +0200890 *(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000891 }
Denis Vlasenkoe9afc462009-03-15 02:28:05 +0000892 sha512_process_block128(ctx);
Denys Vlasenkof6dacc22010-10-17 03:21:51 +0200893 if (remaining >= 16)
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000894 break;
Denys Vlasenko36ab5852010-10-17 03:00:36 +0200895 bufpos = 0;
Denis Vlasenkoc8329c92009-03-12 19:06:18 +0000896 }
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000897
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100898 if (BB_LITTLE_ENDIAN) {
899 unsigned i;
900 for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
Denys Vlasenko9ff50b82010-10-18 11:40:26 +0200901 ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
Denys Vlasenko245a4f82009-11-07 01:31:14 +0100902 }
Denis Vlasenkocd2cd312009-03-12 15:40:27 +0000903 memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
Denys Vlasenko49ecee02017-01-24 16:00:54 +0100904 return sizeof(ctx->hash);
Denis Vlasenko56dceb92008-11-10 13:32:50 +0000905}
Denys Vlasenkob8935d02017-01-15 20:16:27 +0100906#endif /* NEED_SHA512 */
Lauri Kasanenb8173b62013-01-14 05:20:50 +0100907
908
909/*
910 * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
911 * Michael Peeters and Gilles Van Assche. For more information, feedback or
912 * questions, please refer to our website: http://keccak.noekeon.org/
913 *
914 * Implementation by Ronny Van Keer,
915 * hereby denoted as "the implementer".
916 *
917 * To the extent possible under law, the implementer has waived all copyright
918 * and related or neighboring rights to the source code in this file.
919 * http://creativecommons.org/publicdomain/zero/1.0/
920 *
921 * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
922 */
923
Denys Vlasenko30a86522013-01-15 01:12:26 +0100924#if CONFIG_SHA3_SMALL < 0
925# define SHA3_SMALL 0
926#elif CONFIG_SHA3_SMALL > 1
927# define SHA3_SMALL 1
928#else
929# define SHA3_SMALL CONFIG_SHA3_SMALL
930#endif
931
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200932#define OPTIMIZE_SHA3_FOR_32 0
933/*
934 * SHA3 can be optimized for 32-bit CPUs with bit-slicing:
935 * every 64-bit word of state[] can be split into two 32-bit words
936 * by even/odd bits. In this form, all rotations of sha3 round
937 * are 32-bit - and there are lots of them.
938 * However, it requires either splitting/combining state words
939 * before/after sha3 round (code does this now)
940 * or shuffling bits before xor'ing them into state and in sha3_end.
941 * Without shuffling, bit-slicing results in -130 bytes of code
942 * and marginal speedup (but of course it gives wrong result).
943 * With shuffling it works, but +260 code bytes, and slower.
944 * Disabled for now:
945 */
946#if 0 /* LONG_MAX == 0x7fffffff */
947# undef OPTIMIZE_SHA3_FOR_32
948# define OPTIMIZE_SHA3_FOR_32 1
949#endif
950
Denys Vlasenko2a563ea2014-07-25 17:24:13 +0200951#if OPTIMIZE_SHA3_FOR_32
952/* This splits every 64-bit word into a pair of 32-bit words,
953 * even bits go into first word, odd bits go to second one.
954 * The conversion is done in-place.
955 */
956static void split_halves(uint64_t *state)
957{
958 /* Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
959 uint32_t *s32 = (uint32_t*)state;
960 uint32_t t, x0, x1;
961 int i;
962 for (i = 24; i >= 0; --i) {
963 x0 = s32[0];
964 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
965 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
966 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
967 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
968 x1 = s32[1];
969 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
970 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
971 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
972 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
973 *s32++ = (x0 & 0x0000FFFF) | (x1 << 16);
974 *s32++ = (x0 >> 16) | (x1 & 0xFFFF0000);
975 }
976}
977/* The reverse operation */
978static void combine_halves(uint64_t *state)
979{
980 uint32_t *s32 = (uint32_t*)state;
981 uint32_t t, x0, x1;
982 int i;
983 for (i = 24; i >= 0; --i) {
984 x0 = s32[0];
985 x1 = s32[1];
986 t = (x0 & 0x0000FFFF) | (x1 << 16);
987 x1 = (x0 >> 16) | (x1 & 0xFFFF0000);
988 x0 = t;
989 t = (x0 ^ (x0 >> 8)) & 0x0000FF00; x0 = x0 ^ t ^ (t << 8);
990 t = (x0 ^ (x0 >> 4)) & 0x00F000F0; x0 = x0 ^ t ^ (t << 4);
991 t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C; x0 = x0 ^ t ^ (t << 2);
992 t = (x0 ^ (x0 >> 1)) & 0x22222222; x0 = x0 ^ t ^ (t << 1);
993 *s32++ = x0;
994 t = (x1 ^ (x1 >> 8)) & 0x0000FF00; x1 = x1 ^ t ^ (t << 8);
995 t = (x1 ^ (x1 >> 4)) & 0x00F000F0; x1 = x1 ^ t ^ (t << 4);
996 t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C; x1 = x1 ^ t ^ (t << 2);
997 t = (x1 ^ (x1 >> 1)) & 0x22222222; x1 = x1 ^ t ^ (t << 1);
998 *s32++ = x1;
999 }
1000}
1001#endif
1002
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001003/*
1004 * In the crypto literature this function is usually called Keccak-f().
Denys Vlasenkoac4100e2013-01-15 16:27:39 +01001005 */
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001006static void sha3_process_block72(uint64_t *state)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001007{
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001008 enum { NROUNDS = 24 };
1009
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001010#if OPTIMIZE_SHA3_FOR_32
1011 /*
Denys Vlasenko965b7952020-11-30 13:03:03 +01001012 static const uint32_t IOTA_CONST_0[NROUNDS] ALIGN4 = {
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001013 0x00000001UL,
1014 0x00000000UL,
1015 0x00000000UL,
1016 0x00000000UL,
1017 0x00000001UL,
1018 0x00000001UL,
1019 0x00000001UL,
1020 0x00000001UL,
1021 0x00000000UL,
1022 0x00000000UL,
1023 0x00000001UL,
1024 0x00000000UL,
1025 0x00000001UL,
1026 0x00000001UL,
1027 0x00000001UL,
1028 0x00000001UL,
1029 0x00000000UL,
1030 0x00000000UL,
1031 0x00000000UL,
1032 0x00000000UL,
1033 0x00000001UL,
1034 0x00000000UL,
1035 0x00000001UL,
1036 0x00000000UL,
1037 };
1038 ** bits are in lsb: 0101 0000 1111 0100 1111 0001
1039 */
1040 uint32_t IOTA_CONST_0bits = (uint32_t)(0x0050f4f1);
Denys Vlasenko965b7952020-11-30 13:03:03 +01001041 static const uint32_t IOTA_CONST_1[NROUNDS] ALIGN4 = {
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001042 0x00000000UL,
1043 0x00000089UL,
1044 0x8000008bUL,
1045 0x80008080UL,
1046 0x0000008bUL,
1047 0x00008000UL,
1048 0x80008088UL,
1049 0x80000082UL,
1050 0x0000000bUL,
1051 0x0000000aUL,
1052 0x00008082UL,
1053 0x00008003UL,
1054 0x0000808bUL,
1055 0x8000000bUL,
1056 0x8000008aUL,
1057 0x80000081UL,
1058 0x80000081UL,
1059 0x80000008UL,
1060 0x00000083UL,
1061 0x80008003UL,
1062 0x80008088UL,
1063 0x80000088UL,
1064 0x00008000UL,
1065 0x80008082UL,
1066 };
1067
1068 uint32_t *const s32 = (uint32_t*)state;
1069 unsigned round;
1070
1071 split_halves(state);
1072
1073 for (round = 0; round < NROUNDS; round++) {
1074 unsigned x;
1075
1076 /* Theta */
1077 {
1078 uint32_t BC[20];
1079 for (x = 0; x < 10; ++x) {
1080 BC[x+10] = BC[x] = s32[x]^s32[x+10]^s32[x+20]^s32[x+30]^s32[x+40];
1081 }
1082 for (x = 0; x < 10; x += 2) {
1083 uint32_t ta, tb;
1084 ta = BC[x+8] ^ rotl32(BC[x+3], 1);
1085 tb = BC[x+9] ^ BC[x+2];
1086 s32[x+0] ^= ta;
1087 s32[x+1] ^= tb;
1088 s32[x+10] ^= ta;
1089 s32[x+11] ^= tb;
1090 s32[x+20] ^= ta;
1091 s32[x+21] ^= tb;
1092 s32[x+30] ^= ta;
1093 s32[x+31] ^= tb;
1094 s32[x+40] ^= ta;
1095 s32[x+41] ^= tb;
1096 }
1097 }
1098 /* RhoPi */
1099 {
1100 uint32_t t0a,t0b, t1a,t1b;
1101 t1a = s32[1*2+0];
1102 t1b = s32[1*2+1];
1103
1104#define RhoPi(PI_LANE, ROT_CONST) \
1105 t0a = s32[PI_LANE*2+0];\
1106 t0b = s32[PI_LANE*2+1];\
1107 if (ROT_CONST & 1) {\
1108 s32[PI_LANE*2+0] = rotl32(t1b, ROT_CONST/2+1);\
1109 s32[PI_LANE*2+1] = ROT_CONST == 1 ? t1a : rotl32(t1a, ROT_CONST/2+0);\
1110 } else {\
1111 s32[PI_LANE*2+0] = rotl32(t1a, ROT_CONST/2);\
1112 s32[PI_LANE*2+1] = rotl32(t1b, ROT_CONST/2);\
1113 }\
1114 t1a = t0a; t1b = t0b;
1115
1116 RhoPi(10, 1)
1117 RhoPi( 7, 3)
1118 RhoPi(11, 6)
1119 RhoPi(17,10)
1120 RhoPi(18,15)
1121 RhoPi( 3,21)
1122 RhoPi( 5,28)
1123 RhoPi(16,36)
1124 RhoPi( 8,45)
1125 RhoPi(21,55)
1126 RhoPi(24, 2)
1127 RhoPi( 4,14)
1128 RhoPi(15,27)
1129 RhoPi(23,41)
1130 RhoPi(19,56)
1131 RhoPi(13, 8)
1132 RhoPi(12,25)
1133 RhoPi( 2,43)
1134 RhoPi(20,62)
1135 RhoPi(14,18)
1136 RhoPi(22,39)
1137 RhoPi( 9,61)
1138 RhoPi( 6,20)
1139 RhoPi( 1,44)
1140#undef RhoPi
1141 }
1142 /* Chi */
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001143 for (x = 0; x <= 40;) {
1144 uint32_t BC0, BC1, BC2, BC3, BC4;
1145 BC0 = s32[x + 0*2];
1146 BC1 = s32[x + 1*2];
1147 BC2 = s32[x + 2*2];
1148 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1149 BC3 = s32[x + 3*2];
1150 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1151 BC4 = s32[x + 4*2];
1152 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1153 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1154 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1155 x++;
1156 BC0 = s32[x + 0*2];
1157 BC1 = s32[x + 1*2];
1158 BC2 = s32[x + 2*2];
1159 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1160 BC3 = s32[x + 3*2];
1161 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1162 BC4 = s32[x + 4*2];
1163 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1164 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1165 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1166 x += 9;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001167 }
1168 /* Iota */
1169 s32[0] ^= IOTA_CONST_0bits & 1;
1170 IOTA_CONST_0bits >>= 1;
1171 s32[1] ^= IOTA_CONST_1[round];
1172 }
1173
1174 combine_halves(state);
1175#else
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001176 /* Native 64-bit algorithm */
Denys Vlasenko965b7952020-11-30 13:03:03 +01001177 static const uint16_t IOTA_CONST[NROUNDS] ALIGN2 = {
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001178 /* Elements should be 64-bit, but top half is always zero
1179 * or 0x80000000. We encode 63rd bits in a separate word below.
1180 * Same is true for 31th bits, which lets us use 16-bit table
1181 * instead of 64-bit. The speed penalty is lost in the noise.
1182 */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001183 0x0001,
1184 0x8082,
1185 0x808a,
1186 0x8000,
1187 0x808b,
1188 0x0001,
1189 0x8081,
1190 0x8009,
1191 0x008a,
1192 0x0088,
1193 0x8009,
1194 0x000a,
1195 0x808b,
1196 0x008b,
1197 0x8089,
1198 0x8003,
1199 0x8002,
1200 0x0080,
1201 0x800a,
1202 0x000a,
1203 0x8081,
1204 0x8080,
1205 0x0001,
1206 0x8008,
1207 };
1208 /* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
1209 const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
1210 /* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
1211 const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
1212
Denys Vlasenko965b7952020-11-30 13:03:03 +01001213 static const uint8_t ROT_CONST[24] ALIGN1 = {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001214 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
1215 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
1216 };
Denys Vlasenko965b7952020-11-30 13:03:03 +01001217 static const uint8_t PI_LANE[24] ALIGN1 = {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001218 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
1219 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
1220 };
Denys Vlasenko965b7952020-11-30 13:03:03 +01001221 /*static const uint8_t MOD5[10] ALIGN1 = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001222
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001223 unsigned x;
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001224 unsigned round;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001225
1226 if (BB_BIG_ENDIAN) {
1227 for (x = 0; x < 25; x++) {
1228 state[x] = SWAP_LE64(state[x]);
1229 }
1230 }
1231
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001232 for (round = 0; round < NROUNDS; ++round) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001233 /* Theta */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001234 {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001235 uint64_t BC[10];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001236 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001237 BC[x + 5] = BC[x] = state[x]
1238 ^ state[x + 5] ^ state[x + 10]
1239 ^ state[x + 15] ^ state[x + 20];
Denys Vlasenko30a86522013-01-15 01:12:26 +01001240 }
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001241 /* Using 2x5 vector above eliminates the need to use
Denys Vlasenko5b7f50f2013-01-15 19:52:30 +01001242 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001243 * and the code is a bit _smaller_.
1244 */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001245 for (x = 0; x < 5; ++x) {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001246 uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001247 state[x] ^= temp;
1248 state[x + 5] ^= temp;
1249 state[x + 10] ^= temp;
1250 state[x + 15] ^= temp;
1251 state[x + 20] ^= temp;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001252 }
1253 }
1254
1255 /* Rho Pi */
Denys Vlasenko30a86522013-01-15 01:12:26 +01001256 if (SHA3_SMALL) {
1257 uint64_t t1 = state[1];
1258 for (x = 0; x < 24; ++x) {
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001259 uint64_t t0 = state[PI_LANE[x]];
1260 state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001261 t1 = t0;
1262 }
1263 } else {
Denys Vlasenkoa55df272013-01-15 15:22:30 +01001264 /* Especially large benefit for 32-bit arch (75% faster):
Denys Vlasenko30a86522013-01-15 01:12:26 +01001265 * 64-bit rotations by non-constant usually are SLOW on those.
1266 * We resort to unrolling here.
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001267 * This optimizes out PI_LANE[] and ROT_CONST[],
Denys Vlasenko30a86522013-01-15 01:12:26 +01001268 * but generates 300-500 more bytes of code.
1269 */
1270 uint64_t t0;
1271 uint64_t t1 = state[1];
1272#define RhoPi_twice(x) \
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001273 t0 = state[PI_LANE[x ]]; \
1274 state[PI_LANE[x ]] = rotl64(t1, ROT_CONST[x ]); \
1275 t1 = state[PI_LANE[x+1]]; \
1276 state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
Denys Vlasenko30a86522013-01-15 01:12:26 +01001277 RhoPi_twice(0); RhoPi_twice(2);
1278 RhoPi_twice(4); RhoPi_twice(6);
1279 RhoPi_twice(8); RhoPi_twice(10);
1280 RhoPi_twice(12); RhoPi_twice(14);
1281 RhoPi_twice(16); RhoPi_twice(18);
1282 RhoPi_twice(20); RhoPi_twice(22);
1283#undef RhoPi_twice
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001284 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001285 /* Chi */
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001286# if LONG_MAX > 0x7fffffff
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001287 for (x = 0; x <= 20; x += 5) {
Denys Vlasenko8fb3ab52013-01-15 22:07:48 +01001288 uint64_t BC0, BC1, BC2, BC3, BC4;
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001289 BC0 = state[x + 0];
1290 BC1 = state[x + 1];
1291 BC2 = state[x + 2];
1292 state[x + 0] = BC0 ^ ((~BC1) & BC2);
1293 BC3 = state[x + 3];
1294 state[x + 1] = BC1 ^ ((~BC2) & BC3);
1295 BC4 = state[x + 4];
1296 state[x + 2] = BC2 ^ ((~BC3) & BC4);
1297 state[x + 3] = BC3 ^ ((~BC4) & BC0);
1298 state[x + 4] = BC4 ^ ((~BC0) & BC1);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001299 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001300# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001301 /* Reduced register pressure version
1302 * for register-starved 32-bit arches
1303 * (i386: -95 bytes, and it is _faster_)
1304 */
1305 for (x = 0; x <= 40;) {
1306 uint32_t BC0, BC1, BC2, BC3, BC4;
1307 uint32_t *const s32 = (uint32_t*)state;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001308# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001309 do_half:
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001310# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001311 BC0 = s32[x + 0*2];
1312 BC1 = s32[x + 1*2];
1313 BC2 = s32[x + 2*2];
1314 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1315 BC3 = s32[x + 3*2];
1316 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1317 BC4 = s32[x + 4*2];
1318 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1319 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1320 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1321 x++;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001322# if SHA3_SMALL
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001323 if (x & 1)
1324 goto do_half;
1325 x += 8;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001326# else
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001327 BC0 = s32[x + 0*2];
1328 BC1 = s32[x + 1*2];
1329 BC2 = s32[x + 2*2];
1330 s32[x + 0*2] = BC0 ^ ((~BC1) & BC2);
1331 BC3 = s32[x + 3*2];
1332 s32[x + 1*2] = BC1 ^ ((~BC2) & BC3);
1333 BC4 = s32[x + 4*2];
1334 s32[x + 2*2] = BC2 ^ ((~BC3) & BC4);
1335 s32[x + 3*2] = BC3 ^ ((~BC4) & BC0);
1336 s32[x + 4*2] = BC4 ^ ((~BC0) & BC1);
1337 x += 9;
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001338# endif
Denys Vlasenko4ff933c2014-07-30 14:18:57 +02001339 }
Denys Vlasenko09a0e222014-07-30 16:26:09 +02001340# endif /* long is 32-bit */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001341 /* Iota */
Denys Vlasenko5368fe52013-01-16 02:20:31 +01001342 state[0] ^= IOTA_CONST[round]
1343 | (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1344 | (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001345 }
1346
1347 if (BB_BIG_ENDIAN) {
1348 for (x = 0; x < 25; x++) {
1349 state[x] = SWAP_LE64(state[x]);
1350 }
1351 }
Denys Vlasenko2a563ea2014-07-25 17:24:13 +02001352#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001353}
1354
1355void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1356{
1357 memset(ctx, 0, sizeof(*ctx));
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001358 /* SHA3-512, user can override */
1359 ctx->input_block_bytes = (1600 - 512*2) / 8; /* 72 bytes */
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001360}
1361
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001362void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001363{
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001364#if SHA3_SMALL
1365 const uint8_t *data = buffer;
1366 unsigned bufpos = ctx->bytes_queued;
1367
1368 while (1) {
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001369 unsigned remaining = ctx->input_block_bytes - bufpos;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001370 if (remaining > len)
1371 remaining = len;
1372 len -= remaining;
1373 /* XOR data into buffer */
1374 while (remaining != 0) {
1375 uint8_t *buf = (uint8_t*)ctx->state;
1376 buf[bufpos] ^= *data++;
1377 bufpos++;
1378 remaining--;
1379 }
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +02001380
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001381 /* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001382 bufpos -= ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001383 if (bufpos != 0)
1384 break;
Denys Vlasenkoabefc3c2020-09-30 22:22:04 +02001385
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001386 /* Buffer is filled up, process it */
1387 sha3_process_block72(ctx->state);
1388 /*bufpos = 0; - already is */
1389 }
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001390 ctx->bytes_queued = bufpos + ctx->input_block_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001391#else
1392 /* +50 bytes code size, but a bit faster because of long-sized XORs */
1393 const uint8_t *data = buffer;
1394 unsigned bufpos = ctx->bytes_queued;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001395 unsigned iblk_bytes = ctx->input_block_bytes;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001396
1397 /* If already data in queue, continue queuing first */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001398 if (bufpos != 0) {
1399 while (len != 0) {
1400 uint8_t *buf = (uint8_t*)ctx->state;
1401 buf[bufpos] ^= *data++;
1402 len--;
1403 bufpos++;
1404 if (bufpos == iblk_bytes) {
1405 bufpos = 0;
1406 goto do_block;
1407 }
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001408 }
1409 }
1410
1411 /* Absorb complete blocks */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001412 while (len >= iblk_bytes) {
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001413 /* XOR data onto beginning of state[].
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001414 * We try to be efficient - operate one word at a time, not byte.
1415 * Careful wrt unaligned access: can't just use "*(long*)data"!
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001416 */
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001417 unsigned count = iblk_bytes / sizeof(long);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001418 long *buf = (long*)ctx->state;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001419 do {
1420 long v;
1421 move_from_unaligned_long(v, (long*)data);
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001422 *buf++ ^= v;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001423 data += sizeof(long);
1424 } while (--count);
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001425 len -= iblk_bytes;
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001426 do_block:
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001427 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001428 }
1429
1430 /* Queue remaining data bytes */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001431 while (len != 0) {
1432 uint8_t *buf = (uint8_t*)ctx->state;
1433 buf[bufpos] ^= *data++;
1434 bufpos++;
1435 len--;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001436 }
Denys Vlasenko970aa6b2013-01-15 22:19:24 +01001437
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001438 ctx->bytes_queued = bufpos;
1439#endif
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001440}
1441
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001442unsigned FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001443{
1444 /* Padding */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001445 uint8_t *buf = (uint8_t*)ctx->state;
Denys Vlasenko71a090f2016-08-29 14:05:25 +02001446 /*
1447 * Keccak block padding is: add 1 bit after last bit of input,
1448 * then add zero bits until the end of block, and add the last 1 bit
1449 * (the last bit in the block) - the "10*1" pattern.
1450 * SHA3 standard appends additional two bits, 01, before that padding:
1451 *
1452 * SHA3-224(M) = KECCAK[448](M||01, 224)
1453 * SHA3-256(M) = KECCAK[512](M||01, 256)
1454 * SHA3-384(M) = KECCAK[768](M||01, 384)
1455 * SHA3-512(M) = KECCAK[1024](M||01, 512)
1456 * (M is the input, || is bit concatenation)
1457 *
1458 * The 6 below contains 01 "SHA3" bits and the first 1 "Keccak" bit:
1459 */
1460 buf[ctx->bytes_queued] ^= 6; /* bit pattern 00000110 */
1461 buf[ctx->input_block_bytes - 1] ^= 0x80;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001462
Denys Vlasenkoe4f0f262013-01-16 12:23:23 +01001463 sha3_process_block72(ctx->state);
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001464
1465 /* Output */
Denys Vlasenko2cfcc9e2013-01-20 00:38:09 +01001466 memcpy(resbuf, ctx->state, 64);
Denys Vlasenko49ecee02017-01-24 16:00:54 +01001467 return 64;
Lauri Kasanenb8173b62013-01-14 05:20:50 +01001468}