sha3sum: new applet

function                                             old     new   delta
KeccakF                                                -     496    +496
KeccakF_RoundConstants                                 -     192    +192
sha3_hash                                              -     171    +171
sha3_end                                               -      40     +40
hash_file                                            274     299     +25
KeccakF_RotationConstants                              -      25     +25
KeccakF_PiLane                                         -      25     +25
packed_usage                                       29213   29232     +19
sha3_begin                                             -      18     +18
KeccakF_Mod5                                           -      10     +10
applet_names                                        2445    2453      +8
applet_main                                         1420    1424      +4
applet_nameofs                                       710     712      +2
------------------------------------------------------------------------------
(add/remove: 8/0 grow/shrink: 9/7 up/down: 1049/-54)         Total: ~995 bytes

Signed-off-by: Lauri Kasanen <curaga@operamail.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/coreutils/Config.src b/coreutils/Config.src
index a28449b..0c44c4b 100644
--- a/coreutils/Config.src
+++ b/coreutils/Config.src
@@ -514,6 +514,12 @@
 	help
 	  Compute and check SHA512 message digest
 
+config SHA3SUM
+	bool "sha3sum"
+	default y
+	help
+	  Compute and check SHA3 (512-bit) message digest
+
 config SLEEP
 	bool "sleep"
 	default y
@@ -766,13 +772,13 @@
 	help
 	  Allow df, du, and ls to have human readable output.
 
-comment "Common options for md5sum, sha1sum, sha256sum, sha512sum"
-	depends on MD5SUM || SHA1SUM || SHA256SUM || SHA512SUM
+comment "Common options for md5sum, sha1sum, sha256sum, sha512sum, sha3sum"
+	depends on MD5SUM || SHA1SUM || SHA256SUM || SHA512SUM || SHA3SUM
 
 config FEATURE_MD5_SHA1_SUM_CHECK
 	bool "Enable -c, -s and -w options"
 	default y
-	depends on MD5SUM || SHA1SUM || SHA256SUM || SHA512SUM
+	depends on MD5SUM || SHA1SUM || SHA256SUM || SHA512SUM || SHA3SUM
 	help
 	  Enabling the -c options allows files to be checked
 	  against pre-calculated hash values.
diff --git a/coreutils/Kbuild.src b/coreutils/Kbuild.src
index d6453f0..b715b9c 100644
--- a/coreutils/Kbuild.src
+++ b/coreutils/Kbuild.src
@@ -62,6 +62,7 @@
 lib-$(CONFIG_SHA1SUM)   += md5_sha1_sum.o
 lib-$(CONFIG_SHA256SUM) += md5_sha1_sum.o
 lib-$(CONFIG_SHA512SUM) += md5_sha1_sum.o
+lib-$(CONFIG_SHA3SUM)   += md5_sha1_sum.o
 lib-$(CONFIG_SLEEP)     += sleep.o
 lib-$(CONFIG_SPLIT)     += split.o
 lib-$(CONFIG_SORT)      += sort.o
diff --git a/coreutils/md5_sha1_sum.c b/coreutils/md5_sha1_sum.c
index 59b520f..92a4d44 100644
--- a/coreutils/md5_sha1_sum.c
+++ b/coreutils/md5_sha1_sum.c
@@ -55,6 +55,16 @@
 //usage:     "\n	-s	Don't output anything, status code shows success"
 //usage:     "\n	-w	Warn about improperly formatted checksum lines"
 //usage:	)
+//usage:
+//usage:#define sha3sum_trivial_usage
+//usage:	IF_FEATURE_MD5_SHA1_SUM_CHECK("[-c[sw]] ")"[FILE]..."
+//usage:#define sha3sum_full_usage "\n\n"
+//usage:       "Print" IF_FEATURE_MD5_SHA1_SUM_CHECK(" or check") " SHA3-512 checksums"
+//usage:	IF_FEATURE_MD5_SHA1_SUM_CHECK( "\n"
+//usage:     "\n	-c	Check sums against list in FILEs"
+//usage:     "\n	-s	Don't output anything, status code shows success"
+//usage:     "\n	-w	Warn about improperly formatted checksum lines"
+//usage:	)
 
 #include "libbb.h"
 
@@ -65,6 +75,7 @@
 	HASH_MD5 = 's', /* "md5>s<um" */
 	HASH_SHA1 = '1',
 	HASH_SHA256 = '2',
+	HASH_SHA3 = '3',
 	HASH_SHA512 = '5',
 };
 
@@ -86,6 +97,7 @@
 {
 	int src_fd, hash_len, count;
 	union _ctx_ {
+		sha3_ctx_t sha3;
 		sha512_ctx_t sha512;
 		sha256_ctx_t sha256;
 		sha1_ctx_t sha1;
@@ -124,6 +136,11 @@
 		update = (void*)sha512_hash;
 		final = (void*)sha512_end;
 		hash_len = 64;
+	} else if (ENABLE_SHA3SUM && hash_algo == HASH_SHA3) {
+		sha3_begin(&context.sha3);
+		update = (void*)sha3_hash;
+		final = (void*)sha3_end;
+		hash_len = 64;
 	} else {
 		xfunc_die(); /* can't reach this */
 	}
diff --git a/include/applets.src.h b/include/applets.src.h
index 597b1c9..29ab167 100644
--- a/include/applets.src.h
+++ b/include/applets.src.h
@@ -328,6 +328,7 @@
 IF_SETSID(APPLET(setsid, BB_DIR_USR_BIN, BB_SUID_DROP))
 IF_SETUIDGID(APPLET_ODDNAME(setuidgid, chpst, BB_DIR_USR_BIN, BB_SUID_DROP, setuidgid))
 IF_SHA1SUM(APPLET_NOEXEC(sha1sum, md5_sha1_sum, BB_DIR_USR_BIN, BB_SUID_DROP, sha1sum))
+IF_SHA3SUM(APPLET_NOEXEC(sha3sum, md5_sha1_sum, BB_DIR_USR_BIN, BB_SUID_DROP, sha3sum))
 IF_SHA256SUM(APPLET_NOEXEC(sha256sum, md5_sha1_sum, BB_DIR_USR_BIN, BB_SUID_DROP, sha256sum))
 IF_SHA512SUM(APPLET_NOEXEC(sha512sum, md5_sha1_sum, BB_DIR_USR_BIN, BB_SUID_DROP, sha512sum))
 IF_SHOWKEY(APPLET(showkey, BB_DIR_USR_BIN, BB_SUID_DROP))
diff --git a/include/libbb.h b/include/libbb.h
index 2059567..6ac7d2c 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1636,6 +1636,10 @@
 	uint64_t hash[8];
 	uint8_t wbuffer[128]; /* always correctly aligned for uint64_t */
 } sha512_ctx_t;
+typedef struct sha3_ctx_t {
+	uint64_t state[25]; /* 25 64-bit lanes; sha3_hash() XORs input into the leading bytes */
+	unsigned bytes_queued; /* input bytes XORed into state[] but not yet run through KeccakF() */
+} sha3_ctx_t;
 void md5_begin(md5_ctx_t *ctx) FAST_FUNC;
 void md5_hash(md5_ctx_t *ctx, const void *data, size_t length) FAST_FUNC;
 void md5_end(md5_ctx_t *ctx, void *resbuf) FAST_FUNC;
@@ -1648,6 +1652,9 @@
 void sha512_begin(sha512_ctx_t *ctx) FAST_FUNC;
 void sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC;
 void sha512_end(sha512_ctx_t *ctx, void *resbuf) FAST_FUNC;
+void sha3_begin(sha3_ctx_t *ctx) FAST_FUNC;
+void sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len) FAST_FUNC;
+void sha3_end(sha3_ctx_t *ctx, uint8_t *resbuf) FAST_FUNC;
 
 extern uint32_t *global_crc32_table;
 uint32_t *crc32_filltable(uint32_t *tbl256, int endian) FAST_FUNC;
diff --git a/include/platform.h b/include/platform.h
index 4025561..1282306 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -205,6 +205,7 @@
 
 #include <stdint.h>
 typedef int      bb__aliased_int      FIX_ALIASING;
+typedef long     bb__aliased_long     FIX_ALIASING;
 typedef uint16_t bb__aliased_uint16_t FIX_ALIASING;
 typedef uint32_t bb__aliased_uint32_t FIX_ALIASING;
 
@@ -212,7 +213,8 @@
  * a lvalue. This makes it more likely to not swap them by mistake
  */
 #if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
-# define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
+# define move_from_unaligned_int(v, intp)  ((v) = *(bb__aliased_int*)(intp))
+# define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
 # define move_from_unaligned32(v, u32p) ((v) = *(bb__aliased_uint32_t*)(u32p))
 # define move_to_unaligned16(u16p, v)   (*(bb__aliased_uint16_t*)(u16p) = (v))
@@ -221,6 +223,7 @@
 #else
 /* performs reasonably well (gcc usually inlines memcpy here) */
 # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
+# define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
 # define move_from_unaligned16(v, u16p) (memcpy(&(v), (u16p), 2))
 # define move_from_unaligned32(v, u32p) (memcpy(&(v), (u32p), 4))
 # define move_to_unaligned16(u16p, v) do { \
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index a313c2a..06a2400 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -31,6 +31,11 @@
 	return (x >> n) | (x << (64 - n));
 }
 
+/* Rotate x left by n bits, n in 1..63 (n == 0 would shift by 64). Only sha3 uses it currently */
+static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
+{
+	return (x << n) | (x >> (64 - n));
+}
 
 /* Feed data through a temporary buffer.
  * The internal buffer remembers previous data until it has 64
@@ -896,3 +901,192 @@
 	}
 	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
 }
+
+
+/*
+ * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+ * Michael Peeters and Gilles Van Assche. For more information, feedback or
+ * questions, please refer to our website: http://keccak.noekeon.org/
+ *
+ * Implementation by Ronny Van Keer,
+ * hereby denoted as "the implementer".
+ *
+ * To the extent possible under law, the implementer has waived all copyright
+ * and related or neighboring rights to the source code in this file.
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ *
+ * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
+ */
+
+enum {
+	cKeccakR_SizeInBytes = 576 / 8, /* 72: input bytes XORed into state[] per KeccakF() call */
+	cKeccakNumberOfRounds = 24, /* iterations of the round loop in KeccakF() */
+};
+
+static const uint64_t KeccakF_RoundConstants[cKeccakNumberOfRounds] = { /* [r] is XORed into state[0] in round r (Iota) */
+	0x0000000000000001ULL,
+	0x0000000000008082ULL,
+	0x800000000000808aULL,
+	0x8000000080008000ULL,
+	0x000000000000808bULL,
+	0x0000000080000001ULL,
+	0x8000000080008081ULL,
+	0x8000000000008009ULL,
+	0x000000000000008aULL,
+	0x0000000000000088ULL,
+	0x0000000080008009ULL,
+	0x000000008000000aULL,
+	0x000000008000808bULL,
+	0x800000000000008bULL,
+	0x8000000000008089ULL,
+	0x8000000000008003ULL,
+	0x8000000000008002ULL,
+	0x8000000000000080ULL,
+	0x000000000000800aULL,
+	0x800000008000000aULL,
+	0x8000000080008081ULL,
+	0x8000000000008080ULL,
+	0x0000000080000001ULL,
+	0x8000000080008008ULL
+};
+
+static const uint8_t KeccakF_RotationConstants[25] = { /* rotl64() amount for each Rho Pi step */
+	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62,
+	18, 39, 61, 20, 44 /* 24 values: Rho Pi reads [0..23] only, slot [24] is implicit zero */
+};
+
+static const uint8_t KeccakF_PiLane[25] = { /* state[] index written at each Rho Pi step */
+	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20,
+	14, 22, 9, 6, 1 /* 24 values: Rho Pi reads [0..23] only, slot [24] is implicit zero */
+};
+
+static const uint8_t KeccakF_Mod5[10] = { /* KeccakF_Mod5[i] == i % 5, for i in 0..9 */
+	0, 1, 2, 3, 4, 0, 1, 2, 3, 4
+};
+/* Run cKeccakNumberOfRounds rounds (Theta, Rho Pi, Chi, Iota) over state[0..24], in place */
+static void KeccakF(uint64_t *state)
+{
+	uint8_t x, y;
+	uint64_t temp;
+	uint64_t BC[5];
+	int round;
+
+	if (BB_BIG_ENDIAN) { /* callers XOR raw input bytes into state[]: SWAP_LE64 lanes for the math */
+		for (x = 0; x < 25; x++) {
+			state[x] = SWAP_LE64(state[x]);
+		}
+	}
+
+	for (round = 0; round < cKeccakNumberOfRounds; ++round) {
+		/* Theta: BC[x] = XOR of the five lanes x, 5+x, 10+x, 15+x, 20+x */
+		for (x = 0; x < 5; ++x) {
+			BC[x] = state[x] ^ state[5 + x] ^ state[10 + x] ^
+				state[15 + x] ^ state[20 + x];
+		}
+		for (x = 0; x < 5; ++x) {
+			temp = BC[KeccakF_Mod5[x + 4]] ^
+				rotl64(BC[KeccakF_Mod5[x + 1]], 1); /* BC[x-1] ^ rotl(BC[x+1], 1), indices mod 5 */
+
+			for (y = 0; y <= 20; y += 5) {
+				state[y + x] ^= temp;
+			}
+		}
+
+		/* Rho Pi: chain from state[1]; each PiLane[x] lane gets the previous lane's old value, rotated */
+		temp = state[1];
+		for (x = 0; x < 24; ++x) {
+			BC[0] = state[KeccakF_PiLane[x]];
+			state[KeccakF_PiLane[x]] =
+			    rotl64(temp, KeccakF_RotationConstants[x]);
+			temp = BC[0];
+		}
+
+		/* Chi: per group of 5 lanes, lane x ^= ~lane(x+1) & lane(x+2), computed from a saved copy */
+		for (y = 0; y < 25; y += 5) {
+			BC[0] = state[y + 0];
+			BC[1] = state[y + 1];
+			BC[2] = state[y + 2];
+			BC[3] = state[y + 3];
+			BC[4] = state[y + 4];
+			for (x = 0; x < 5; ++x) {
+				state[y + x] =
+				    BC[x] ^ ((~BC[KeccakF_Mod5[x + 1]]) &
+					     BC[KeccakF_Mod5[x + 2]]);
+			}
+		}
+
+		/* Iota: XOR this round's constant into state[0] */
+		state[0] ^= KeccakF_RoundConstants[round];
+	}
+
+	if (BB_BIG_ENDIAN) { /* swap back so state[] has the same byte layout as on entry */
+		for (x = 0; x < 25; x++) {
+			state[x] = SWAP_LE64(state[x]);
+		}
+	}
+}
+/* Start a new digest: zero the whole context (state[] and bytes_queued) */
+void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+}
+/* Absorb `bytes` bytes at `buf` into ctx; a partial trailing block stays queued in ctx */
+void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buf, size_t bytes)
+{
+	const uint8_t *data = buf;
+
+	/* A block is partially queued: top it up byte by byte until full or input runs out */
+	while (bytes != 0 && ctx->bytes_queued != 0) {
+		uint8_t *buffer = (uint8_t*)ctx->state;
+		buffer[ctx->bytes_queued] ^= *data++;
+		bytes--;
+		ctx->bytes_queued++;
+		if (ctx->bytes_queued == cKeccakR_SizeInBytes) {
+			KeccakF(ctx->state);
+			ctx->bytes_queued = 0;
+		}
+	}
+
+	/* Absorb complete blocks (here either the queue is empty or no input is left) */
+	while (bytes >= cKeccakR_SizeInBytes) {
+		/* XOR data onto the beginning of state[].
+		 * We try to be efficient: operate a word at a time, not a byte.
+		 * Yet stay safe wrt unaligned access: can't just use "*(long*)data"...
+		 */
+		unsigned count = cKeccakR_SizeInBytes / sizeof(long);
+		long *buffer = (long*)ctx->state;
+		do {
+			long v;
+			move_from_unaligned_long(v, (long*)data);
+			*buffer++ ^= v;
+			data += sizeof(long);
+		} while (--count);
+
+		KeccakF(ctx->state);
+		bytes -= cKeccakR_SizeInBytes;
+	}
+
+	/* Queue the tail (less than one block); KeccakF() runs on it in a later call or in sha3_end() */
+	while (bytes != 0) {
+		uint8_t *buffer = (uint8_t*)ctx->state;
+		buffer[ctx->bytes_queued] ^= *data++;
+		ctx->bytes_queued++;
+		bytes--;
+	}
+}
+/* Finish the digest: pad the queued block, run KeccakF() once more, copy 64 bytes to hashval */
+void FAST_FUNC sha3_end(sha3_ctx_t *ctx, uint8_t *hashval)
+{
+	/* Padding. NOTE(review): first pad byte is Keccak's 0x01; FIPS 202 SHA-3 uses 0x06 - confirm */
+	uint8_t *buffer = (uint8_t*)ctx->state;
+	/* The shift count is the number of message bits in the last, incomplete
+	 * byte; input always arrives as whole bytes here, hence 0:
+	 */
+	buffer[ctx->bytes_queued] ^= 1 << 0;
+	buffer[cKeccakR_SizeInBytes - 1] ^= 0x80;
+
+	KeccakF(ctx->state);
+
+	/* Output: the digest is the first 64 bytes of state[] */
+	memcpy(hashval, ctx->state, 64);
+}
diff --git a/testsuite/sha3sum.tests b/testsuite/sha3sum.tests
new file mode 100755
index 0000000..82fada6
--- /dev/null
+++ b/testsuite/sha3sum.tests
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+. ./md5sum.tests sha3sum c29d77bc548fa2b20a04c861400a5360879c52156e2a54a3415b99a9a3123e1d5f36714a24eca8c1f05a8e2d8ba859c930d41141f64a255c6794436fc99c486a