A really nice patch from Manuel Novoa III that adds a compile-time
configurable size/speed tradeoff (MD5SUM_SIZE_VS_SPEED) to md5sum.
diff --git a/md5sum.c b/md5sum.c
index dcb05c1..643f827 100644
--- a/md5sum.c
+++ b/md5sum.c
@@ -20,6 +20,24 @@
/* Written by Ulrich Drepper <drepper@gnu.ai.mit.edu> */
/* Hacked to work with BusyBox by Alfred M. Szmidt <ams@trillian.itslinux.org> */
+/*
+ * June 29, 2001 Manuel Novoa III
+ *
+ * Added MD5SUM_SIZE_VS_SPEED configuration option.
+ *
+ * Current valid values, with data from my system for comparison, are:
+ *   (using uClibc; timed while checksumming linux-2.4.4.tar.bz2)
+ *     value           user time (sec)   text size (386)
+ *     0 (fastest)          1.1               6144
+ *     1                    1.4               5392
+ *     2                    3.0               5088
+ *     3 (smallest)         5.1               4912
+ */
+
+#define MD5SUM_SIZE_VS_SPEED 2
+
+/**********************************************************************/
+
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
@@ -184,9 +202,11 @@
+#if MD5SUM_SIZE_VS_SPEED == 0
/* This array contains the bytes used to pad the buffer to the next
64-byte boundary. (RFC 1321, 3.1: Step 1) */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ };
+#endif
/* Initialize structure containing state of computation.
(RFC 1321, 3.3: Step 3) */
@@ -233,7 +253,12 @@
++ctx->total[1];
pad = bytes >= 56 ? 64 + 56 - bytes : 56 - bytes;
+#if MD5SUM_SIZE_VS_SPEED > 0
+ memset(&ctx->buffer[bytes], 0, pad);
+ ctx->buffer[bytes] = 0x80;
+#else
memcpy(&ctx->buffer[bytes], fillbuf, pad);
+#endif
/* Put the 64-bit file length in *bits* at the end of the buffer. */
*(md5_uint32 *) & ctx->buffer[bytes + pad] = SWAP(ctx->total[0] << 3);
@@ -369,6 +394,49 @@
const md5_uint32 *words = buffer;
size_t nwords = len / sizeof(md5_uint32);
const md5_uint32 *endp = words + nwords;
+#if MD5SUM_SIZE_VS_SPEED > 0
+ static const md5_uint32 C_array[] = {
+ /* round 1 */
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+ 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+ 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+ /* round 2 */
+ 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+ 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8,
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+ 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+ /* round 3 */
+ 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+ 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
+ 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+ /* round 4 */
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+ 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+ 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+ };
+
+ static const char P_array[] = {
+#if MD5SUM_SIZE_VS_SPEED > 1
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
+#endif
+ 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
+ 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
+ 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 /* 4 */
+ };
+
+#if MD5SUM_SIZE_VS_SPEED > 1
+ static const char S_array[] = {
+ 7, 12, 17, 22,
+ 5, 9, 14, 20,
+ 4, 11, 16, 23,
+ 6, 10, 15, 21
+ };
+#endif
+#endif
+
md5_uint32 A = ctx->A;
md5_uint32 B = ctx->B;
md5_uint32 C = ctx->C;
@@ -390,6 +458,79 @@
md5_uint32 C_save = C;
md5_uint32 D_save = D;
+#if MD5SUM_SIZE_VS_SPEED > 1
+#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
+
+ const md5_uint32 *pc;
+ const char *pp;
+ const char *ps;
+ int i;
+ md5_uint32 temp;
+
+ for ( i=0 ; i < 16 ; i++ ) {
+ cwp[i] = SWAP(words[i]);
+ }
+ words += 16;
+
+#if MD5SUM_SIZE_VS_SPEED > 2
+ pc = C_array; pp = P_array; ps = S_array - 4;
+
+ for ( i = 0 ; i < 64 ; i++ ) {
+ if ((i&0x0f) == 0) ps += 4;
+ temp = A;
+ switch (i>>4) {
+ case 0:
+ temp += FF(B,C,D);
+ break;
+ case 1:
+ temp += FG(B,C,D);
+ break;
+ case 2:
+ temp += FH(B,C,D);
+ break;
+ case 3:
+ temp += FI(B,C,D);
+ break;
+ }
+ temp += cwp[(int)(*pp++)] + *pc++;
+ temp = CYCLIC (temp, ps[i&3]);
+ temp += B;
+ A = D; D = C; C = B; B = temp;
+ }
+#else
+ pc = C_array; pp = P_array; ps = S_array;
+
+ for ( i = 0 ; i < 16 ; i++ ) {
+ temp = A + FF(B,C,D) + cwp[(int)(*pp++)] + *pc++;
+ temp = CYCLIC (temp, ps[i&3]);
+ temp += B;
+ A = D; D = C; C = B; B = temp;
+ }
+
+ ps += 4;
+ for ( i = 0 ; i < 16 ; i++ ) {
+ temp = A + FG(B,C,D) + cwp[(int)(*pp++)] + *pc++;
+ temp = CYCLIC (temp, ps[i&3]);
+ temp += B;
+ A = D; D = C; C = B; B = temp;
+ }
+ ps += 4;
+ for ( i = 0 ; i < 16 ; i++ ) {
+ temp = A + FH(B,C,D) + cwp[(int)(*pp++)] + *pc++;
+ temp = CYCLIC (temp, ps[i&3]);
+ temp += B;
+ A = D; D = C; C = B; B = temp;
+ }
+ ps += 4;
+ for ( i = 0 ; i < 16 ; i++ ) {
+ temp = A + FI(B,C,D) + cwp[(int)(*pp++)] + *pc++;
+ temp = CYCLIC (temp, ps[i&3]);
+ temp += B;
+ A = D; D = C; C = B; B = temp;
+ }
+
+#endif
+#else
/* First round: using the given function, the context and a constant
the next context is computed. Because the algorithms processing
unit is a 32-bit word and it is determined to work on words in
@@ -417,7 +558,22 @@
T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
*/
+#if MD5SUM_SIZE_VS_SPEED == 1
+ const md5_uint32 *pc;
+ const char *pp;
+ int i;
+#endif
+
/* Round 1. */
+#if MD5SUM_SIZE_VS_SPEED == 1
+ pc = C_array;
+ for ( i=0 ; i < 4 ; i++ ) {
+ OP(A, B, C, D, 7, *pc++);
+ OP(D, A, B, C, 12, *pc++);
+ OP(C, D, A, B, 17, *pc++);
+ OP(B, C, D, A, 22, *pc++);
+ }
+#else
OP(A, B, C, D, 7, 0xd76aa478);
OP(D, A, B, C, 12, 0xe8c7b756);
OP(C, D, A, B, 17, 0x242070db);
@@ -434,6 +590,7 @@
OP(D, A, B, C, 12, 0xfd987193);
OP(C, D, A, B, 17, 0xa679438e);
OP(B, C, D, A, 22, 0x49b40821);
+#endif
/* For the second to fourth round we have the possibly swapped words
in CORRECT_WORDS. Redefine the macro to take an additional first
@@ -449,6 +606,15 @@
while (0)
/* Round 2. */
+#if MD5SUM_SIZE_VS_SPEED == 1
+ pp = P_array;
+ for ( i=0 ; i < 4 ; i++ ) {
+ OP(FG, A, B, C, D, (int)(*pp++), 5, *pc++);
+ OP(FG, D, A, B, C, (int)(*pp++), 9, *pc++);
+ OP(FG, C, D, A, B, (int)(*pp++), 14, *pc++);
+ OP(FG, B, C, D, A, (int)(*pp++), 20, *pc++);
+ }
+#else
OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
OP(FG, D, A, B, C, 6, 9, 0xc040b340);
OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
@@ -465,8 +631,17 @@
OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
+#endif
/* Round 3. */
+#if MD5SUM_SIZE_VS_SPEED == 1
+ for ( i=0 ; i < 4 ; i++ ) {
+ OP(FH, A, B, C, D, (int)(*pp++), 4, *pc++);
+ OP(FH, D, A, B, C, (int)(*pp++), 11, *pc++);
+ OP(FH, C, D, A, B, (int)(*pp++), 16, *pc++);
+ OP(FH, B, C, D, A, (int)(*pp++), 23, *pc++);
+ }
+#else
OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
OP(FH, D, A, B, C, 8, 11, 0x8771f681);
OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
@@ -483,8 +658,17 @@
OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
+#endif
/* Round 4. */
+#if MD5SUM_SIZE_VS_SPEED == 1
+ for ( i=0 ; i < 4 ; i++ ) {
+ OP(FI, A, B, C, D, (int)(*pp++), 6, *pc++);
+ OP(FI, D, A, B, C, (int)(*pp++), 10, *pc++);
+ OP(FI, C, D, A, B, (int)(*pp++), 15, *pc++);
+ OP(FI, B, C, D, A, (int)(*pp++), 21, *pc++);
+ }
+#else
OP(FI, A, B, C, D, 0, 6, 0xf4292244);
OP(FI, D, A, B, C, 7, 10, 0x432aff97);
OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
@@ -501,6 +685,8 @@
OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
+#endif
+#endif
/* Add the starting values of the context. */
A += A_save;