Reduce stack usage of the biggest stack users:
-mkfs_minix_main [busybox_unstripped]:                  4288
-mkfs_minix_main [busybox_unstripped]:                  4276
-grave [busybox_unstripped]:                            4260
(the bzip2 functions are big stack users too - not listed here)

The price we pay is an increase in code size:
mainSort                                            2458    2515     +57
grave                                               1005    1058     +53
sendMTFValues                                       2177    2195     +18
BZ2_blockSort                                        122     125      +3
mkfs_minix_main                                     3070    3022     -48
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/1 up/down: 131/-48)            Total: 83 bytes

diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c
index cddbfcb..0e73ffe 100644
--- a/archival/bz/blocksort.c
+++ b/archival/bz/blocksort.c
@@ -721,7 +721,8 @@
 #define CLEARMASK (~(SETMASK))
 
 static NOINLINE
-void mainSort(uint32_t*   ptr,
+void mainSort(EState* state,
+		uint32_t* ptr,
 		uint8_t*  block,
 		uint16_t* quadrant,
 		uint32_t* ftab,
@@ -729,13 +730,18 @@
 		int32_t*  budget)
 {
 	int32_t  i, j, k, ss, sb;
-	int32_t  runningOrder[256];
-	Bool     bigDone[256];
-	int32_t  copyStart[256];
-	int32_t  copyEnd  [256];
 	uint8_t  c1;
 	int32_t  numQSorted;
 	uint16_t s;
+	Bool     bigDone[256];
+	/* bbox: moved to EState to save stack
+	int32_t  runningOrder[256];
+	int32_t  copyStart[256];
+	int32_t  copyEnd  [256];
+	*/
+#define runningOrder (state->mainSort__runningOrder)
+#define copyStart    (state->mainSort__copyStart)
+#define copyEnd      (state->mainSort__copyEnd)
 
 	/*-- set up the 2-byte frequency table --*/
 	/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
@@ -985,6 +991,9 @@
 			AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
 		}
 	}
+#undef runningOrder
+#undef copyStart
+#undef copyEnd
 }
 
 #undef BIGFREQ
@@ -1041,7 +1050,7 @@
 		 */
 		budget = nblock * ((wfact-1) / 3);
 
-		mainSort(ptr, block, quadrant, ftab, nblock, &budget);
+		mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
 		if (budget < 0) {
 			fallbackSort(s->arr1, s->arr2, ftab, nblock);
 		}
diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h
index 02f177e..48676a3 100644
--- a/archival/bz/bzlib_private.h
+++ b/archival/bz/bzlib_private.h
@@ -178,13 +178,22 @@
 	uint8_t  selector   [BZ_MAX_SELECTORS];
 	uint8_t  selectorMtf[BZ_MAX_SELECTORS];
 
-	uint8_t  len  [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
-	int32_t  code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
-	int32_t  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+	uint8_t  len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+
+	/* stack-saving measures: these can be local, but they are too big */
+	int32_t  sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+	int32_t  sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
 #if CONFIG_BZIP2_FEATURE_SPEED >= 5
 	/* second dimension: only 3 needed; 4 makes index calculations faster */
-	uint32_t len_pack[BZ_MAX_ALPHA_SIZE][4];
+	uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
 #endif
+	int32_t  BZ2_hbMakeCodeLengths__heap  [BZ_MAX_ALPHA_SIZE + 2];
+	int32_t  BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2];
+	int32_t  BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2];
+
+	int32_t  mainSort__runningOrder[256];
+	int32_t  mainSort__copyStart[256];
+	int32_t  mainSort__copyEnd[256];
 } EState;
 
 
@@ -203,7 +212,7 @@
 BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t);
 
 static void
-BZ2_hbMakeCodeLengths(uint8_t*, int32_t*, int32_t, int32_t);
+BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t);
 
 /*-------------------------------------------------------------*/
 /*--- end                                   bzlib_private.h ---*/
diff --git a/archival/bz/compress.c b/archival/bz/compress.c
index b72edbb..640b887 100644
--- a/archival/bz/compress.c
+++ b/archival/bz/compress.c
@@ -264,13 +264,16 @@
 	 * are also globals only used in this proc.
 	 * Made global to keep stack frame size small.
 	 */
+#define code     sendMTFValues__code
+#define rfreq    sendMTFValues__rfreq
+#define len_pack sendMTFValues__len_pack
 
 	uint16_t cost[BZ_N_GROUPS];
 	int32_t  fave[BZ_N_GROUPS];
 
 	uint16_t* mtfv = s->mtfv;
 
-	alphaSize = s->nInUse+2;
+	alphaSize = s->nInUse + 2;
 	for (t = 0; t < BZ_N_GROUPS; t++)
 		for (v = 0; v < alphaSize; v++)
 			s->len[t][v] = BZ_GREATER_ICOST;
@@ -453,7 +456,7 @@
 		/* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
 		 * comment in huffman.c for details. */
 		for (t = 0; t < nGroups; t++)
-			BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
+			BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
 	}
 
 	AssertH(nGroups < 8, 3002);
@@ -602,6 +605,9 @@
 		selCtr++;
 	}
 	AssertH(selCtr == nSelectors, 3007);
+#undef code
+#undef rfreq
+#undef len_pack
 }
 
 
diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c
index 02838c4..676b1af 100644
--- a/archival/bz/huffman.c
+++ b/archival/bz/huffman.c
@@ -98,7 +98,8 @@
 
 /*---------------------------------------------------*/
 static
-void BZ2_hbMakeCodeLengths(uint8_t *len,
+void BZ2_hbMakeCodeLengths(EState *s,
+		uint8_t *len,
 		int32_t *freq,
 		int32_t alphaSize,
 		int32_t maxLen)
@@ -110,9 +111,14 @@
 	int32_t nNodes, nHeap, n1, n2, i, j, k;
 	Bool  tooLong;
 
+	/* bbox: moved to EState to save stack
 	int32_t heap  [BZ_MAX_ALPHA_SIZE + 2];
 	int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
 	int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
+	*/
+#define heap   (s->BZ2_hbMakeCodeLengths__heap)
+#define weight (s->BZ2_hbMakeCodeLengths__weight)
+#define parent (s->BZ2_hbMakeCodeLengths__parent)
 
 	for (i = 0; i < alphaSize; i++)
 		weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
@@ -189,6 +195,9 @@
 			weight[i] = j << 8;
 		}
 	}
+#undef heap
+#undef weight
+#undef parent
 }