attack the biggest stack users:
-mkfs_minix_main [busybox_unstripped]: 4288
-mkfs_minix_main [busybox_unstripped]: 4276
-grave [busybox_unstripped]: 4260
(the bzip2 code's big stack users are attacked too - they are not listed here)
the price we pay is a code size increase (columns: function, old size, new size, delta):
mainSort 2458 2515 +57
grave 1005 1058 +53
sendMTFValues 2177 2195 +18
BZ2_blockSort 122 125 +3
mkfs_minix_main 3070 3022 -48
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/1 up/down: 131/-48) Total: 83 bytes
diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c
index cddbfcb..0e73ffe 100644
--- a/archival/bz/blocksort.c
+++ b/archival/bz/blocksort.c
@@ -721,7 +721,8 @@
#define CLEARMASK (~(SETMASK))
static NOINLINE
-void mainSort(uint32_t* ptr,
+void mainSort(EState* state,
+ uint32_t* ptr,
uint8_t* block,
uint16_t* quadrant,
uint32_t* ftab,
@@ -729,13 +730,18 @@
int32_t* budget)
{
int32_t i, j, k, ss, sb;
- int32_t runningOrder[256];
- Bool bigDone[256];
- int32_t copyStart[256];
- int32_t copyEnd [256];
uint8_t c1;
int32_t numQSorted;
uint16_t s;
+ Bool bigDone[256];
+ /* bbox: moved to EState to save stack
+ int32_t runningOrder[256];
+ int32_t copyStart[256];
+ int32_t copyEnd [256];
+ */
+#define runningOrder (state->mainSort__runningOrder)
+#define copyStart (state->mainSort__copyStart)
+#define copyEnd (state->mainSort__copyEnd)
/*-- set up the 2-byte frequency table --*/
/* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */
@@ -985,6 +991,9 @@
AssertH(((bbSize-1) >> shifts) <= 65535, 1002);
}
}
+#undef runningOrder
+#undef copyStart
+#undef copyEnd
}
#undef BIGFREQ
@@ -1041,7 +1050,7 @@
*/
budget = nblock * ((wfact-1) / 3);
- mainSort(ptr, block, quadrant, ftab, nblock, &budget);
+ mainSort(s, ptr, block, quadrant, ftab, nblock, &budget);
if (budget < 0) {
fallbackSort(s->arr1, s->arr2, ftab, nblock);
}
diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h
index 02f177e..48676a3 100644
--- a/archival/bz/bzlib_private.h
+++ b/archival/bz/bzlib_private.h
@@ -178,13 +178,22 @@
uint8_t selector [BZ_MAX_SELECTORS];
uint8_t selectorMtf[BZ_MAX_SELECTORS];
- uint8_t len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int32_t code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
- int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+
+ /* stack-saving measures: these can be local, but they are too big */
+ int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
#if CONFIG_BZIP2_FEATURE_SPEED >= 5
/* second dimension: only 3 needed; 4 makes index calculations faster */
- uint32_t len_pack[BZ_MAX_ALPHA_SIZE][4];
+ uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4];
#endif
+ int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2];
+ int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2];
+ int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2];
+
+ int32_t mainSort__runningOrder[256];
+ int32_t mainSort__copyStart[256];
+ int32_t mainSort__copyEnd[256];
} EState;
@@ -203,7 +212,7 @@
BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t);
static void
-BZ2_hbMakeCodeLengths(uint8_t*, int32_t*, int32_t, int32_t);
+BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t);
/*-------------------------------------------------------------*/
/*--- end bzlib_private.h ---*/
diff --git a/archival/bz/compress.c b/archival/bz/compress.c
index b72edbb..640b887 100644
--- a/archival/bz/compress.c
+++ b/archival/bz/compress.c
@@ -264,13 +264,16 @@
* are also globals only used in this proc.
* Made global to keep stack frame size small.
*/
+#define code sendMTFValues__code
+#define rfreq sendMTFValues__rfreq
+#define len_pack sendMTFValues__len_pack
uint16_t cost[BZ_N_GROUPS];
int32_t fave[BZ_N_GROUPS];
uint16_t* mtfv = s->mtfv;
- alphaSize = s->nInUse+2;
+ alphaSize = s->nInUse + 2;
for (t = 0; t < BZ_N_GROUPS; t++)
for (v = 0; v < alphaSize; v++)
s->len[t][v] = BZ_GREATER_ICOST;
@@ -453,7 +456,7 @@
/* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
* comment in huffman.c for details. */
for (t = 0; t < nGroups; t++)
- BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
+ BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/);
}
AssertH(nGroups < 8, 3002);
@@ -602,6 +605,9 @@
selCtr++;
}
AssertH(selCtr == nSelectors, 3007);
+#undef code
+#undef rfreq
+#undef len_pack
}
diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c
index 02838c4..676b1af 100644
--- a/archival/bz/huffman.c
+++ b/archival/bz/huffman.c
@@ -98,7 +98,8 @@
/*---------------------------------------------------*/
static
-void BZ2_hbMakeCodeLengths(uint8_t *len,
+void BZ2_hbMakeCodeLengths(EState *s,
+ uint8_t *len,
int32_t *freq,
int32_t alphaSize,
int32_t maxLen)
@@ -110,9 +111,14 @@
int32_t nNodes, nHeap, n1, n2, i, j, k;
Bool tooLong;
+ /* bbox: moved to EState to save stack
int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
+ */
+#define heap (s->BZ2_hbMakeCodeLengths__heap)
+#define weight (s->BZ2_hbMakeCodeLengths__weight)
+#define parent (s->BZ2_hbMakeCodeLengths__parent)
for (i = 0; i < alphaSize; i++)
weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
@@ -189,6 +195,9 @@
weight[i] = j << 8;
}
}
+#undef heap
+#undef weight
+#undef parent
}