blob: dc851cd3fb407e567b17de05b518c4cad098a83b [file] [log] [blame]
Denis Vlasenko77f1ec12007-10-13 03:36:03 +00001/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Huffman coding low-level stuff ---*/
9/*--- huffman.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
28/*---------------------------------------------------*/
29#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
30#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
31#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32
33#define ADDWEIGHTS(zw1,zw2) \
34 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36
37#define UPHEAP(z) \
38{ \
39 int32_t zz, tmp; \
40 zz = z; \
41 tmp = heap[zz]; \
42 while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 heap[zz] = heap[zz >> 1]; \
44 zz >>= 1; \
45 } \
46 heap[zz] = tmp; \
47}
48
Denis Vlasenko94359932007-10-14 01:37:53 +000049
50/* 90 bytes, 0.3% of overall compress speed */
Denys Vlasenko4cae0442018-02-07 01:33:25 +010051#if BZIP2_SPEED >= 1
Denis Vlasenko94359932007-10-14 01:37:53 +000052
53/* macro works better than inline (gcc 4.2.1) */
54#define DOWNHEAP1(heap, weight, Heap) \
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000055{ \
56 int32_t zz, yy, tmp; \
Denis Vlasenko94359932007-10-14 01:37:53 +000057 zz = 1; \
58 tmp = heap[zz]; \
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000059 while (1) { \
60 yy = zz << 1; \
61 if (yy > nHeap) \
62 break; \
63 if (yy < nHeap \
64 && weight[heap[yy+1]] < weight[heap[yy]]) \
65 yy++; \
66 if (weight[tmp] < weight[heap[yy]]) \
67 break; \
68 heap[zz] = heap[yy]; \
69 zz = yy; \
70 } \
71 heap[zz] = tmp; \
72}
73
Denis Vlasenko94359932007-10-14 01:37:53 +000074#else
75
76static
77void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
78{
79 int32_t zz, yy, tmp;
80 zz = 1;
81 tmp = heap[zz];
82 while (1) {
83 yy = zz << 1;
84 if (yy > nHeap)
85 break;
86 if (yy < nHeap
87 && weight[heap[yy + 1]] < weight[heap[yy]])
88 yy++;
89 if (weight[tmp] < weight[heap[yy]])
90 break;
91 heap[zz] = heap[yy];
92 zz = yy;
93 }
94 heap[zz] = tmp;
95}
96
97#endif
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000098
99/*---------------------------------------------------*/
100static
Denis Vlasenkoab801872007-12-02 08:35:37 +0000101void BZ2_hbMakeCodeLengths(EState *s,
102 uint8_t *len,
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000103 int32_t *freq,
104 int32_t alphaSize,
105 int32_t maxLen)
106{
107 /*
108 * Nodes and heap entries run from 1. Entry 0
109 * for both the heap and nodes is a sentinel.
110 */
111 int32_t nNodes, nHeap, n1, n2, i, j, k;
112 Bool tooLong;
113
Denis Vlasenkoab801872007-12-02 08:35:37 +0000114 /* bbox: moved to EState to save stack
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000115 int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
116 int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
117 int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
Denis Vlasenkoab801872007-12-02 08:35:37 +0000118 */
119#define heap (s->BZ2_hbMakeCodeLengths__heap)
120#define weight (s->BZ2_hbMakeCodeLengths__weight)
121#define parent (s->BZ2_hbMakeCodeLengths__parent)
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000122
123 for (i = 0; i < alphaSize; i++)
124 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125
126 while (1) {
127 nNodes = alphaSize;
128 nHeap = 0;
129
130 heap[0] = 0;
131 weight[0] = 0;
132 parent[0] = -2;
133
134 for (i = 1; i <= alphaSize; i++) {
135 parent[i] = -1;
136 nHeap++;
137 heap[nHeap] = i;
138 UPHEAP(nHeap);
139 }
140
141 AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
Denis Vlasenko3f5fdc72007-10-14 04:55:59 +0000142
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000143 while (nHeap > 1) {
Denis Vlasenko94359932007-10-14 01:37:53 +0000144 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
145 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000146 nNodes++;
147 parent[n1] = parent[n2] = nNodes;
148 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
149 parent[nNodes] = -1;
150 nHeap++;
151 heap[nHeap] = nNodes;
152 UPHEAP(nHeap);
153 }
154
155 AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
156
157 tooLong = False;
158 for (i = 1; i <= alphaSize; i++) {
159 j = 0;
160 k = i;
161 while (parent[k] >= 0) {
162 k = parent[k];
163 j++;
164 }
165 len[i-1] = j;
166 if (j > maxLen)
167 tooLong = True;
168 }
Denis Vlasenko3f5fdc72007-10-14 04:55:59 +0000169
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000170 if (!tooLong)
171 break;
172
173 /* 17 Oct 04: keep-going condition for the following loop used
174 to be 'i < alphaSize', which missed the last element,
175 theoretically leading to the possibility of the compressor
176 looping. However, this count-scaling step is only needed if
177 one of the generated Huffman code words is longer than
178 maxLen, which up to and including version 1.0.2 was 20 bits,
179 which is extremely unlikely. In version 1.0.3 maxLen was
180 changed to 17 bits, which has minimal effect on compression
181 ratio, but does mean this scaling step is used from time to
182 time, enough to verify that it works.
183
184 This means that bzip2-1.0.3 and later will only produce
185 Huffman codes with a maximum length of 17 bits. However, in
186 order to preserve backwards compatibility with bitstreams
187 produced by versions pre-1.0.3, the decompressor must still
188 handle lengths of up to 20. */
189
190 for (i = 1; i <= alphaSize; i++) {
191 j = weight[i] >> 8;
Denis Vlasenko6a9154b2007-10-14 07:49:48 +0000192 /* bbox: yes, it is a signed division.
193 * don't replace with shift! */
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000194 j = 1 + (j / 2);
195 weight[i] = j << 8;
196 }
197 }
Denis Vlasenkoab801872007-12-02 08:35:37 +0000198#undef heap
199#undef weight
200#undef parent
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000201}
202
203
204/*---------------------------------------------------*/
205static
206void BZ2_hbAssignCodes(int32_t *code,
Denis Vlasenkoef3aabe2007-10-14 00:43:01 +0000207 uint8_t *length,
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000208 int32_t minLen,
209 int32_t maxLen,
210 int32_t alphaSize)
211{
212 int32_t n, vec, i;
213
214 vec = 0;
215 for (n = minLen; n <= maxLen; n++) {
216 for (i = 0; i < alphaSize; i++) {
217 if (length[i] == n) {
218 code[i] = vec;
219 vec++;
Denys Vlasenko1cbcb022018-02-03 15:26:00 +0100220 }
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000221 }
222 vec <<= 1;
223 }
224}
225
226
227/*-------------------------------------------------------------*/
228/*--- end huffman.c ---*/
229/*-------------------------------------------------------------*/