blob: 02838c4960a707bad3ef4fab84527874d0eba9b0 [file] [log] [blame]
Denis Vlasenko77f1ec12007-10-13 03:36:03 +00001/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Huffman coding low-level stuff ---*/
9/*--- huffman.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
28/*---------------------------------------------------*/
29#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
30#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
31#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32
33#define ADDWEIGHTS(zw1,zw2) \
34 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36
37#define UPHEAP(z) \
38{ \
39 int32_t zz, tmp; \
40 zz = z; \
41 tmp = heap[zz]; \
42 while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 heap[zz] = heap[zz >> 1]; \
44 zz >>= 1; \
45 } \
46 heap[zz] = tmp; \
47}
48
Denis Vlasenko94359932007-10-14 01:37:53 +000049
50/* 90 bytes, 0.3% of overall compress speed */
51#if CONFIG_BZIP2_FEATURE_SPEED >= 1
52
53/* macro works better than inline (gcc 4.2.1) */
54#define DOWNHEAP1(heap, weight, Heap) \
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000055{ \
56 int32_t zz, yy, tmp; \
Denis Vlasenko94359932007-10-14 01:37:53 +000057 zz = 1; \
58 tmp = heap[zz]; \
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000059 while (1) { \
60 yy = zz << 1; \
61 if (yy > nHeap) \
62 break; \
63 if (yy < nHeap \
64 && weight[heap[yy+1]] < weight[heap[yy]]) \
65 yy++; \
66 if (weight[tmp] < weight[heap[yy]]) \
67 break; \
68 heap[zz] = heap[yy]; \
69 zz = yy; \
70 } \
71 heap[zz] = tmp; \
72}
73
Denis Vlasenko94359932007-10-14 01:37:53 +000074#else
75
76static
77void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap)
78{
79 int32_t zz, yy, tmp;
80 zz = 1;
81 tmp = heap[zz];
82 while (1) {
83 yy = zz << 1;
84 if (yy > nHeap)
85 break;
86 if (yy < nHeap
87 && weight[heap[yy + 1]] < weight[heap[yy]])
88 yy++;
89 if (weight[tmp] < weight[heap[yy]])
90 break;
91 heap[zz] = heap[yy];
92 zz = yy;
93 }
94 heap[zz] = tmp;
95}
96
97#endif
Denis Vlasenko77f1ec12007-10-13 03:36:03 +000098
99/*---------------------------------------------------*/
100static
Denis Vlasenkoef3aabe2007-10-14 00:43:01 +0000101void BZ2_hbMakeCodeLengths(uint8_t *len,
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000102 int32_t *freq,
103 int32_t alphaSize,
104 int32_t maxLen)
105{
106 /*
107 * Nodes and heap entries run from 1. Entry 0
108 * for both the heap and nodes is a sentinel.
109 */
110 int32_t nNodes, nHeap, n1, n2, i, j, k;
111 Bool tooLong;
112
113 int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
114 int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
115 int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
116
117 for (i = 0; i < alphaSize; i++)
118 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
119
120 while (1) {
121 nNodes = alphaSize;
122 nHeap = 0;
123
124 heap[0] = 0;
125 weight[0] = 0;
126 parent[0] = -2;
127
128 for (i = 1; i <= alphaSize; i++) {
129 parent[i] = -1;
130 nHeap++;
131 heap[nHeap] = i;
132 UPHEAP(nHeap);
133 }
134
135 AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
Denis Vlasenko3f5fdc72007-10-14 04:55:59 +0000136
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000137 while (nHeap > 1) {
Denis Vlasenko94359932007-10-14 01:37:53 +0000138 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
139 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap);
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000140 nNodes++;
141 parent[n1] = parent[n2] = nNodes;
142 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
143 parent[nNodes] = -1;
144 nHeap++;
145 heap[nHeap] = nNodes;
146 UPHEAP(nHeap);
147 }
148
149 AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
150
151 tooLong = False;
152 for (i = 1; i <= alphaSize; i++) {
153 j = 0;
154 k = i;
155 while (parent[k] >= 0) {
156 k = parent[k];
157 j++;
158 }
159 len[i-1] = j;
160 if (j > maxLen)
161 tooLong = True;
162 }
Denis Vlasenko3f5fdc72007-10-14 04:55:59 +0000163
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000164 if (!tooLong)
165 break;
166
167 /* 17 Oct 04: keep-going condition for the following loop used
168 to be 'i < alphaSize', which missed the last element,
169 theoretically leading to the possibility of the compressor
170 looping. However, this count-scaling step is only needed if
171 one of the generated Huffman code words is longer than
172 maxLen, which up to and including version 1.0.2 was 20 bits,
173 which is extremely unlikely. In version 1.0.3 maxLen was
174 changed to 17 bits, which has minimal effect on compression
175 ratio, but does mean this scaling step is used from time to
176 time, enough to verify that it works.
177
178 This means that bzip2-1.0.3 and later will only produce
179 Huffman codes with a maximum length of 17 bits. However, in
180 order to preserve backwards compatibility with bitstreams
181 produced by versions pre-1.0.3, the decompressor must still
182 handle lengths of up to 20. */
183
184 for (i = 1; i <= alphaSize; i++) {
185 j = weight[i] >> 8;
Denis Vlasenko6a9154b2007-10-14 07:49:48 +0000186 /* bbox: yes, it is a signed division.
187 * don't replace with shift! */
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000188 j = 1 + (j / 2);
189 weight[i] = j << 8;
190 }
191 }
192}
193
194
195/*---------------------------------------------------*/
196static
197void BZ2_hbAssignCodes(int32_t *code,
Denis Vlasenkoef3aabe2007-10-14 00:43:01 +0000198 uint8_t *length,
Denis Vlasenko77f1ec12007-10-13 03:36:03 +0000199 int32_t minLen,
200 int32_t maxLen,
201 int32_t alphaSize)
202{
203 int32_t n, vec, i;
204
205 vec = 0;
206 for (n = minLen; n <= maxLen; n++) {
207 for (i = 0; i < alphaSize; i++) {
208 if (length[i] == n) {
209 code[i] = vec;
210 vec++;
211 };
212 }
213 vec <<= 1;
214 }
215}
216
217
218/*-------------------------------------------------------------*/
219/*--- end huffman.c ---*/
220/*-------------------------------------------------------------*/