blob: d01af11c24c6b5c4ea580126e6465e68846a4071 [file] [log] [blame]
Denis Vlasenko77f1ec12007-10-13 03:36:03 +00001/*
2 * bzip2 is written by Julian Seward <jseward@bzip.org>.
3 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
4 * See README and LICENSE files in this directory for more information.
5 */
6
7/*-------------------------------------------------------------*/
8/*--- Huffman coding low-level stuff ---*/
9/*--- huffman.c ---*/
10/*-------------------------------------------------------------*/
11
12/* ------------------------------------------------------------------
13This file is part of bzip2/libbzip2, a program and library for
14lossless, block-sorting data compression.
15
16bzip2/libbzip2 version 1.0.4 of 20 December 2006
17Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
18
19Please read the WARNING, DISCLAIMER and PATENTS sections in the
20README file.
21
22This program is released under the terms of the license contained
23in the file LICENSE.
24------------------------------------------------------------------ */
25
26/* #include "bzlib_private.h" */
27
28/*---------------------------------------------------*/
29#define WEIGHTOF(zz0) ((zz0) & 0xffffff00)
30#define DEPTHOF(zz1) ((zz1) & 0x000000ff)
31#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
32
33#define ADDWEIGHTS(zw1,zw2) \
34 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \
35 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
36
37#define UPHEAP(z) \
38{ \
39 int32_t zz, tmp; \
40 zz = z; \
41 tmp = heap[zz]; \
42 while (weight[tmp] < weight[heap[zz >> 1]]) { \
43 heap[zz] = heap[zz >> 1]; \
44 zz >>= 1; \
45 } \
46 heap[zz] = tmp; \
47}
48
49#define DOWNHEAP(z) \
50{ \
51 int32_t zz, yy, tmp; \
52 zz = z; tmp = heap[zz]; \
53 while (1) { \
54 yy = zz << 1; \
55 if (yy > nHeap) \
56 break; \
57 if (yy < nHeap \
58 && weight[heap[yy+1]] < weight[heap[yy]]) \
59 yy++; \
60 if (weight[tmp] < weight[heap[yy]]) \
61 break; \
62 heap[zz] = heap[yy]; \
63 zz = yy; \
64 } \
65 heap[zz] = tmp; \
66}
67
68
69/*---------------------------------------------------*/
70static
71void BZ2_hbMakeCodeLengths(UChar *len,
72 int32_t *freq,
73 int32_t alphaSize,
74 int32_t maxLen)
75{
76 /*
77 * Nodes and heap entries run from 1. Entry 0
78 * for both the heap and nodes is a sentinel.
79 */
80 int32_t nNodes, nHeap, n1, n2, i, j, k;
81 Bool tooLong;
82
83 int32_t heap [BZ_MAX_ALPHA_SIZE + 2];
84 int32_t weight[BZ_MAX_ALPHA_SIZE * 2];
85 int32_t parent[BZ_MAX_ALPHA_SIZE * 2];
86
87 for (i = 0; i < alphaSize; i++)
88 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
89
90 while (1) {
91 nNodes = alphaSize;
92 nHeap = 0;
93
94 heap[0] = 0;
95 weight[0] = 0;
96 parent[0] = -2;
97
98 for (i = 1; i <= alphaSize; i++) {
99 parent[i] = -1;
100 nHeap++;
101 heap[nHeap] = i;
102 UPHEAP(nHeap);
103 }
104
105 AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001);
106
107 while (nHeap > 1) {
108 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
109 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
110 nNodes++;
111 parent[n1] = parent[n2] = nNodes;
112 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
113 parent[nNodes] = -1;
114 nHeap++;
115 heap[nHeap] = nNodes;
116 UPHEAP(nHeap);
117 }
118
119 AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
120
121 tooLong = False;
122 for (i = 1; i <= alphaSize; i++) {
123 j = 0;
124 k = i;
125 while (parent[k] >= 0) {
126 k = parent[k];
127 j++;
128 }
129 len[i-1] = j;
130 if (j > maxLen)
131 tooLong = True;
132 }
133
134 if (!tooLong)
135 break;
136
137 /* 17 Oct 04: keep-going condition for the following loop used
138 to be 'i < alphaSize', which missed the last element,
139 theoretically leading to the possibility of the compressor
140 looping. However, this count-scaling step is only needed if
141 one of the generated Huffman code words is longer than
142 maxLen, which up to and including version 1.0.2 was 20 bits,
143 which is extremely unlikely. In version 1.0.3 maxLen was
144 changed to 17 bits, which has minimal effect on compression
145 ratio, but does mean this scaling step is used from time to
146 time, enough to verify that it works.
147
148 This means that bzip2-1.0.3 and later will only produce
149 Huffman codes with a maximum length of 17 bits. However, in
150 order to preserve backwards compatibility with bitstreams
151 produced by versions pre-1.0.3, the decompressor must still
152 handle lengths of up to 20. */
153
154 for (i = 1; i <= alphaSize; i++) {
155 j = weight[i] >> 8;
156 j = 1 + (j / 2);
157 weight[i] = j << 8;
158 }
159 }
160}
161
162
163/*---------------------------------------------------*/
164static
165void BZ2_hbAssignCodes(int32_t *code,
166 UChar *length,
167 int32_t minLen,
168 int32_t maxLen,
169 int32_t alphaSize)
170{
171 int32_t n, vec, i;
172
173 vec = 0;
174 for (n = minLen; n <= maxLen; n++) {
175 for (i = 0; i < alphaSize; i++) {
176 if (length[i] == n) {
177 code[i] = vec;
178 vec++;
179 };
180 }
181 vec <<= 1;
182 }
183}
184
185
186/*-------------------------------------------------------------*/
187/*--- end huffman.c ---*/
188/*-------------------------------------------------------------*/