blob: 8ce3cba748c8322d6632d77dbe0fb221eb1a87c4 [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Robert Griebl94a6a952002-05-19 19:00:14 +00002#include "libbb.h"
3
Robert Griebl94a6a952002-05-19 19:00:14 +00004/* uncompress for busybox -- (c) 2002 Robert Griebl
5 *
Eric Andersenc7bda1c2004-03-15 08:29:22 +00006 * based on the original compress42.c source
Robert Griebl94a6a952002-05-19 19:00:14 +00007 * (see disclaimer below)
8 */
9
10
11/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
12 *
13 * Authors:
14 * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
15 * Jim McKie (decvax!mcvax!jim)
16 * Steve Davies (decvax!vax135!petsd!peora!srd)
17 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
18 * James A. Woods (decvax!ihnp4!ames!jaw)
19 * Joe Orost (decvax!vax135!petsd!joe)
20 * Dave Mack (csu@alembic.acs.com)
21 * Peter Jannesen, Network Communication Systems
22 * (peter@ncs.nl)
23 *
24 * marc@suse.de : a small security fix for a buffer overflow
25 *
26 * [... History snipped ...]
27 *
28 */
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +000029#include <stdio.h>
30#include <string.h>
31#include <unistd.h>
Robert Griebl94a6a952002-05-19 19:00:14 +000032
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Defines for third byte of header */
/* NOTE(review): char_type is not declared anywhere in the visible part of
 * this file and MAGIC_1/MAGIC_2 are not referenced by the visible code --
 * confirm a definition exists before using them. */
#define MAGIC_1 (char_type)'\037'	/* First byte of compressed file */
#define MAGIC_2 (char_type)'\235'	/* Second byte of compressed file */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
						/* Masks 0x20 and 0x40 are free. */
						/* I think 0x20 should mean that there is */
						/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if the table is full and */
						/* the compression rate is dropping, flush tables */

/* The next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K
#define MAXCODE(n) (1L << (n))

/*
 * Table accessors. They expand to expressions on arrays named `htab' and
 * `codetab', which must be in scope at the point of use.
 * The suffix table and the decode stack both live in `htab': suffixes are
 * indexed from the start, the stack grows downward from the last element.
 */
#define htabof(i) htab[(i)]
#define codetabof(i) codetab[(i)]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[(i)]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
#define clear_htab() memset(htab, -1, HSIZE)
/* Zeroes the first 256 *bytes* of codetab. No trailing semicolon here, so
 * the macro behaves like a normal call inside if/else. */
#define clear_tab_prefixof() memset(codetab, 0, 256)
74
75
Robert Griebl94a6a952002-05-19 19:00:14 +000076/*
77 * Decompress stdin to stdout. This routine adapts to the codes in the
78 * file building the "string" table on-the-fly; requiring no table to
79 * be stored in the compressed file. The tables used herein are shared
80 * with those of the compress() routine. See the definitions above.
81 */
82
Denis Vlasenko97a8dd32006-10-01 15:55:11 +000083USE_DESKTOP(long long) int
84uncompress(int fd_in, int fd_out)
Robert Griebl94a6a952002-05-19 19:00:14 +000085{
Denis Vlasenko97a8dd32006-10-01 15:55:11 +000086 USE_DESKTOP(long long total_written = 0;)
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000087 unsigned char *stackp;
88 long int code;
89 int finchar;
90 long int oldcode;
91 long int incode;
92 int inbits;
93 int posbits;
94 int outpos;
95 int insize;
96 int bitmask;
97 long int free_ent;
98 long int maxcode;
99 long int maxmaxcode;
100 int n_bits;
101 int rsize = 0;
Bernhard Reutner-Fischer5d725462006-09-06 15:28:32 +0000102 RESERVE_CONFIG_UBUFFER(inbuf, IBUFSIZ + 64);
103 RESERVE_CONFIG_UBUFFER(outbuf, OBUFSIZ + 2048);
104 unsigned char htab[HSIZE];
105 unsigned short codetab[HSIZE];
Denis Vlasenko447b5432007-01-05 19:49:02 +0000106
107 /* Hmm, these were statics - why?! */
108 /* user settable max # bits/code */
109 int maxbits; /* = BITS; */
110 /* block compress mode -C compatible with 2.0 */
111 int block_mode; /* = BLOCK_MODE; */
112
Bernhard Reutner-Fischer5d725462006-09-06 15:28:32 +0000113 memset(inbuf, 0, IBUFSIZ + 64);
114 memset(outbuf, 0, OBUFSIZ + 2048);
Robert Griebl94a6a952002-05-19 19:00:14 +0000115
116 insize = 0;
117
Denis Vlasenko447b5432007-01-05 19:49:02 +0000118 /* xread isn't good here, we have to return - caller may want
119 * to do some cleanup (e.g. delete incomplete unpacked file etc) */
120 if (full_read(fd_in, inbuf, 1) != 1) {
121 bb_error_msg("short read");
122 return -1;
123 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000124
125 maxbits = inbuf[0] & BIT_MASK;
126 block_mode = inbuf[0] & BLOCK_MODE;
127 maxmaxcode = MAXCODE(maxbits);
128
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000129 if (maxbits > BITS) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000130 bb_error_msg("compressed with %d bits, can only handle "
131 "%d bits", maxbits, BITS);
Robert Griebl94a6a952002-05-19 19:00:14 +0000132 return -1;
133 }
134
Denis Vlasenko447b5432007-01-05 19:49:02 +0000135 n_bits = INIT_BITS;
136 maxcode = MAXCODE(INIT_BITS) - 1;
137 bitmask = (1 << INIT_BITS) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000138 oldcode = -1;
139 finchar = 0;
140 outpos = 0;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000141 posbits = 0 << 3;
Robert Griebl94a6a952002-05-19 19:00:14 +0000142
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000143 free_ent = ((block_mode) ? FIRST : 256);
Robert Griebl94a6a952002-05-19 19:00:14 +0000144
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000145 /* As above, initialize the first 256 entries in the table. */
146 clear_tab_prefixof();
Robert Griebl94a6a952002-05-19 19:00:14 +0000147
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000148 for (code = 255; code >= 0; --code) {
149 tab_suffixof(code) = (unsigned char) code;
150 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000151
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000152 do {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000153 resetbuf:
Robert Griebl94a6a952002-05-19 19:00:14 +0000154 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000155 int i;
156 int e;
157 int o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000158
Denis Vlasenko447b5432007-01-05 19:49:02 +0000159 o = posbits >> 3;
160 e = insize - o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000161
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000162 for (i = 0; i < e; ++i)
163 inbuf[i] = inbuf[i + o];
Robert Griebl94a6a952002-05-19 19:00:14 +0000164
165 insize = e;
166 posbits = 0;
167 }
168
Bernhard Reutner-Fischer5d725462006-09-06 15:28:32 +0000169 if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
Glenn L McGrath1a2d75f2003-11-21 22:17:28 +0000170 rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
Denis Vlasenko447b5432007-01-05 19:49:02 +0000171//error check??
Robert Griebl94a6a952002-05-19 19:00:14 +0000172 insize += rsize;
173 }
174
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000175 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
176 (insize << 3) - (n_bits - 1));
Robert Griebl94a6a952002-05-19 19:00:14 +0000177
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000178 while (inbits > posbits) {
179 if (free_ent > maxcode) {
180 posbits =
181 ((posbits - 1) +
182 ((n_bits << 3) -
183 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Robert Griebl94a6a952002-05-19 19:00:14 +0000184 ++n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000185 if (n_bits == maxbits) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000186 maxcode = maxmaxcode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000187 } else {
188 maxcode = MAXCODE(n_bits) - 1;
189 }
190 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000191 goto resetbuf;
192 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000193 {
194 unsigned char *p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000195
Denis Vlasenko447b5432007-01-05 19:49:02 +0000196 code = ((((long) (p[0])) | ((long) (p[1]) << 8) |
197 ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
Robert Griebl94a6a952002-05-19 19:00:14 +0000198 }
199 posbits += n_bits;
Robert Griebl94a6a952002-05-19 19:00:14 +0000200
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000201
202 if (oldcode == -1) {
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000203 oldcode = code;
204 finchar = (int) oldcode;
205 outbuf[outpos++] = (unsigned char) finchar;
Robert Griebl94a6a952002-05-19 19:00:14 +0000206 continue;
207 }
208
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000209 if (code == CLEAR && block_mode) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000210 clear_tab_prefixof();
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000211 free_ent = FIRST - 1;
212 posbits =
213 ((posbits - 1) +
214 ((n_bits << 3) -
215 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Denis Vlasenko447b5432007-01-05 19:49:02 +0000216 n_bits = INIT_BITS;
217 maxcode = MAXCODE(INIT_BITS) - 1;
218 bitmask = (1 << INIT_BITS) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000219 goto resetbuf;
220 }
221
222 incode = code;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000223 stackp = de_stack;
Robert Griebl94a6a952002-05-19 19:00:14 +0000224
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000225 /* Special case for KwKwK string. */
226 if (code >= free_ent) {
227 if (code > free_ent) {
228 unsigned char *p;
Robert Griebl94a6a952002-05-19 19:00:14 +0000229
230 posbits -= n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000231 p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000232
Manuel Novoa III cad53642003-03-19 09:13:01 +0000233 bb_error_msg
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000234 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
235 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
236 (posbits & 07));
Manuel Novoa III cad53642003-03-19 09:13:01 +0000237 bb_error_msg("uncompress: corrupt input");
Robert Griebl94a6a952002-05-19 19:00:14 +0000238 return -1;
239 }
240
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000241 *--stackp = (unsigned char) finchar;
242 code = oldcode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000243 }
244
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000245 /* Generate output characters in reverse order */
246 while ((long int) code >= (long int) 256) {
247 *--stackp = tab_suffixof(code);
248 code = tab_prefixof(code);
Robert Griebl94a6a952002-05-19 19:00:14 +0000249 }
250
Denis Vlasenko447b5432007-01-05 19:49:02 +0000251 finchar = tab_suffixof(code);
252 *--stackp = (unsigned char) finchar;
Robert Griebl94a6a952002-05-19 19:00:14 +0000253
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000254 /* And put them out in forward order */
Robert Griebl94a6a952002-05-19 19:00:14 +0000255 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000256 int i;
Robert Griebl94a6a952002-05-19 19:00:14 +0000257
Denis Vlasenko447b5432007-01-05 19:49:02 +0000258 i = de_stack - stackp;
259 if (outpos + i >= OBUFSIZ) {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000260 do {
261 if (i > OBUFSIZ - outpos) {
262 i = OBUFSIZ - outpos;
263 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000264
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000265 if (i > 0) {
266 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000267 outpos += i;
268 }
269
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000270 if (outpos >= OBUFSIZ) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000271 full_write(fd_out, outbuf, outpos);
272//error check??
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000273 USE_DESKTOP(total_written += outpos;)
Robert Griebl94a6a952002-05-19 19:00:14 +0000274 outpos = 0;
275 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000276 stackp += i;
Denis Vlasenko447b5432007-01-05 19:49:02 +0000277 i = de_stack - stackp;
278 } while (i > 0);
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000279 } else {
280 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000281 outpos += i;
282 }
283 }
284
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000285 /* Generate the new entry. */
Denis Vlasenko447b5432007-01-05 19:49:02 +0000286 code = free_ent;
287 if (code < maxmaxcode) {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000288 tab_prefixof(code) = (unsigned short) oldcode;
289 tab_suffixof(code) = (unsigned char) finchar;
290 free_ent = code + 1;
291 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000292
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000293 /* Remember previous code. */
294 oldcode = incode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000295 }
296
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000297 } while (rsize > 0);
Robert Griebl94a6a952002-05-19 19:00:14 +0000298
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000299 if (outpos > 0) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000300 full_write(fd_out, outbuf, outpos);
301//error check??
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000302 USE_DESKTOP(total_written += outpos;)
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000303 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000304
Bernhard Reutner-Fischer5d725462006-09-06 15:28:32 +0000305 RELEASE_CONFIG_BUFFER(inbuf);
306 RELEASE_CONFIG_BUFFER(outbuf);
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000307 return USE_DESKTOP(total_written) + 0;
Robert Griebl94a6a952002-05-19 19:00:14 +0000308}