blob: 161570040da4acd693d3cf8487072508e7303bec [file] [log] [blame]
"Robert P. J. Day"63fc1a92006-07-02 19:47:05 +00001/* vi: set sw=4 ts=4: */
Robert Griebl94a6a952002-05-19 19:00:14 +00002#include "libbb.h"
3
Robert Griebl94a6a952002-05-19 19:00:14 +00004/* uncompress for busybox -- (c) 2002 Robert Griebl
5 *
Eric Andersenc7bda1c2004-03-15 08:29:22 +00006 * based on the original compress42.c source
Robert Griebl94a6a952002-05-19 19:00:14 +00007 * (see disclaimer below)
8 */
9
Robert Griebl94a6a952002-05-19 19:00:14 +000010/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
11 *
12 * Authors:
13 * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
14 * Jim McKie (decvax!mcvax!jim)
15 * Steve Davies (decvax!vax135!petsd!peora!srd)
16 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
17 * James A. Woods (decvax!ihnp4!ames!jaw)
18 * Joe Orost (decvax!vax135!petsd!joe)
19 * Dave Mack (csu@alembic.acs.com)
20 * Peter Jannesen, Network Communication Systems
21 * (peter@ncs.nl)
22 *
23 * marc@suse.de : a small security fix for a buffer overflow
24 *
25 * [... History snipped ...]
26 *
27 */
Robert Griebl94a6a952002-05-19 19:00:14 +000028
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Defines for third byte of header */
#define BIT_MASK        0x1f    /* Mask for 'number of compression bits' */
                                /* Masks 0x20 and 0x40 are free. */
                                /* I think 0x20 should mean that there is */
                                /* a fourth header byte (for expansion). */
#define BLOCK_MODE      0x80    /* Block compression if table is full and */
                                /* compression rate is dropping flush tables */
/* The next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST   257     /* first free entry */
#define CLEAR   256     /* table clear output code */

#define INIT_BITS 9     /* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define HBITS      17   /* 50% occupancy */
#define HSIZE      (1<<HBITS)
#define HMASK      (HSIZE-1)    /* unused */
#define HPRIME     9941         /* unused */
#define BITS       16
#define BITS_STR   "16"         /* must be kept in sync with BITS by hand */
#undef  MAXSEG_64K              /* unused */
#define MAXCODE(n) (1L << (n))  /* number of distinct codes that fit in n bits */

/*
 * Table accessors. They expand to expressions on variables named
 * 'htab' (byte array of HSIZE elements) and 'codetab' (array of
 * unsigned short), which must be in scope at the point of use.
 *
 * htab serves two purposes at once: indexed from the start it holds
 * the suffix byte of each code, and its last byte is the top of the
 * decode stack, which grows downwards from there.
 */
#define htabof(i)               htab[i]
#define codetabof(i)            codetab[i]
#define tab_prefixof(i)         codetabof(i)
#define tab_suffixof(i)         ((unsigned char *)(htab))[i]
#define de_stack                ((unsigned char *)&(htab[HSIZE-1]))
/* Note: the length is 256 bytes, not 256 table entries. */
#define clear_tab_prefixof()    memset(codetab, 0, 256)
Robert Griebl94a6a952002-05-19 19:00:14 +000066
Robert Griebl94a6a952002-05-19 19:00:14 +000067/*
68 * Decompress stdin to stdout. This routine adapts to the codes in the
69 * file building the "string" table on-the-fly; requiring no table to
Denis Vlasenkoe8a07882007-06-10 15:08:44 +000070 * be stored in the compressed file.
Robert Griebl94a6a952002-05-19 19:00:14 +000071 */
72
Denis Vlasenkodefc1ea2008-06-27 02:52:20 +000073USE_DESKTOP(long long) int FAST_FUNC
Denis Vlasenko97a8dd32006-10-01 15:55:11 +000074uncompress(int fd_in, int fd_out)
Robert Griebl94a6a952002-05-19 19:00:14 +000075{
Denis Vlasenko97a8dd32006-10-01 15:55:11 +000076 USE_DESKTOP(long long total_written = 0;)
Denis Vlasenkoe8a07882007-06-10 15:08:44 +000077 USE_DESKTOP(long long) int retval = -1;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000078 unsigned char *stackp;
Denis Vlasenko87468852007-04-13 23:22:00 +000079 long code;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000080 int finchar;
Denis Vlasenko87468852007-04-13 23:22:00 +000081 long oldcode;
82 long incode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000083 int inbits;
84 int posbits;
85 int outpos;
86 int insize;
87 int bitmask;
Denis Vlasenko87468852007-04-13 23:22:00 +000088 long free_ent;
89 long maxcode;
90 long maxmaxcode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000091 int n_bits;
92 int rsize = 0;
Denis Vlasenkoe8a07882007-06-10 15:08:44 +000093 unsigned char *inbuf; /* were eating insane amounts of stack - */
94 unsigned char *outbuf; /* bad for some embedded targets */
95 unsigned char *htab;
96 unsigned short *codetab;
Denis Vlasenko447b5432007-01-05 19:49:02 +000097
98 /* Hmm, these were statics - why?! */
99 /* user settable max # bits/code */
100 int maxbits; /* = BITS; */
101 /* block compress mode -C compatible with 2.0 */
102 int block_mode; /* = BLOCK_MODE; */
103
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000104 inbuf = xzalloc(IBUFSIZ + 64);
105 outbuf = xzalloc(OBUFSIZ + 2048);
106 htab = xzalloc(HSIZE); /* wsn't zeroed out before, maybe can xmalloc? */
107 codetab = xzalloc(HSIZE * sizeof(codetab[0]));
Robert Griebl94a6a952002-05-19 19:00:14 +0000108
109 insize = 0;
110
Denis Vlasenko447b5432007-01-05 19:49:02 +0000111 /* xread isn't good here, we have to return - caller may want
112 * to do some cleanup (e.g. delete incomplete unpacked file etc) */
113 if (full_read(fd_in, inbuf, 1) != 1) {
114 bb_error_msg("short read");
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000115 goto err;
Denis Vlasenko447b5432007-01-05 19:49:02 +0000116 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000117
118 maxbits = inbuf[0] & BIT_MASK;
119 block_mode = inbuf[0] & BLOCK_MODE;
120 maxmaxcode = MAXCODE(maxbits);
121
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000122 if (maxbits > BITS) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000123 bb_error_msg("compressed with %d bits, can only handle "
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000124 BITS_STR" bits", maxbits);
125 goto err;
Robert Griebl94a6a952002-05-19 19:00:14 +0000126 }
127
Denis Vlasenko447b5432007-01-05 19:49:02 +0000128 n_bits = INIT_BITS;
129 maxcode = MAXCODE(INIT_BITS) - 1;
130 bitmask = (1 << INIT_BITS) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000131 oldcode = -1;
132 finchar = 0;
133 outpos = 0;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000134 posbits = 0 << 3;
Robert Griebl94a6a952002-05-19 19:00:14 +0000135
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000136 free_ent = ((block_mode) ? FIRST : 256);
Robert Griebl94a6a952002-05-19 19:00:14 +0000137
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000138 /* As above, initialize the first 256 entries in the table. */
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000139 /*clear_tab_prefixof(); - done by xzalloc */
Robert Griebl94a6a952002-05-19 19:00:14 +0000140
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000141 for (code = 255; code >= 0; --code) {
142 tab_suffixof(code) = (unsigned char) code;
143 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000144
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000145 do {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000146 resetbuf:
Robert Griebl94a6a952002-05-19 19:00:14 +0000147 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000148 int i;
149 int e;
150 int o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000151
Denis Vlasenko447b5432007-01-05 19:49:02 +0000152 o = posbits >> 3;
153 e = insize - o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000154
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000155 for (i = 0; i < e; ++i)
156 inbuf[i] = inbuf[i + o];
Robert Griebl94a6a952002-05-19 19:00:14 +0000157
158 insize = e;
159 posbits = 0;
160 }
161
Bernhard Reutner-Fischer5d725462006-09-06 15:28:32 +0000162 if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
Glenn L McGrath1a2d75f2003-11-21 22:17:28 +0000163 rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
Denis Vlasenko447b5432007-01-05 19:49:02 +0000164//error check??
Robert Griebl94a6a952002-05-19 19:00:14 +0000165 insize += rsize;
166 }
167
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000168 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
169 (insize << 3) - (n_bits - 1));
Robert Griebl94a6a952002-05-19 19:00:14 +0000170
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000171 while (inbits > posbits) {
172 if (free_ent > maxcode) {
173 posbits =
174 ((posbits - 1) +
175 ((n_bits << 3) -
176 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Robert Griebl94a6a952002-05-19 19:00:14 +0000177 ++n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000178 if (n_bits == maxbits) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000179 maxcode = maxmaxcode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000180 } else {
181 maxcode = MAXCODE(n_bits) - 1;
182 }
183 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000184 goto resetbuf;
185 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000186 {
187 unsigned char *p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000188
Denis Vlasenko447b5432007-01-05 19:49:02 +0000189 code = ((((long) (p[0])) | ((long) (p[1]) << 8) |
190 ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
Robert Griebl94a6a952002-05-19 19:00:14 +0000191 }
192 posbits += n_bits;
Robert Griebl94a6a952002-05-19 19:00:14 +0000193
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000194
195 if (oldcode == -1) {
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000196 oldcode = code;
197 finchar = (int) oldcode;
198 outbuf[outpos++] = (unsigned char) finchar;
Robert Griebl94a6a952002-05-19 19:00:14 +0000199 continue;
200 }
201
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000202 if (code == CLEAR && block_mode) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000203 clear_tab_prefixof();
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000204 free_ent = FIRST - 1;
205 posbits =
206 ((posbits - 1) +
207 ((n_bits << 3) -
208 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Denis Vlasenko447b5432007-01-05 19:49:02 +0000209 n_bits = INIT_BITS;
210 maxcode = MAXCODE(INIT_BITS) - 1;
211 bitmask = (1 << INIT_BITS) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000212 goto resetbuf;
213 }
214
215 incode = code;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000216 stackp = de_stack;
Robert Griebl94a6a952002-05-19 19:00:14 +0000217
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000218 /* Special case for KwKwK string. */
219 if (code >= free_ent) {
220 if (code > free_ent) {
221 unsigned char *p;
Robert Griebl94a6a952002-05-19 19:00:14 +0000222
223 posbits -= n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000224 p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000225
Manuel Novoa III cad53642003-03-19 09:13:01 +0000226 bb_error_msg
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000227 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
228 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
229 (posbits & 07));
Manuel Novoa III cad53642003-03-19 09:13:01 +0000230 bb_error_msg("uncompress: corrupt input");
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000231 goto err;
Robert Griebl94a6a952002-05-19 19:00:14 +0000232 }
233
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000234 *--stackp = (unsigned char) finchar;
235 code = oldcode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000236 }
237
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000238 /* Generate output characters in reverse order */
Denis Vlasenko87468852007-04-13 23:22:00 +0000239 while ((long) code >= (long) 256) {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000240 *--stackp = tab_suffixof(code);
241 code = tab_prefixof(code);
Robert Griebl94a6a952002-05-19 19:00:14 +0000242 }
243
Denis Vlasenko447b5432007-01-05 19:49:02 +0000244 finchar = tab_suffixof(code);
245 *--stackp = (unsigned char) finchar;
Robert Griebl94a6a952002-05-19 19:00:14 +0000246
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000247 /* And put them out in forward order */
Robert Griebl94a6a952002-05-19 19:00:14 +0000248 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000249 int i;
Robert Griebl94a6a952002-05-19 19:00:14 +0000250
Denis Vlasenko447b5432007-01-05 19:49:02 +0000251 i = de_stack - stackp;
252 if (outpos + i >= OBUFSIZ) {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000253 do {
254 if (i > OBUFSIZ - outpos) {
255 i = OBUFSIZ - outpos;
256 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000257
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000258 if (i > 0) {
259 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000260 outpos += i;
261 }
262
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000263 if (outpos >= OBUFSIZ) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000264 full_write(fd_out, outbuf, outpos);
265//error check??
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000266 USE_DESKTOP(total_written += outpos;)
Robert Griebl94a6a952002-05-19 19:00:14 +0000267 outpos = 0;
268 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000269 stackp += i;
Denis Vlasenko447b5432007-01-05 19:49:02 +0000270 i = de_stack - stackp;
271 } while (i > 0);
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000272 } else {
273 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000274 outpos += i;
275 }
276 }
277
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000278 /* Generate the new entry. */
Denis Vlasenko447b5432007-01-05 19:49:02 +0000279 code = free_ent;
280 if (code < maxmaxcode) {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000281 tab_prefixof(code) = (unsigned short) oldcode;
282 tab_suffixof(code) = (unsigned char) finchar;
283 free_ent = code + 1;
284 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000285
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000286 /* Remember previous code. */
287 oldcode = incode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000288 }
289
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000290 } while (rsize > 0);
Robert Griebl94a6a952002-05-19 19:00:14 +0000291
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000292 if (outpos > 0) {
Denis Vlasenko447b5432007-01-05 19:49:02 +0000293 full_write(fd_out, outbuf, outpos);
294//error check??
Denis Vlasenko97a8dd32006-10-01 15:55:11 +0000295 USE_DESKTOP(total_written += outpos;)
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000296 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000297
Denis Vlasenkoe8a07882007-06-10 15:08:44 +0000298 retval = USE_DESKTOP(total_written) + 0;
299 err:
300 free(inbuf);
301 free(outbuf);
302 free(htab);
303 free(codetab);
304 return retval;
Robert Griebl94a6a952002-05-19 19:00:14 +0000305}