blob: a3fcf64e22c76b53cf187d37b666459a39b81cc1 [file] [log] [blame]
Robert Griebl94a6a952002-05-19 19:00:14 +00001#include "config.h"
2#include "libbb.h"
3
/* uncompress for busybox -- (c) 2002 Robert Griebl
 *
 * based on the original compress42.c source
 * (see disclaimer below)
 */
9
10
/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
 *
 * Authors:
 *   Spencer W. Thomas   (decvax!harpo!utah-cs!utah-gr!thomas)
 *   Jim McKie           (decvax!mcvax!jim)
 *   Steve Davies        (decvax!vax135!petsd!peora!srd)
 *   Ken Turkowski       (decvax!decwrl!turtlevax!ken)
 *   James A. Woods      (decvax!ihnp4!ames!jaw)
 *   Joe Orost           (decvax!vax135!petsd!joe)
 *   Dave Mack           (csu@alembic.acs.com)
 *   Peter Jannesen, Network Communication Systems
 *                       (peter@ncs.nl)
 *
 * marc@suse.de : a small security fix for a buffer overflow
 *
 * [... History snipped ...]
 *
 */
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +000029#include <stdio.h>
30#include <string.h>
31#include <unistd.h>
Robert Griebl94a6a952002-05-19 19:00:14 +000032
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/*
 * Magic bytes that start a compressed file.
 * NOTE(review): char_type is not defined in the code visible here and
 * these two macros are not referenced by it; they only compile if a
 * definition of char_type is in scope where they are expanded.
 */
#define MAGIC_1 (char_type)'\037'	/* First byte of compressed file */
#define MAGIC_2 (char_type)'\235'	/* Second byte of compressed file */

/* Defines for the third byte of the header (the flags byte) */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
			/* Masks 0x20 and 0x40 are free. */
			/* I think 0x20 should mean that there is */
			/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if table is full and */
			/* compression rate is dropping, flush tables */

/*
 * The next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */

/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K

/* Number of distinct codes representable in n bits (as a long). */
#define MAXCODE(n) (1L << (n))
66
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000067/* Block compress mode -C compatible with 2.0 */
Eric Andersen14f5c8d2005-04-16 19:39:00 +000068static int block_mode = BLOCK_MODE;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000069
70/* user settable max # bits/code */
Eric Andersen14f5c8d2005-04-16 19:39:00 +000071static int maxbits = BITS;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000072
73/* Exitcode of compress (-1 no file compressed) */
Eric Andersen14f5c8d2005-04-16 19:39:00 +000074static int exit_code = -1;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000075
76/* Input buffer */
Eric Andersen14f5c8d2005-04-16 19:39:00 +000077static unsigned char inbuf[IBUFSIZ + 64];
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000078
79/* Output buffer */
Eric Andersen14f5c8d2005-04-16 19:39:00 +000080static unsigned char outbuf[OBUFSIZ + 2048];
Robert Griebl94a6a952002-05-19 19:00:14 +000081
82
Eric Andersen14f5c8d2005-04-16 19:39:00 +000083static long int htab[HSIZE];
84static unsigned short codetab[HSIZE];
Robert Griebl94a6a952002-05-19 19:00:14 +000085
86#define htabof(i) htab[i]
87#define codetabof(i) codetab[i]
88#define tab_prefixof(i) codetabof(i)
Glenn L McGrathfedbfe42002-11-28 09:09:47 +000089#define tab_suffixof(i) ((unsigned char *)(htab))[i]
90#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
Robert Griebl94a6a952002-05-19 19:00:14 +000091#define clear_htab() memset(htab, -1, sizeof(htab))
92#define clear_tab_prefixof() memset(codetab, 0, 256);
93
94
Robert Griebl94a6a952002-05-19 19:00:14 +000095/*
96 * Decompress stdin to stdout. This routine adapts to the codes in the
97 * file building the "string" table on-the-fly; requiring no table to
98 * be stored in the compressed file. The tables used herein are shared
99 * with those of the compress() routine. See the definitions above.
100 */
101
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000102extern int uncompress(int fd_in, int fd_out)
Robert Griebl94a6a952002-05-19 19:00:14 +0000103{
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000104 unsigned char *stackp;
105 long int code;
106 int finchar;
107 long int oldcode;
108 long int incode;
109 int inbits;
110 int posbits;
111 int outpos;
112 int insize;
113 int bitmask;
114 long int free_ent;
115 long int maxcode;
116 long int maxmaxcode;
117 int n_bits;
118 int rsize = 0;
Robert Griebl94a6a952002-05-19 19:00:14 +0000119
120 insize = 0;
121
Manuel Novoa III cad53642003-03-19 09:13:01 +0000122 inbuf[0] = bb_xread_char(fd_in);
Robert Griebl94a6a952002-05-19 19:00:14 +0000123
124 maxbits = inbuf[0] & BIT_MASK;
125 block_mode = inbuf[0] & BLOCK_MODE;
126 maxmaxcode = MAXCODE(maxbits);
127
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000128 if (maxbits > BITS) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000129 bb_error_msg("compressed with %d bits, can only handle %d bits", maxbits,
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000130 BITS);
Robert Griebl94a6a952002-05-19 19:00:14 +0000131 return -1;
132 }
133
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000134 maxcode = MAXCODE(n_bits = INIT_BITS) - 1;
135 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000136 oldcode = -1;
137 finchar = 0;
138 outpos = 0;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000139 posbits = 0 << 3;
Robert Griebl94a6a952002-05-19 19:00:14 +0000140
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000141 free_ent = ((block_mode) ? FIRST : 256);
Robert Griebl94a6a952002-05-19 19:00:14 +0000142
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000143 /* As above, initialize the first 256 entries in the table. */
144 clear_tab_prefixof();
Robert Griebl94a6a952002-05-19 19:00:14 +0000145
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000146 for (code = 255; code >= 0; --code) {
147 tab_suffixof(code) = (unsigned char) code;
148 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000149
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000150 do {
151 resetbuf:;
Robert Griebl94a6a952002-05-19 19:00:14 +0000152 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000153 int i;
154 int e;
155 int o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000156
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000157 e = insize - (o = (posbits >> 3));
Robert Griebl94a6a952002-05-19 19:00:14 +0000158
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000159 for (i = 0; i < e; ++i)
160 inbuf[i] = inbuf[i + o];
Robert Griebl94a6a952002-05-19 19:00:14 +0000161
162 insize = e;
163 posbits = 0;
164 }
165
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000166 if (insize < (int) sizeof(inbuf) - IBUFSIZ) {
Glenn L McGrath1a2d75f2003-11-21 22:17:28 +0000167 rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
Robert Griebl94a6a952002-05-19 19:00:14 +0000168 insize += rsize;
169 }
170
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000171 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
172 (insize << 3) - (n_bits - 1));
Robert Griebl94a6a952002-05-19 19:00:14 +0000173
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000174 while (inbits > posbits) {
175 if (free_ent > maxcode) {
176 posbits =
177 ((posbits - 1) +
178 ((n_bits << 3) -
179 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Robert Griebl94a6a952002-05-19 19:00:14 +0000180 ++n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000181 if (n_bits == maxbits) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000182 maxcode = maxmaxcode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000183 } else {
184 maxcode = MAXCODE(n_bits) - 1;
185 }
186 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000187 goto resetbuf;
188 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000189 {
190 unsigned char *p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000191
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000192 code =
193 ((((long) (p[0])) | ((long) (p[1]) << 8) |
194 ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
Robert Griebl94a6a952002-05-19 19:00:14 +0000195 }
196 posbits += n_bits;
Robert Griebl94a6a952002-05-19 19:00:14 +0000197
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000198
199 if (oldcode == -1) {
200 outbuf[outpos++] = (unsigned char) (finchar =
201 (int) (oldcode = code));
Robert Griebl94a6a952002-05-19 19:00:14 +0000202 continue;
203 }
204
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000205 if (code == CLEAR && block_mode) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000206 clear_tab_prefixof();
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000207 free_ent = FIRST - 1;
208 posbits =
209 ((posbits - 1) +
210 ((n_bits << 3) -
211 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
212 maxcode = MAXCODE(n_bits = INIT_BITS) - 1;
213 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000214 goto resetbuf;
215 }
216
217 incode = code;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000218 stackp = de_stack;
Robert Griebl94a6a952002-05-19 19:00:14 +0000219
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000220 /* Special case for KwKwK string. */
221 if (code >= free_ent) {
222 if (code > free_ent) {
223 unsigned char *p;
Robert Griebl94a6a952002-05-19 19:00:14 +0000224
225 posbits -= n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000226 p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000227
Manuel Novoa III cad53642003-03-19 09:13:01 +0000228 bb_error_msg
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000229 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
230 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
231 (posbits & 07));
Manuel Novoa III cad53642003-03-19 09:13:01 +0000232 bb_error_msg("uncompress: corrupt input");
Robert Griebl94a6a952002-05-19 19:00:14 +0000233 return -1;
234 }
235
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000236 *--stackp = (unsigned char) finchar;
237 code = oldcode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000238 }
239
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000240 /* Generate output characters in reverse order */
241 while ((long int) code >= (long int) 256) {
242 *--stackp = tab_suffixof(code);
243 code = tab_prefixof(code);
Robert Griebl94a6a952002-05-19 19:00:14 +0000244 }
245
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000246 *--stackp = (unsigned char) (finchar = tab_suffixof(code));
Robert Griebl94a6a952002-05-19 19:00:14 +0000247
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000248 /* And put them out in forward order */
Robert Griebl94a6a952002-05-19 19:00:14 +0000249 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000250 int i;
Robert Griebl94a6a952002-05-19 19:00:14 +0000251
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000252 if (outpos + (i = (de_stack - stackp)) >= OBUFSIZ) {
253 do {
254 if (i > OBUFSIZ - outpos) {
255 i = OBUFSIZ - outpos;
256 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000257
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000258 if (i > 0) {
259 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000260 outpos += i;
261 }
262
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000263 if (outpos >= OBUFSIZ) {
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000264 write(fd_out, outbuf, outpos);
Robert Griebl94a6a952002-05-19 19:00:14 +0000265 outpos = 0;
266 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000267 stackp += i;
268 } while ((i = (de_stack - stackp)) > 0);
269 } else {
270 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000271 outpos += i;
272 }
273 }
274
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000275 /* Generate the new entry. */
276 if ((code = free_ent) < maxmaxcode) {
277 tab_prefixof(code) = (unsigned short) oldcode;
278 tab_suffixof(code) = (unsigned char) finchar;
279 free_ent = code + 1;
280 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000281
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000282 /* Remember previous code. */
283 oldcode = incode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000284 }
285
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000286 } while (rsize > 0);
Robert Griebl94a6a952002-05-19 19:00:14 +0000287
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000288 if (outpos > 0) {
289 write(fd_out, outbuf, outpos);
290 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000291
292 return 0;
293}