blob: 1e859dcde8bc388dbf5d2c8bf6ff308e9c2a8b12 [file] [log] [blame]
Robert Griebl94a6a952002-05-19 19:00:14 +00001#include "libbb.h"
2
/* uncompress for busybox -- (c) 2002 Robert Griebl
 *
 * based on the original compress42.c source
 * (see disclaimer below)
 */


/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992.
 *
 * Authors:
 *   Spencer W. Thomas   (decvax!harpo!utah-cs!utah-gr!thomas)
 *   Jim McKie           (decvax!mcvax!jim)
 *   Steve Davies        (decvax!vax135!petsd!peora!srd)
 *   Ken Turkowski       (decvax!decwrl!turtlevax!ken)
 *   James A. Woods      (decvax!ihnp4!ames!jaw)
 *   Joe Orost           (decvax!vax135!petsd!joe)
 *   Dave Mack           (csu@alembic.acs.com)
 *   Peter Jannesen, Network Communication Systems
 *                       (peter@ncs.nl)
 *
 * marc@suse.de : a small security fix for a buffer overflow
 *
 * [... History snipped ...]
 *
 */
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +000028#include <stdio.h>
29#include <string.h>
30#include <unistd.h>
Robert Griebl94a6a952002-05-19 19:00:14 +000031
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Magic bytes at the start of a compressed file */
#define MAGIC_1 ((unsigned char) '\037')	/* First byte of compressed file */
#define MAGIC_2 ((unsigned char) '\235')	/* Second byte of compressed file */

/* Defines for third byte of header */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
			/* Masks 0x20 and 0x40 are free. */
			/* I think 0x20 should mean that there is */
			/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if table is full and */
			/* compression rate is dropping, flush tables */

/* The next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space. */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K
#define MAXCODE(n) (1L << (n))

/* Block compress mode -C compatible with 2.0 */
static int block_mode = BLOCK_MODE;

/* user settable max # bits/code */
static int maxbits = BITS;

/* Input buffer */
static unsigned char inbuf[IBUFSIZ + 64];

/* Output buffer */
static unsigned char outbuf[OBUFSIZ + 2048];


/*
 * htab is used two ways through the macros below: indexed from the
 * bottom as the per-code suffix table (tab_suffixof), and from its last
 * element downwards as the decode stack (de_stack).
 * codetab holds the prefix code of every table entry (tab_prefixof).
 */
static unsigned char htab[HSIZE];
static unsigned short codetab[HSIZE];

#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[i]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
#define clear_htab() memset(htab, -1, sizeof(htab))
/* Zero the prefix of the first 256 codes.  The size is given in table
 * entries, and the expansion carries no trailing ';' so the macro can be
 * used like an ordinary function call, including in an unbraced if/else. */
#define clear_tab_prefixof() memset(codetab, 0, 256 * sizeof(codetab[0]))
89
90
Robert Griebl94a6a952002-05-19 19:00:14 +000091/*
92 * Decompress stdin to stdout. This routine adapts to the codes in the
93 * file building the "string" table on-the-fly; requiring no table to
94 * be stored in the compressed file. The tables used herein are shared
95 * with those of the compress() routine. See the definitions above.
96 */
97
Rob Landleydfba7412006-03-06 20:47:33 +000098int uncompress(int fd_in, int fd_out)
Robert Griebl94a6a952002-05-19 19:00:14 +000099{
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000100 unsigned char *stackp;
101 long int code;
102 int finchar;
103 long int oldcode;
104 long int incode;
105 int inbits;
106 int posbits;
107 int outpos;
108 int insize;
109 int bitmask;
110 long int free_ent;
111 long int maxcode;
112 long int maxmaxcode;
113 int n_bits;
114 int rsize = 0;
Robert Griebl94a6a952002-05-19 19:00:14 +0000115
116 insize = 0;
117
Manuel Novoa III cad53642003-03-19 09:13:01 +0000118 inbuf[0] = bb_xread_char(fd_in);
Robert Griebl94a6a952002-05-19 19:00:14 +0000119
120 maxbits = inbuf[0] & BIT_MASK;
121 block_mode = inbuf[0] & BLOCK_MODE;
122 maxmaxcode = MAXCODE(maxbits);
123
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000124 if (maxbits > BITS) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000125 bb_error_msg("compressed with %d bits, can only handle %d bits", maxbits,
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000126 BITS);
Robert Griebl94a6a952002-05-19 19:00:14 +0000127 return -1;
128 }
129
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000130 maxcode = MAXCODE(n_bits = INIT_BITS) - 1;
131 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000132 oldcode = -1;
133 finchar = 0;
134 outpos = 0;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000135 posbits = 0 << 3;
Robert Griebl94a6a952002-05-19 19:00:14 +0000136
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000137 free_ent = ((block_mode) ? FIRST : 256);
Robert Griebl94a6a952002-05-19 19:00:14 +0000138
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000139 /* As above, initialize the first 256 entries in the table. */
140 clear_tab_prefixof();
Robert Griebl94a6a952002-05-19 19:00:14 +0000141
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000142 for (code = 255; code >= 0; --code) {
143 tab_suffixof(code) = (unsigned char) code;
144 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000145
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000146 do {
147 resetbuf:;
Robert Griebl94a6a952002-05-19 19:00:14 +0000148 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000149 int i;
150 int e;
151 int o;
Robert Griebl94a6a952002-05-19 19:00:14 +0000152
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000153 e = insize - (o = (posbits >> 3));
Robert Griebl94a6a952002-05-19 19:00:14 +0000154
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000155 for (i = 0; i < e; ++i)
156 inbuf[i] = inbuf[i + o];
Robert Griebl94a6a952002-05-19 19:00:14 +0000157
158 insize = e;
159 posbits = 0;
160 }
161
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000162 if (insize < (int) sizeof(inbuf) - IBUFSIZ) {
Glenn L McGrath1a2d75f2003-11-21 22:17:28 +0000163 rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
Robert Griebl94a6a952002-05-19 19:00:14 +0000164 insize += rsize;
165 }
166
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000167 inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 :
168 (insize << 3) - (n_bits - 1));
Robert Griebl94a6a952002-05-19 19:00:14 +0000169
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000170 while (inbits > posbits) {
171 if (free_ent > maxcode) {
172 posbits =
173 ((posbits - 1) +
174 ((n_bits << 3) -
175 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
Robert Griebl94a6a952002-05-19 19:00:14 +0000176 ++n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000177 if (n_bits == maxbits) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000178 maxcode = maxmaxcode;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000179 } else {
180 maxcode = MAXCODE(n_bits) - 1;
181 }
182 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000183 goto resetbuf;
184 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000185 {
186 unsigned char *p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000187
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000188 code =
189 ((((long) (p[0])) | ((long) (p[1]) << 8) |
190 ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask;
Robert Griebl94a6a952002-05-19 19:00:14 +0000191 }
192 posbits += n_bits;
Robert Griebl94a6a952002-05-19 19:00:14 +0000193
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000194
195 if (oldcode == -1) {
196 outbuf[outpos++] = (unsigned char) (finchar =
197 (int) (oldcode = code));
Robert Griebl94a6a952002-05-19 19:00:14 +0000198 continue;
199 }
200
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000201 if (code == CLEAR && block_mode) {
Robert Griebl94a6a952002-05-19 19:00:14 +0000202 clear_tab_prefixof();
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000203 free_ent = FIRST - 1;
204 posbits =
205 ((posbits - 1) +
206 ((n_bits << 3) -
207 (posbits - 1 + (n_bits << 3)) % (n_bits << 3)));
208 maxcode = MAXCODE(n_bits = INIT_BITS) - 1;
209 bitmask = (1 << n_bits) - 1;
Robert Griebl94a6a952002-05-19 19:00:14 +0000210 goto resetbuf;
211 }
212
213 incode = code;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000214 stackp = de_stack;
Robert Griebl94a6a952002-05-19 19:00:14 +0000215
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000216 /* Special case for KwKwK string. */
217 if (code >= free_ent) {
218 if (code > free_ent) {
219 unsigned char *p;
Robert Griebl94a6a952002-05-19 19:00:14 +0000220
221 posbits -= n_bits;
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000222 p = &inbuf[posbits >> 3];
Robert Griebl94a6a952002-05-19 19:00:14 +0000223
Manuel Novoa III cad53642003-03-19 09:13:01 +0000224 bb_error_msg
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000225 ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)",
226 insize, posbits, p[-1], p[0], p[1], p[2], p[3],
227 (posbits & 07));
Manuel Novoa III cad53642003-03-19 09:13:01 +0000228 bb_error_msg("uncompress: corrupt input");
Robert Griebl94a6a952002-05-19 19:00:14 +0000229 return -1;
230 }
231
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000232 *--stackp = (unsigned char) finchar;
233 code = oldcode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000234 }
235
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000236 /* Generate output characters in reverse order */
237 while ((long int) code >= (long int) 256) {
238 *--stackp = tab_suffixof(code);
239 code = tab_prefixof(code);
Robert Griebl94a6a952002-05-19 19:00:14 +0000240 }
241
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000242 *--stackp = (unsigned char) (finchar = tab_suffixof(code));
Robert Griebl94a6a952002-05-19 19:00:14 +0000243
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000244 /* And put them out in forward order */
Robert Griebl94a6a952002-05-19 19:00:14 +0000245 {
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000246 int i;
Robert Griebl94a6a952002-05-19 19:00:14 +0000247
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000248 if (outpos + (i = (de_stack - stackp)) >= OBUFSIZ) {
249 do {
250 if (i > OBUFSIZ - outpos) {
251 i = OBUFSIZ - outpos;
252 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000253
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000254 if (i > 0) {
255 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000256 outpos += i;
257 }
258
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000259 if (outpos >= OBUFSIZ) {
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000260 write(fd_out, outbuf, outpos);
Robert Griebl94a6a952002-05-19 19:00:14 +0000261 outpos = 0;
262 }
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000263 stackp += i;
264 } while ((i = (de_stack - stackp)) > 0);
265 } else {
266 memcpy(outbuf + outpos, stackp, i);
Robert Griebl94a6a952002-05-19 19:00:14 +0000267 outpos += i;
268 }
269 }
270
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000271 /* Generate the new entry. */
272 if ((code = free_ent) < maxmaxcode) {
273 tab_prefixof(code) = (unsigned short) oldcode;
274 tab_suffixof(code) = (unsigned char) finchar;
275 free_ent = code + 1;
276 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000277
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000278 /* Remember previous code. */
279 oldcode = incode;
Robert Griebl94a6a952002-05-19 19:00:14 +0000280 }
281
Glenn L McGrathfedbfe42002-11-28 09:09:47 +0000282 } while (rsize > 0);
Robert Griebl94a6a952002-05-19 19:00:14 +0000283
Glenn L McGrath2e41d0c2002-09-27 06:46:02 +0000284 if (outpos > 0) {
285 write(fd_out, outbuf, outpos);
286 }
Robert Griebl94a6a952002-05-19 19:00:14 +0000287
288 return 0;
289}