Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 1 | #include "config.h" |
| 2 | #include "libbb.h" |
| 3 | |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 4 | /* uncompress for busybox -- (c) 2002 Robert Griebl |
| 5 | * |
Eric Andersen | c7bda1c | 2004-03-15 08:29:22 +0000 | [diff] [blame^] | 6 | * based on the original compress42.c source |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 7 | * (see disclaimer below) |
| 8 | */ |
| 9 | |
| 10 | |
| 11 | /* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. |
| 12 | * |
| 13 | * Authors: |
| 14 | * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) |
| 15 | * Jim McKie (decvax!mcvax!jim) |
| 16 | * Steve Davies (decvax!vax135!petsd!peora!srd) |
| 17 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) |
| 18 | * James A. Woods (decvax!ihnp4!ames!jaw) |
| 19 | * Joe Orost (decvax!vax135!petsd!joe) |
| 20 | * Dave Mack (csu@alembic.acs.com) |
| 21 | * Peter Jannesen, Network Communication Systems |
| 22 | * (peter@ncs.nl) |
| 23 | * |
| 24 | * marc@suse.de : a small security fix for a buffer overflow |
| 25 | * |
| 26 | * [... History snipped ...] |
| 27 | * |
| 28 | */ |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 29 | #include <stdio.h> |
| 30 | #include <string.h> |
| 31 | #include <unistd.h> |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 32 | |
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/*
 * Magic number at the start of a compressed file.
 * NOTE(review): char_type is not defined in the visible part of this file;
 * these two macros are not expanded here, so that is harmless today.
 */
#define MAGIC_1 (char_type)'\037'	/* First byte of compressed file */
#define MAGIC_2 (char_type)'\235'	/* Second byte of compressed file */

/* Defines for third byte of header */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
						/* Masks 0x20 and 0x40 are free. */
						/* I think 0x20 should mean that there is */
						/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if the table is full and */
						/* the compression rate is dropping, flush tables */

/* The next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K
#define MAXCODE(n) (1L << (n))

/* Block compress mode -C compatible with 2.0 */
int block_mode = BLOCK_MODE;

/* user settable max # bits/code */
int maxbits = BITS;

/* Exitcode of compress (-1 no file compressed) */
int exit_code = -1;

/* Input buffer */
unsigned char inbuf[IBUFSIZ + 64];

/* Output buffer */
unsigned char outbuf[OBUFSIZ + 2048];


/*
 * String table storage.  codetab[] holds the prefix code of every table
 * entry.  htab[] is used as raw bytes: the low end holds one suffix byte
 * per code (tab_suffixof), and the decoder's output stack grows downwards
 * from the last element (de_stack).
 */
long int htab[HSIZE];
unsigned short codetab[HSIZE];

#define htabof(i) htab[(i)]
#define codetabof(i) codetab[(i)]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *)(htab))[(i)]
#define de_stack ((unsigned char *)&(htab[HSIZE-1]))
#define clear_htab() memset(htab, -1, sizeof(htab))
/*
 * Zero the prefix of the first 256 codes.  The count is in table entries,
 * not bytes, and the expansion has no trailing ';' so the macro behaves
 * like an ordinary function call wherever it is used.
 */
#define clear_tab_prefixof() memset(codetab, 0, 256 * sizeof(codetab[0]))
| 93 | |
| 94 | |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 95 | /* |
| 96 | * Decompress stdin to stdout. This routine adapts to the codes in the |
| 97 | * file building the "string" table on-the-fly; requiring no table to |
| 98 | * be stored in the compressed file. The tables used herein are shared |
| 99 | * with those of the compress() routine. See the definitions above. |
| 100 | */ |
| 101 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 102 | extern int uncompress(int fd_in, int fd_out) |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 103 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 104 | unsigned char *stackp; |
| 105 | long int code; |
| 106 | int finchar; |
| 107 | long int oldcode; |
| 108 | long int incode; |
| 109 | int inbits; |
| 110 | int posbits; |
| 111 | int outpos; |
| 112 | int insize; |
| 113 | int bitmask; |
| 114 | long int free_ent; |
| 115 | long int maxcode; |
| 116 | long int maxmaxcode; |
| 117 | int n_bits; |
| 118 | int rsize = 0; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 119 | |
| 120 | insize = 0; |
| 121 | |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 122 | inbuf[0] = bb_xread_char(fd_in); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 123 | |
| 124 | maxbits = inbuf[0] & BIT_MASK; |
| 125 | block_mode = inbuf[0] & BLOCK_MODE; |
| 126 | maxmaxcode = MAXCODE(maxbits); |
| 127 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 128 | if (maxbits > BITS) { |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 129 | bb_error_msg("compressed with %d bits, can only handle %d bits", maxbits, |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 130 | BITS); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 131 | return -1; |
| 132 | } |
| 133 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 134 | maxcode = MAXCODE(n_bits = INIT_BITS) - 1; |
| 135 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 136 | oldcode = -1; |
| 137 | finchar = 0; |
| 138 | outpos = 0; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 139 | posbits = 0 << 3; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 140 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 141 | free_ent = ((block_mode) ? FIRST : 256); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 142 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 143 | /* As above, initialize the first 256 entries in the table. */ |
| 144 | clear_tab_prefixof(); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 145 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 146 | for (code = 255; code >= 0; --code) { |
| 147 | tab_suffixof(code) = (unsigned char) code; |
| 148 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 149 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 150 | do { |
| 151 | resetbuf:; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 152 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 153 | int i; |
| 154 | int e; |
| 155 | int o; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 156 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 157 | e = insize - (o = (posbits >> 3)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 158 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 159 | for (i = 0; i < e; ++i) |
| 160 | inbuf[i] = inbuf[i + o]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 161 | |
| 162 | insize = e; |
| 163 | posbits = 0; |
| 164 | } |
| 165 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 166 | if (insize < (int) sizeof(inbuf) - IBUFSIZ) { |
Glenn L McGrath | 1a2d75f | 2003-11-21 22:17:28 +0000 | [diff] [blame] | 167 | rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 168 | insize += rsize; |
| 169 | } |
| 170 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 171 | inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : |
| 172 | (insize << 3) - (n_bits - 1)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 173 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 174 | while (inbits > posbits) { |
| 175 | if (free_ent > maxcode) { |
| 176 | posbits = |
| 177 | ((posbits - 1) + |
| 178 | ((n_bits << 3) - |
| 179 | (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 180 | ++n_bits; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 181 | if (n_bits == maxbits) { |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 182 | maxcode = maxmaxcode; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 183 | } else { |
| 184 | maxcode = MAXCODE(n_bits) - 1; |
| 185 | } |
| 186 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 187 | goto resetbuf; |
| 188 | } |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 189 | { |
| 190 | unsigned char *p = &inbuf[posbits >> 3]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 191 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 192 | code = |
| 193 | ((((long) (p[0])) | ((long) (p[1]) << 8) | |
| 194 | ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 195 | } |
| 196 | posbits += n_bits; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 197 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 198 | |
| 199 | if (oldcode == -1) { |
| 200 | outbuf[outpos++] = (unsigned char) (finchar = |
| 201 | (int) (oldcode = code)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 202 | continue; |
| 203 | } |
| 204 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 205 | if (code == CLEAR && block_mode) { |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 206 | clear_tab_prefixof(); |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 207 | free_ent = FIRST - 1; |
| 208 | posbits = |
| 209 | ((posbits - 1) + |
| 210 | ((n_bits << 3) - |
| 211 | (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); |
| 212 | maxcode = MAXCODE(n_bits = INIT_BITS) - 1; |
| 213 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 214 | goto resetbuf; |
| 215 | } |
| 216 | |
| 217 | incode = code; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 218 | stackp = de_stack; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 219 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 220 | /* Special case for KwKwK string. */ |
| 221 | if (code >= free_ent) { |
| 222 | if (code > free_ent) { |
| 223 | unsigned char *p; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 224 | |
| 225 | posbits -= n_bits; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 226 | p = &inbuf[posbits >> 3]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 227 | |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 228 | bb_error_msg |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 229 | ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", |
| 230 | insize, posbits, p[-1], p[0], p[1], p[2], p[3], |
| 231 | (posbits & 07)); |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 232 | bb_error_msg("uncompress: corrupt input"); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 233 | return -1; |
| 234 | } |
| 235 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 236 | *--stackp = (unsigned char) finchar; |
| 237 | code = oldcode; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 238 | } |
| 239 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 240 | /* Generate output characters in reverse order */ |
| 241 | while ((long int) code >= (long int) 256) { |
| 242 | *--stackp = tab_suffixof(code); |
| 243 | code = tab_prefixof(code); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 244 | } |
| 245 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 246 | *--stackp = (unsigned char) (finchar = tab_suffixof(code)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 247 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 248 | /* And put them out in forward order */ |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 249 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 250 | int i; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 251 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 252 | if (outpos + (i = (de_stack - stackp)) >= OBUFSIZ) { |
| 253 | do { |
| 254 | if (i > OBUFSIZ - outpos) { |
| 255 | i = OBUFSIZ - outpos; |
| 256 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 257 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 258 | if (i > 0) { |
| 259 | memcpy(outbuf + outpos, stackp, i); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 260 | outpos += i; |
| 261 | } |
| 262 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 263 | if (outpos >= OBUFSIZ) { |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 264 | write(fd_out, outbuf, outpos); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 265 | outpos = 0; |
| 266 | } |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 267 | stackp += i; |
| 268 | } while ((i = (de_stack - stackp)) > 0); |
| 269 | } else { |
| 270 | memcpy(outbuf + outpos, stackp, i); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 271 | outpos += i; |
| 272 | } |
| 273 | } |
| 274 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 275 | /* Generate the new entry. */ |
| 276 | if ((code = free_ent) < maxmaxcode) { |
| 277 | tab_prefixof(code) = (unsigned short) oldcode; |
| 278 | tab_suffixof(code) = (unsigned char) finchar; |
| 279 | free_ent = code + 1; |
| 280 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 281 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 282 | /* Remember previous code. */ |
| 283 | oldcode = incode; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 284 | } |
| 285 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 286 | } while (rsize > 0); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 287 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 288 | if (outpos > 0) { |
| 289 | write(fd_out, outbuf, outpos); |
| 290 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 291 | |
| 292 | return 0; |
| 293 | } |