Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 1 | #include "config.h" |
| 2 | #include "libbb.h" |
| 3 | |
Glenn L McGrath | 2fc54a9 | 2002-11-03 12:50:33 +0000 | [diff] [blame] | 4 | #if defined CONFIG_UNCOMPRESS || defined CONFIG_FEATURE_GUNZIP_UNCOMPRESS |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 5 | |
| 6 | /* uncompress for busybox -- (c) 2002 Robert Griebl |
| 7 | * |
| 8 | * based on the original compress42.c source |
| 9 | * (see disclaimer below) |
| 10 | */ |
| 11 | |
| 12 | |
| 13 | /* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. |
| 14 | * |
| 15 | * Authors: |
| 16 | * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) |
| 17 | * Jim McKie (decvax!mcvax!jim) |
| 18 | * Steve Davies (decvax!vax135!petsd!peora!srd) |
| 19 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) |
| 20 | * James A. Woods (decvax!ihnp4!ames!jaw) |
| 21 | * Joe Orost (decvax!vax135!petsd!joe) |
| 22 | * Dave Mack (csu@alembic.acs.com) |
| 23 | * Peter Jannesen, Network Communication Systems |
| 24 | * (peter@ncs.nl) |
| 25 | * |
| 26 | * marc@suse.de : a small security fix for a buffer overflow |
| 27 | * |
| 28 | * [... History snipped ...] |
| 29 | * |
| 30 | */ |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 31 | #include <stdio.h> |
| 32 | #include <string.h> |
| 33 | #include <unistd.h> |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 34 | |
/* Default input buffer size */
#define IBUFSIZ 2048

/* Default output buffer size */
#define OBUFSIZ 2048

/* Magic number identifying a compress(1) stream */
#define MAGIC_1 ((unsigned char) '\037')	/* First byte of compressed file */
#define MAGIC_2 ((unsigned char) '\235')	/* Second byte of compressed file */

/* Defines for third byte of header */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
			/* Masks 0x20 and 0x40 are free. */
			/* I think 0x20 should mean that there is */
			/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if the table is full and the */
			/* compression rate is dropping, flush the tables */

/* The next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */


/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1 << (HBITS))
#define HMASK ((HSIZE) - 1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K
/* Number of distinct codes representable in n bits */
#define MAXCODE(n) (1L << (n))

/* Block compress mode -C compatible with 2.0 */
int block_mode = BLOCK_MODE;

/* user settable max # bits/code */
int maxbits = BITS;

/* Exitcode of compress (-1 no file compressed) */
int exit_code = -1;

/* Input buffer */
unsigned char inbuf[IBUFSIZ + 64];

/* Output buffer */
unsigned char outbuf[OBUFSIZ + 2048];


/*
 * Decoder tables.  codetab holds the prefix code of every table entry.
 * htab is reused as raw bytes: tab_suffixof() indexes it from the start
 * as the suffix-byte table, and de_stack points at its last element,
 * from where the decode stack grows downwards.
 */
long int htab[HSIZE];
unsigned short codetab[HSIZE];

#define htabof(i) htab[(i)]
#define codetabof(i) codetab[(i)]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((unsigned char *) (htab))[(i)]
#define de_stack ((unsigned char *) &(htab[HSIZE - 1]))
#define clear_htab() memset(htab, -1, sizeof(htab))
/*
 * Zeroes the first 256 bytes of codetab.  No trailing semicolon, so the
 * macro can be used as a single statement (e.g. before an "else").
 */
#define clear_tab_prefixof() memset(codetab, 0, 256)
| 95 | |
| 96 | |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 97 | /* |
| 98 | * Decompress stdin to stdout. This routine adapts to the codes in the |
| 99 | * file building the "string" table on-the-fly; requiring no table to |
| 100 | * be stored in the compressed file. The tables used herein are shared |
| 101 | * with those of the compress() routine. See the definitions above. |
| 102 | */ |
| 103 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 104 | extern int uncompress(int fd_in, int fd_out) |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 105 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 106 | unsigned char *stackp; |
| 107 | long int code; |
| 108 | int finchar; |
| 109 | long int oldcode; |
| 110 | long int incode; |
| 111 | int inbits; |
| 112 | int posbits; |
| 113 | int outpos; |
| 114 | int insize; |
| 115 | int bitmask; |
| 116 | long int free_ent; |
| 117 | long int maxcode; |
| 118 | long int maxmaxcode; |
| 119 | int n_bits; |
| 120 | int rsize = 0; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 121 | |
| 122 | insize = 0; |
| 123 | |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 124 | inbuf[0] = bb_xread_char(fd_in); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 125 | |
| 126 | maxbits = inbuf[0] & BIT_MASK; |
| 127 | block_mode = inbuf[0] & BLOCK_MODE; |
| 128 | maxmaxcode = MAXCODE(maxbits); |
| 129 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 130 | if (maxbits > BITS) { |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 131 | bb_error_msg("compressed with %d bits, can only handle %d bits", maxbits, |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 132 | BITS); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 133 | return -1; |
| 134 | } |
| 135 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 136 | maxcode = MAXCODE(n_bits = INIT_BITS) - 1; |
| 137 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 138 | oldcode = -1; |
| 139 | finchar = 0; |
| 140 | outpos = 0; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 141 | posbits = 0 << 3; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 142 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 143 | free_ent = ((block_mode) ? FIRST : 256); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 144 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 145 | /* As above, initialize the first 256 entries in the table. */ |
| 146 | clear_tab_prefixof(); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 147 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 148 | for (code = 255; code >= 0; --code) { |
| 149 | tab_suffixof(code) = (unsigned char) code; |
| 150 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 151 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 152 | do { |
| 153 | resetbuf:; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 154 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 155 | int i; |
| 156 | int e; |
| 157 | int o; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 158 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 159 | e = insize - (o = (posbits >> 3)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 160 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 161 | for (i = 0; i < e; ++i) |
| 162 | inbuf[i] = inbuf[i + o]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 163 | |
| 164 | insize = e; |
| 165 | posbits = 0; |
| 166 | } |
| 167 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 168 | if (insize < (int) sizeof(inbuf) - IBUFSIZ) { |
| 169 | rsize = read(fd_in, inbuf + insize, IBUFSIZ); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 170 | insize += rsize; |
| 171 | } |
| 172 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 173 | inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : |
| 174 | (insize << 3) - (n_bits - 1)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 175 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 176 | while (inbits > posbits) { |
| 177 | if (free_ent > maxcode) { |
| 178 | posbits = |
| 179 | ((posbits - 1) + |
| 180 | ((n_bits << 3) - |
| 181 | (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 182 | ++n_bits; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 183 | if (n_bits == maxbits) { |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 184 | maxcode = maxmaxcode; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 185 | } else { |
| 186 | maxcode = MAXCODE(n_bits) - 1; |
| 187 | } |
| 188 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 189 | goto resetbuf; |
| 190 | } |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 191 | { |
| 192 | unsigned char *p = &inbuf[posbits >> 3]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 193 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 194 | code = |
| 195 | ((((long) (p[0])) | ((long) (p[1]) << 8) | |
| 196 | ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 197 | } |
| 198 | posbits += n_bits; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 199 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 200 | |
| 201 | if (oldcode == -1) { |
| 202 | outbuf[outpos++] = (unsigned char) (finchar = |
| 203 | (int) (oldcode = code)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 204 | continue; |
| 205 | } |
| 206 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 207 | if (code == CLEAR && block_mode) { |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 208 | clear_tab_prefixof(); |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 209 | free_ent = FIRST - 1; |
| 210 | posbits = |
| 211 | ((posbits - 1) + |
| 212 | ((n_bits << 3) - |
| 213 | (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); |
| 214 | maxcode = MAXCODE(n_bits = INIT_BITS) - 1; |
| 215 | bitmask = (1 << n_bits) - 1; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 216 | goto resetbuf; |
| 217 | } |
| 218 | |
| 219 | incode = code; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 220 | stackp = de_stack; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 221 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 222 | /* Special case for KwKwK string. */ |
| 223 | if (code >= free_ent) { |
| 224 | if (code > free_ent) { |
| 225 | unsigned char *p; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 226 | |
| 227 | posbits -= n_bits; |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 228 | p = &inbuf[posbits >> 3]; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 229 | |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 230 | bb_error_msg |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 231 | ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", |
| 232 | insize, posbits, p[-1], p[0], p[1], p[2], p[3], |
| 233 | (posbits & 07)); |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 234 | bb_error_msg("uncompress: corrupt input"); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 235 | return -1; |
| 236 | } |
| 237 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 238 | *--stackp = (unsigned char) finchar; |
| 239 | code = oldcode; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 240 | } |
| 241 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 242 | /* Generate output characters in reverse order */ |
| 243 | while ((long int) code >= (long int) 256) { |
| 244 | *--stackp = tab_suffixof(code); |
| 245 | code = tab_prefixof(code); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 246 | } |
| 247 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 248 | *--stackp = (unsigned char) (finchar = tab_suffixof(code)); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 249 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 250 | /* And put them out in forward order */ |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 251 | { |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 252 | int i; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 253 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 254 | if (outpos + (i = (de_stack - stackp)) >= OBUFSIZ) { |
| 255 | do { |
| 256 | if (i > OBUFSIZ - outpos) { |
| 257 | i = OBUFSIZ - outpos; |
| 258 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 259 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 260 | if (i > 0) { |
| 261 | memcpy(outbuf + outpos, stackp, i); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 262 | outpos += i; |
| 263 | } |
| 264 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 265 | if (outpos >= OBUFSIZ) { |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 266 | write(fd_out, outbuf, outpos); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 267 | outpos = 0; |
| 268 | } |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 269 | stackp += i; |
| 270 | } while ((i = (de_stack - stackp)) > 0); |
| 271 | } else { |
| 272 | memcpy(outbuf + outpos, stackp, i); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 273 | outpos += i; |
| 274 | } |
| 275 | } |
| 276 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 277 | /* Generate the new entry. */ |
| 278 | if ((code = free_ent) < maxmaxcode) { |
| 279 | tab_prefixof(code) = (unsigned short) oldcode; |
| 280 | tab_suffixof(code) = (unsigned char) finchar; |
| 281 | free_ent = code + 1; |
| 282 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 283 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 284 | /* Remember previous code. */ |
| 285 | oldcode = incode; |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 286 | } |
| 287 | |
Glenn L McGrath | fedbfe4 | 2002-11-28 09:09:47 +0000 | [diff] [blame] | 288 | } while (rsize > 0); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 289 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 290 | if (outpos > 0) { |
| 291 | write(fd_out, outbuf, outpos); |
| 292 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 293 | |
| 294 | return 0; |
| 295 | } |
| 296 | |
| 297 | |
| 298 | #endif |