Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 1 | #include "config.h" |
| 2 | #include "libbb.h" |
| 3 | |
Glenn L McGrath | bf1cc8b | 2002-11-01 23:38:54 +0000 | [diff] [blame] | 4 | #ifdef CONFIG_UNCOMPRESS || defined CONFIG_FEATURE_GUNZIP_UNCOMPRESS |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 5 | |
| 6 | /* uncompress for busybox -- (c) 2002 Robert Griebl |
| 7 | * |
| 8 | * based on the original compress42.c source |
| 9 | * (see disclaimer below) |
| 10 | */ |
| 11 | |
| 12 | |
| 13 | /* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. |
| 14 | * |
| 15 | * Authors: |
| 16 | * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) |
| 17 | * Jim McKie (decvax!mcvax!jim) |
| 18 | * Steve Davies (decvax!vax135!petsd!peora!srd) |
| 19 | * Ken Turkowski (decvax!decwrl!turtlevax!ken) |
| 20 | * James A. Woods (decvax!ihnp4!ames!jaw) |
| 21 | * Joe Orost (decvax!vax135!petsd!joe) |
| 22 | * Dave Mack (csu@alembic.acs.com) |
| 23 | * Peter Jannesen, Network Communication Systems |
| 24 | * (peter@ncs.nl) |
| 25 | * |
| 26 | * marc@suse.de : a small security fix for a buffer overflow |
| 27 | * |
| 28 | * [... History snipped ...] |
| 29 | * |
| 30 | */ |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 31 | #include <stdio.h> |
| 32 | #include <string.h> |
| 33 | #include <unistd.h> |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 34 | |
#define IBUFSIZ 2048	/* Default input buffer size */
#define OBUFSIZ 2048	/* Default output buffer size */

/* Defines for third byte of header */
#define MAGIC_1 ((char_type)'\037')	/* First byte of compressed file */
#define MAGIC_2 ((char_type)'\235')	/* Second byte of compressed file */
#define BIT_MASK 0x1f	/* Mask for 'number of compression bits' */
			/* Masks 0x20 and 0x40 are free. */
			/* I think 0x20 should mean that there is */
			/* a fourth header byte (for expansion). */
#define BLOCK_MODE 0x80	/* Block compression: if table is full and */
			/* compression rate is dropping, flush tables */

/* the next two codes should not be changed lightly, as they must not */
/* lie within the contiguous general code space. */
#define FIRST 257	/* first free entry */
#define CLEAR 256	/* table clear output code */

#define INIT_BITS 9	/* initial number of bits/code */


/*
 * machine variants which require cc -Dmachine: pdp11, z8000, DOS
 */
#define FAST

#define HBITS 17	/* 50% occupancy */
#define HSIZE (1<<HBITS)
#define HMASK (HSIZE-1)
#define HPRIME 9941
#define BITS 16
#undef MAXSEG_64K

typedef long int code_int;

typedef long int count_int;
typedef long int cmp_code_int;

typedef unsigned char char_type;

/* 2^n as a long; callers subtract 1 themselves when they need the largest n-bit code. */
#define MAXCODE(n) (1L << (n))



int block_mode = BLOCK_MODE;	/* Block compress mode -C compatible with 2.0 */
int maxbits = BITS;		/* user settable max # bits/code */
int exit_code = -1;		/* Exitcode of compress (-1 no file compressed) */

char_type inbuf[IBUFSIZ+64];	/* Input buffer */
char_type outbuf[OBUFSIZ+2048];	/* Output buffer */


/*
 * htab does double duty for the decoder: its first bytes are addressed
 * byte-wise as the suffix table (tab_suffixof), and its last element marks
 * the top of the decode stack (de_stack), which grows towards lower addresses.
 * codetab holds the prefix code of every table entry (tab_prefixof).
 */
count_int htab[HSIZE];
unsigned short codetab[HSIZE];

#define htabof(i) htab[(i)]
#define codetabof(i) codetab[(i)]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((char_type *)(htab))[(i)]
#define de_stack ((char_type *)&(htab[HSIZE-1]))
#define clear_htab() memset(htab, -1, sizeof(htab))
/*
 * Zero the prefix of the first 256 table entries.  The size is given in
 * entries, not bytes, and there is deliberately no trailing semicolon so the
 * macro behaves like an ordinary expression statement (e.g. inside if/else).
 */
#define clear_tab_prefixof() memset(codetab, 0, 256 * sizeof(codetab[0]))
| 97 | |
| 98 | |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 99 | /* |
| 100 | * Decompress stdin to stdout. This routine adapts to the codes in the |
| 101 | * file building the "string" table on-the-fly; requiring no table to |
| 102 | * be stored in the compressed file. The tables used herein are shared |
| 103 | * with those of the compress() routine. See the definitions above. |
| 104 | */ |
| 105 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 106 | extern int uncompress(int fd_in, int fd_out) |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 107 | { |
| 108 | char_type *stackp; |
| 109 | code_int code; |
| 110 | int finchar; |
| 111 | code_int oldcode; |
| 112 | code_int incode; |
| 113 | int inbits; |
| 114 | int posbits; |
| 115 | int outpos; |
| 116 | int insize; |
| 117 | int bitmask; |
| 118 | code_int free_ent; |
| 119 | code_int maxcode; |
| 120 | code_int maxmaxcode; |
| 121 | int n_bits; |
| 122 | int rsize = 0; |
| 123 | |
| 124 | insize = 0; |
| 125 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 126 | inbuf [0] = xread_char(fd_in); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 127 | |
| 128 | maxbits = inbuf[0] & BIT_MASK; |
| 129 | block_mode = inbuf[0] & BLOCK_MODE; |
| 130 | maxmaxcode = MAXCODE(maxbits); |
| 131 | |
| 132 | if (maxbits > BITS) |
| 133 | { |
| 134 | fprintf(stderr, "compressed with %d bits, can only handle %d bits\n", maxbits, BITS); |
| 135 | return -1; |
| 136 | } |
| 137 | |
| 138 | //fprintf(stderr, "Bits: %d, block_mode: %d\n", maxbits, block_mode ); |
| 139 | |
| 140 | maxcode = MAXCODE(n_bits = INIT_BITS)-1; |
| 141 | bitmask = (1<<n_bits)-1; |
| 142 | oldcode = -1; |
| 143 | finchar = 0; |
| 144 | outpos = 0; |
| 145 | posbits = 0<<3; |
| 146 | |
| 147 | free_ent = ((block_mode) ? FIRST : 256); |
| 148 | |
| 149 | clear_tab_prefixof(); /* As above, initialize the first |
| 150 | 256 entries in the table. */ |
| 151 | |
| 152 | for (code = 255 ; code >= 0 ; --code) |
| 153 | tab_suffixof(code) = (char_type)code; |
| 154 | |
| 155 | do |
| 156 | { |
| 157 | resetbuf: ; |
| 158 | { |
| 159 | int i; |
| 160 | int e; |
| 161 | int o; |
| 162 | |
| 163 | e = insize-(o = (posbits>>3)); |
| 164 | |
| 165 | for (i = 0 ; i < e ; ++i) |
| 166 | inbuf[i] = inbuf[i+o]; |
| 167 | |
| 168 | insize = e; |
| 169 | posbits = 0; |
| 170 | } |
| 171 | |
| 172 | if (insize < (int) sizeof(inbuf)-IBUFSIZ) |
| 173 | { |
Glenn L McGrath | d6aec86 | 2002-11-01 22:28:13 +0000 | [diff] [blame] | 174 | rsize = read(fd_in, inbuf+insize, IBUFSIZ); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 175 | insize += rsize; |
| 176 | } |
| 177 | |
| 178 | inbits = ((rsize > 0) ? (insize - insize%n_bits)<<3 : |
| 179 | (insize<<3)-(n_bits-1)); |
| 180 | |
| 181 | while (inbits > posbits) |
| 182 | { |
| 183 | if (free_ent > maxcode) |
| 184 | { |
| 185 | posbits = ((posbits-1) + ((n_bits<<3) - |
| 186 | (posbits-1+(n_bits<<3))%(n_bits<<3))); |
| 187 | |
| 188 | ++n_bits; |
| 189 | if (n_bits == maxbits) |
| 190 | maxcode = maxmaxcode; |
| 191 | else |
| 192 | maxcode = MAXCODE(n_bits)-1; |
| 193 | |
| 194 | bitmask = (1<<n_bits)-1; |
| 195 | goto resetbuf; |
| 196 | } |
| 197 | |
| 198 | |
| 199 | { |
| 200 | char_type *p = &inbuf[posbits>>3]; |
| 201 | code = ((((long)(p[0]))|((long)(p[1])<<8)|((long)(p[2])<<16))>>(posbits&0x7))&bitmask; |
| 202 | } |
| 203 | posbits += n_bits; |
| 204 | |
| 205 | |
| 206 | if (oldcode == -1) |
| 207 | { |
| 208 | outbuf[outpos++] = (char_type)(finchar = (int)(oldcode = code)); |
| 209 | continue; |
| 210 | } |
| 211 | |
| 212 | if (code == CLEAR && block_mode) |
| 213 | { |
| 214 | clear_tab_prefixof(); |
| 215 | free_ent = FIRST - 1; |
| 216 | posbits = ((posbits-1) + ((n_bits<<3) - |
| 217 | (posbits-1+(n_bits<<3))%(n_bits<<3))); |
| 218 | maxcode = MAXCODE(n_bits = INIT_BITS)-1; |
| 219 | bitmask = (1<<n_bits)-1; |
| 220 | goto resetbuf; |
| 221 | } |
| 222 | |
| 223 | incode = code; |
| 224 | stackp = de_stack; |
| 225 | |
| 226 | if (code >= free_ent) /* Special case for KwKwK string. */ |
| 227 | { |
| 228 | if (code > free_ent) |
| 229 | { |
| 230 | char_type *p; |
| 231 | |
| 232 | posbits -= n_bits; |
| 233 | p = &inbuf[posbits>>3]; |
| 234 | |
| 235 | fprintf(stderr, "insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)\n", insize, posbits, |
| 236 | p[-1],p[0],p[1],p[2],p[3], (posbits&07)); |
| 237 | fprintf(stderr, "uncompress: corrupt input\n"); |
| 238 | return -1; |
| 239 | } |
| 240 | |
| 241 | *--stackp = (char_type)finchar; |
| 242 | code = oldcode; |
| 243 | } |
| 244 | |
| 245 | while ((cmp_code_int)code >= (cmp_code_int)256) |
| 246 | { /* Generate output characters in reverse order */ |
| 247 | *--stackp = tab_suffixof(code); |
| 248 | code = tab_prefixof(code); |
| 249 | } |
| 250 | |
| 251 | *--stackp = (char_type)(finchar = tab_suffixof(code)); |
| 252 | |
| 253 | /* And put them out in forward order */ |
| 254 | |
| 255 | { |
| 256 | int i; |
| 257 | |
| 258 | if (outpos+(i = (de_stack-stackp)) >= OBUFSIZ) |
| 259 | { |
| 260 | do |
| 261 | { |
| 262 | if (i > OBUFSIZ-outpos) i = OBUFSIZ-outpos; |
| 263 | |
| 264 | if (i > 0) |
| 265 | { |
| 266 | memcpy(outbuf+outpos, stackp, i); |
| 267 | outpos += i; |
| 268 | } |
| 269 | |
| 270 | if (outpos >= OBUFSIZ) |
| 271 | { |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 272 | write(fd_out, outbuf, outpos); |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 273 | outpos = 0; |
| 274 | } |
| 275 | stackp+= i; |
| 276 | } |
| 277 | while ((i = (de_stack-stackp)) > 0); |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | memcpy(outbuf+outpos, stackp, i); |
| 282 | outpos += i; |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | if ((code = free_ent) < maxmaxcode) /* Generate the new entry. */ |
| 287 | { |
| 288 | tab_prefixof(code) = (unsigned short)oldcode; |
| 289 | tab_suffixof(code) = (char_type)finchar; |
| 290 | free_ent = code+1; |
| 291 | } |
| 292 | |
| 293 | oldcode = incode; /* Remember previous code. */ |
| 294 | } |
| 295 | |
| 296 | } |
| 297 | while (rsize > 0); |
| 298 | |
Glenn L McGrath | 2e41d0c | 2002-09-27 06:46:02 +0000 | [diff] [blame] | 299 | if (outpos > 0) { |
| 300 | write(fd_out, outbuf, outpos); |
| 301 | } |
Robert Griebl | 94a6a95 | 2002-05-19 19:00:14 +0000 | [diff] [blame] | 302 | |
| 303 | return 0; |
| 304 | } |
| 305 | |
| 306 | |
| 307 | #endif |