/*
 * LZMA2 decoder
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"
#include "xz_lzma2.h"

/*
 * Range decoder initialization eats the first five bytes of each LZMA chunk.
 */
#define RC_INIT_BYTES 5

/*
 * Minimum number of usable input bytes needed to safely decode one LZMA
 * symbol. The worst case is that we decode 22 bits using probabilities
 * and 26 direct bits. This may consume at most 20 bytes of input.
 * However, lzma_main() does an extra normalization before returning,
 * thus we need to put 21 here.
 */
#define LZMA_IN_REQUIRED 21

/*
 * Dictionary (history buffer)
 *
 * These are always true:
 *    start <= pos <= full <= end
 *    pos <= limit <= end
 *
 * In multi-call mode, also these are true:
 *    end == size
 *    size <= allocated
 *
 * Most of these variables are size_t to support single-call mode,
 * in which the dictionary variables address the actual output
 * buffer directly.
 */
struct dictionary {
	/* Beginning of the history buffer */
	uint8_t *buf;

	/* Old position in buf (before decoding more data) */
	size_t start;

	/* Position in buf */
	size_t pos;

	/*
	 * How full dictionary is. This is used to detect corrupt input that
	 * would read beyond the beginning of the uncompressed stream.
	 */
	size_t full;

	/* Write limit; we don't write to buf[limit] or later bytes. */
	size_t limit;

	/*
	 * End of the dictionary buffer. In multi-call mode, this is
	 * the same as the dictionary size. In single-call mode, this
	 * indicates the size of the output buffer.
	 */
	size_t end;

	/*
	 * Size of the dictionary as specified in Block Header. This is used
	 * together with "full" to detect corrupt input that would make us
	 * read beyond the beginning of the uncompressed stream.
	 */
	uint32_t size;

	/*
	 * Amount of memory allocated for the dictionary. A special
	 * value of zero indicates that we are in single-call mode,
	 * where the output buffer works as the dictionary.
	 */
	uint32_t allocated;
};

/* Range decoder */
struct rc_dec {
	uint32_t range;
	uint32_t code;

	/*
	 * Number of initializing bytes remaining to be read
	 * by rc_read_init().
	 */
	uint32_t init_bytes_left;

	/*
	 * Buffer from which we read our input. It can be either
	 * temp.buf or the caller-provided input buffer.
	 */
	const uint8_t *in;
	size_t in_pos;
	size_t in_limit;
};

/* Probabilities for a length decoder. */
struct lzma_len_dec {
	/* Probability of match length being at least 10 */
	uint16_t choice;

	/* Probability of match length being at least 18 */
	uint16_t choice2;

	/* Probabilities for match lengths 2-9 */
	uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];

	/* Probabilities for match lengths 10-17 */
	uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];

	/* Probabilities for match lengths 18-273 */
	uint16_t high[LEN_HIGH_SYMBOLS];
};

struct lzma_dec {
	/*
	 * LZMA properties or related bit masks (number of literal
	 * context bits, a mask derived from the number of literal
	 * position bits, and a mask derived from the number of
	 * position bits)
	 */
	uint32_t lc;
	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
	uint32_t pos_mask;         /* (1 << pb) - 1 */

	/* Types of the most recently seen LZMA symbols */
	enum lzma_state state;

	/* Distances of latest four matches */
	uint32_t rep0;
	uint32_t rep1;
	uint32_t rep2;
	uint32_t rep3;

	/*
	 * Length of a match. This is updated so that dict_repeat can
	 * be called again to finish repeating the whole match.
	 */
	uint32_t len;

	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
	uint16_t is_match[STATES][POS_STATES_MAX];

	/* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
	uint16_t is_rep[STATES];

	/*
	 * If 0, distance of a repeated match is rep0.
	 * Otherwise check is_rep1.
	 */
	uint16_t is_rep0[STATES];

	/*
	 * If 0, distance of a repeated match is rep1.
	 * Otherwise check is_rep2.
	 */
	uint16_t is_rep1[STATES];

	/* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
	uint16_t is_rep2[STATES];

	/*
	 * If 0, the repeated match has a length of one byte. Otherwise
	 * the length is decoded from rep_len_dec.
	 */
	uint16_t is_rep0_long[STATES][POS_STATES_MAX];

	/*
	 * Probability tree for the highest two bits of the match
	 * distance. There is a separate probability tree for match
	 * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
	 */
	uint16_t dist_slot[DIST_STATES][DIST_SLOTS];

	/*
	 * Probability trees for additional bits for match distance
	 * when the distance is in the range [4, 127].
	 */
	uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];

	/*
	 * Probability tree for the lowest four bits of a match
	 * distance that is equal to or greater than 128.
	 */
	uint16_t dist_align[ALIGN_SIZE];

	/* Length of a normal match */
	struct lzma_len_dec match_len_dec;

	/* Length of a repeated match */
	struct lzma_len_dec rep_len_dec;

	/* Probabilities of literals */
	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
};

struct xz_dec_lzma2 {
	/* LZMA2 */
	struct {
		/* Position in xz_dec_lzma2_run(). */
		enum lzma2_seq {
			SEQ_CONTROL,
			SEQ_UNCOMPRESSED_1,
			SEQ_UNCOMPRESSED_2,
			SEQ_COMPRESSED_0,
			SEQ_COMPRESSED_1,
			SEQ_PROPERTIES,
			SEQ_LZMA_PREPARE,
			SEQ_LZMA_RUN,
			SEQ_COPY
		} sequence;

		/*
		 * Next position after decoding the compressed size of
		 * the chunk.
		 */
		enum lzma2_seq next_sequence;

		/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
		uint32_t uncompressed;

		/*
		 * Compressed size of LZMA chunk or compressed/uncompressed
		 * size of uncompressed chunk (64 KiB at maximum)
		 */
		uint32_t compressed;

		/*
		 * True if dictionary reset is needed. This is true before
		 * the first chunk (LZMA or uncompressed).
		 */
		bool need_dict_reset;

		/*
		 * True if new LZMA properties are needed. This is true
		 * before the first LZMA chunk.
		 */
		bool need_props;
	} lzma2;

	/*
	 * Temporary buffer which holds a small number of input bytes between
	 * decoder calls. See lzma2_lzma() for details.
	 */
	struct {
		uint32_t size;
		uint8_t buf[3 * LZMA_IN_REQUIRED];
	} temp;

	struct dictionary dict;
	struct rc_dec rc;
	struct lzma_dec lzma;
};

/**************
 * Dictionary *
 **************/

/*
 * Reset the dictionary state. When in single-call mode, set up the beginning
 * of the dictionary to point to the actual output buffer.
 */
static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
{
	if (dict->allocated == 0) {
		dict->buf = b->out + b->out_pos;
		dict->end = b->out_size - b->out_pos;
	}

	dict->start = 0;
	dict->pos = 0;
	dict->limit = 0;
	dict->full = 0;
}

/* Set dictionary write limit */
static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max)
{
	if (dict->end - dict->pos <= out_max)
		dict->limit = dict->end;
	else
		dict->limit = dict->pos + out_max;
}

/* Return true if at least one byte can be written into the dictionary. */
static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict)
{
	return dict->pos < dict->limit;
}

/*
 * Get a byte from the dictionary at the given distance. The distance is
 * assumed to be valid, or as a special case, zero when the dictionary is
 * still empty. This special case is needed for single-call decoding to
 * avoid writing a '\0' to the end of the destination buffer.
 */
static __always_inline uint32_t XZ_FUNC dict_get(
		const struct dictionary *dict, uint32_t dist)
{
	size_t offset = dict->pos - dist - 1;

	if (dist >= dict->pos)
		offset += dict->end;

	return dict->full > 0 ? dict->buf[offset] : 0;
}
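
/*
 * Worked example (illustrative): dist == 0 reads the most recently written
 * byte at buf[pos - 1]. With a circular buffer of end == 8, pos == 2 and
 * dist == 4, "pos - dist - 1" underflows, and adding "end" wraps it to
 * offset 5; counting back circularly from index 1, the indices 1, 0, 7,
 * 6, 5 correspond to distances 0, 1, 2, 3, 4, so offset 5 is correct.
 */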

/*
 * Put one byte into the dictionary. It is assumed that there is space for it.
 */
static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte)
{
	dict->buf[dict->pos++] = byte;

	if (dict->full < dict->pos)
		dict->full = dict->pos;
}

/*
 * Repeat given number of bytes from the given distance. If the distance is
 * invalid, false is returned. On success, true is returned and *len is
 * updated to indicate how many bytes were left to be repeated.
 */
static bool XZ_FUNC dict_repeat(
		struct dictionary *dict, uint32_t *len, uint32_t dist)
{
	size_t back;
	uint32_t left;

	if (dist >= dict->full || dist >= dict->size)
		return false;

	left = min_t(size_t, dict->limit - dict->pos, *len);
	*len -= left;

	back = dict->pos - dist - 1;
	if (dist >= dict->pos)
		back += dict->end;

	do {
		dict->buf[dict->pos++] = dict->buf[back++];
		if (back == dict->end)
			back = 0;
	} while (--left > 0);

	if (dict->full < dict->pos)
		dict->full = dict->pos;

	return true;
}
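
/*
 * Illustrative sketch (kept under "#if 0", never compiled): dict_repeat
 * performing the classic overlapping LZ77 copy. The buffer contents and
 * sizes are arbitrary example values. With "abc" in the dictionary,
 * repeating len == 4 bytes from dist == 2 starts copying at back == 0 and
 * appends "abca"; the last byte copied was itself written by this call.
 */
#if 0
static void dict_repeat_demo(void)
{
	uint8_t buf[8] = { 'a', 'b', 'c' };
	struct dictionary dict = {
		.buf = buf, .pos = 3, .full = 3,
		.limit = 8, .end = 8, .size = 8,
	};
	uint32_t len = 4;

	dict_repeat(&dict, &len, 2); /* buf now holds "abcabca", len == 0 */
}
#endif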

/* Copy uncompressed data as is from input to dictionary and output buffers. */
static void XZ_FUNC dict_uncompressed(
		struct dictionary *dict, struct xz_buf *b, uint32_t *left)
{
	size_t copy_size;

	while (*left > 0 && b->in_pos < b->in_size
			&& b->out_pos < b->out_size) {
		copy_size = min(b->in_size - b->in_pos,
				b->out_size - b->out_pos);
		if (copy_size > dict->end - dict->pos)
			copy_size = dict->end - dict->pos;
		if (copy_size > *left)
			copy_size = *left;

		*left -= copy_size;

		memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
		dict->pos += copy_size;

		if (dict->full < dict->pos)
			dict->full = dict->pos;

		if (dict->allocated != 0) {
			if (dict->pos == dict->end)
				dict->pos = 0;

			memcpy(b->out + b->out_pos, b->in + b->in_pos,
					copy_size);
		}

		dict->start = dict->pos;

		b->out_pos += copy_size;
		b->in_pos += copy_size;
	}
}

/*
 * Flush pending data from dictionary to b->out. It is assumed that there is
 * enough space in b->out. This is guaranteed because the caller uses
 * dict_limit() before decoding data into the dictionary.
 */
static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b)
{
	size_t copy_size = dict->pos - dict->start;

	if (dict->allocated != 0) {
		if (dict->pos == dict->end)
			dict->pos = 0;

		memcpy(b->out + b->out_pos, dict->buf + dict->start,
				copy_size);
	}

	dict->start = dict->pos;
	b->out_pos += copy_size;
	return copy_size;
}

/*****************
 * Range decoder *
 *****************/

/* Reset the range decoder. */
static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc)
{
	rc->range = (uint32_t)-1;
	rc->code = 0;
	rc->init_bytes_left = RC_INIT_BYTES;
}

/*
 * Read the first five initial bytes into rc->code if they haven't been
 * read already. (Yes, the first byte gets completely ignored.)
 */
static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b)
{
	while (rc->init_bytes_left > 0) {
		if (b->in_pos == b->in_size)
			return false;

		rc->code = (rc->code << 8) + b->in[b->in_pos++];
		--rc->init_bytes_left;
	}

	return true;
}

/* Return true if there may not be enough input for the next decoding loop. */
static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc)
{
	return rc->in_pos > rc->in_limit;
}

/*
 * Return true if it is possible (from point of view of range decoder) that
 * we have reached the end of the LZMA chunk.
 */
static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc)
{
	return rc->code == 0;
}

/* Read the next input byte if needed. */
static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc)
{
	if (rc->range < RC_TOP_VALUE) {
		rc->range <<= RC_SHIFT_BITS;
		rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
	}
}

/*
 * Decode one bit. In some versions, this function has been split into three
 * functions so that the compiler is supposed to be able to more easily avoid
 * an extra branch. In this particular version of the LZMA decoder, this
 * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
 * on x86). Using a non-split version results in nicer looking code too.
 *
 * NOTE: This must return an int. Do not make it return a bool or the speed
 * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
 * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
 */
static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob)
{
	uint32_t bound;
	int bit;

	rc_normalize(rc);
	bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
	if (rc->code < bound) {
		rc->range = bound;
		*prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
		bit = 0;
	} else {
		rc->range -= bound;
		rc->code -= bound;
		*prob -= *prob >> RC_MOVE_BITS;
		bit = 1;
	}

	return bit;
}
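
/*
 * Worked example (illustrative): right after rc_reset(), range is
 * 0xFFFFFFFF and every probability is RC_BIT_MODEL_TOTAL / 2 == 1024.
 * Then bound = (0xFFFFFFFF >> 11) * 1024 == 0x7FFFFC00, i.e. the range
 * is split almost exactly in half. Decoding a 0 moves the probability up
 * by (2048 - 1024) >> 5 == 32 to 1056; decoding a 1 moves it down by
 * 1024 >> 5 == 32 to 992, so the model adapts toward recently seen bits.
 */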

/* Decode a bittree starting from the most significant bit. */
static __always_inline uint32_t XZ_FUNC rc_bittree(
		struct rc_dec *rc, uint16_t *probs, uint32_t limit)
{
	uint32_t symbol = 1;

	do {
		if (rc_bit(rc, &probs[symbol]))
			symbol = (symbol << 1) + 1;
		else
			symbol <<= 1;
	} while (symbol < limit);

	return symbol;
}
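
/*
 * Note (illustrative): "symbol" doubles as the index into the probability
 * tree. It starts at 1, and after decoding bits b1 b2 ... bn (MSB first)
 * it holds 1 b1 b2 ... bn in binary, so each distinct bit-prefix gets its
 * own adaptive probability (probs[1], then probs[2] or probs[3], and so
 * on). The return value is in [limit, 2 * limit); callers subtract
 * "limit" or mask off the top bit to recover the decoded bits.
 */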

/*
 * Decode a bittree starting from the least significant bit. Unlike in
 * rc_bittree(), here "limit" is the number of bits to decode, and the
 * decoded bits are added into *dest starting from the least significant
 * bit.
 */
static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc,
		uint16_t *probs, uint32_t *dest, uint32_t limit)
{
	uint32_t symbol = 1;
	uint32_t i = 0;

	do {
		if (rc_bit(rc, &probs[symbol])) {
			symbol = (symbol << 1) + 1;
			*dest += 1 << i;
		} else {
			symbol <<= 1;
		}
	} while (++i < limit);
}

/* Decode direct bits (fixed fifty-fifty probability) */
static inline void XZ_FUNC rc_direct(
		struct rc_dec *rc, uint32_t *dest, uint32_t limit)
{
	uint32_t mask;

	do {
		rc_normalize(rc);
		rc->range >>= 1;
		rc->code -= rc->range;
		mask = (uint32_t)0 - (rc->code >> 31);
		rc->code += rc->range & mask;
		*dest = (*dest << 1) + (mask + 1);
	} while (--limit > 0);
}
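
/*
 * Note (illustrative): the loop body is a branchless form of
 *
 *	if (rc->code < rc->range)
 *		bit = 0;		(leave code as it was)
 *	else {
 *		rc->code -= rc->range;
 *		bit = 1;
 *	}
 *
 * Because code is always below the pre-shift range, bit 31 of code after
 * "rc->code -= rc->range" is 1 exactly when the subtraction wrapped
 * around, so mask is either 0xFFFFFFFF (undo the subtraction and append
 * a 0 bit via mask + 1 == 0) or 0 (keep it and append a 1 bit).
 */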

/********
 * LZMA *
 ********/

/* Get pointer to literal coder probability array. */
static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s)
{
	uint32_t prev_byte = dict_get(&s->dict, 0);
	uint32_t low = prev_byte >> (8 - s->lzma.lc);
	uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
	return s->lzma.literal[low + high];
}
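
/*
 * Worked example (illustrative): with the common lc = 3, lp = 0 props,
 * literal_pos_mask is 0, so high is always 0 and low = prev_byte >> 5.
 * The decoder thus keeps eight literal coders and picks one using the
 * top three bits of the previous byte; bytes following similar bytes
 * share adaptive statistics.
 */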

/* Decode a literal (one 8-bit byte) */
static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	uint32_t symbol;
	uint32_t match_byte;
	uint32_t match_bit;
	uint32_t offset;
	uint32_t i;

	probs = lzma_literal_probs(s);

	if (lzma_state_is_literal(s->lzma.state)) {
		symbol = rc_bittree(&s->rc, probs, 0x100);
	} else {
		symbol = 1;
		match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
		offset = 0x100;

		do {
			match_bit = match_byte & offset;
			match_byte <<= 1;
			i = offset + match_bit + symbol;

			if (rc_bit(&s->rc, &probs[i])) {
				symbol = (symbol << 1) + 1;
				offset &= match_bit;
			} else {
				symbol <<= 1;
				offset &= ~match_bit;
			}
		} while (symbol < 0x100);
	}

	dict_put(&s->dict, (uint8_t)symbol);
	lzma_state_literal(&s->lzma.state);
}
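
/*
 * Note (illustrative): in the matched-literal path above, the byte at
 * the most recent match distance (rep0) predicts the literal bit by bit.
 * While the decoded bits agree with the match byte, offset stays 0x100
 * and "offset + match_bit + symbol" selects from two extra 256-entry
 * probability tables keyed by the predicted bit. On the first mismatch,
 * both "offset &= match_bit" and "offset &= ~match_bit" zero the offset,
 * and the rest of the byte is decoded from the plain literal table.
 */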

/* Decode the length of the match into s->lzma.len. */
static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
		uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t limit;

	if (!rc_bit(&s->rc, &l->choice)) {
		probs = l->low[pos_state];
		limit = LEN_LOW_SYMBOLS;
		s->lzma.len = MATCH_LEN_MIN;
	} else {
		if (!rc_bit(&s->rc, &l->choice2)) {
			probs = l->mid[pos_state];
			limit = LEN_MID_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
		} else {
			probs = l->high;
			limit = LEN_HIGH_SYMBOLS;
			s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
					+ LEN_MID_SYMBOLS;
		}
	}

	s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
}

/* Decode a match. The distance will be stored in s->lzma.rep0. */
static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint16_t *probs;
	uint32_t dist_slot;
	uint32_t limit;

	lzma_state_match(&s->lzma.state);

	s->lzma.rep3 = s->lzma.rep2;
	s->lzma.rep2 = s->lzma.rep1;
	s->lzma.rep1 = s->lzma.rep0;

	lzma_len(s, &s->lzma.match_len_dec, pos_state);

	probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
	dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;

	if (dist_slot < DIST_MODEL_START) {
		s->lzma.rep0 = dist_slot;
	} else {
		limit = (dist_slot >> 1) - 1;
		s->lzma.rep0 = 2 + (dist_slot & 1);

		if (dist_slot < DIST_MODEL_END) {
			s->lzma.rep0 <<= limit;
			probs = s->lzma.dist_special + s->lzma.rep0
					- dist_slot - 1;
			rc_bittree_reverse(&s->rc, probs,
					&s->lzma.rep0, limit);
		} else {
			rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
			s->lzma.rep0 <<= ALIGN_BITS;
			rc_bittree_reverse(&s->rc, s->lzma.dist_align,
					&s->lzma.rep0, ALIGN_BITS);
		}
	}
}
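
/*
 * Worked example (illustrative): a distance slot n >= DIST_MODEL_START
 * encodes a distance of the form (2 | (n & 1)) << ((n >> 1) - 1) plus
 * (n >> 1) - 1 extra bits. For dist_slot == 9 that is a base of
 * 3 << 3 == 24 plus three reverse-bittree bits, covering distances
 * 24..31. For slots >= DIST_MODEL_END (distances >= 128) the middle bits
 * are decoded as direct bits and only the lowest ALIGN_BITS == 4 bits
 * stay adaptive, via the dist_align probabilities.
 */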

/*
 * Decode a repeated match. The distance is one of the four most recently
 * seen matches. The distance will be stored in s->lzma.rep0.
 */
static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
{
	uint32_t tmp;

	if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
		if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
				s->lzma.state][pos_state])) {
			lzma_state_short_rep(&s->lzma.state);
			s->lzma.len = 1;
			return;
		}
	} else {
		if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
			tmp = s->lzma.rep1;
		} else {
			if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
				tmp = s->lzma.rep2;
			} else {
				tmp = s->lzma.rep3;
				s->lzma.rep3 = s->lzma.rep2;
			}

			s->lzma.rep2 = s->lzma.rep1;
		}

		s->lzma.rep1 = s->lzma.rep0;
		s->lzma.rep0 = tmp;
	}

	lzma_state_long_rep(&s->lzma.state);
	lzma_len(s, &s->lzma.rep_len_dec, pos_state);
}

/* LZMA decoder core */
static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s)
{
	uint32_t pos_state;

	/*
	 * If the dictionary limit was reached during the previous call, try
	 * to finish the possibly pending repeat in the dictionary.
	 */
	if (dict_has_space(&s->dict) && s->lzma.len > 0)
		dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);

	/*
	 * Decode more LZMA symbols. One iteration may consume up to
	 * LZMA_IN_REQUIRED - 1 bytes.
	 */
	while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
		pos_state = s->dict.pos & s->lzma.pos_mask;

		if (!rc_bit(&s->rc, &s->lzma.is_match[
				s->lzma.state][pos_state])) {
			lzma_literal(s);
		} else {
			if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
				lzma_rep_match(s, pos_state);
			else
				lzma_match(s, pos_state);

			if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
				return false;
		}
	}

	/*
	 * Having the range decoder always normalized when we are outside
	 * this function makes it easier to correctly handle end of the chunk.
	 */
	rc_normalize(&s->rc);

	return true;
}

/*
 * Reset the LZMA decoder and range decoder state. The dictionary is not
 * reset here, because LZMA state may be reset without resetting the
 * dictionary.
 */
static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s)
{
	uint16_t *probs;
	size_t i;

	s->lzma.state = STATE_LIT_LIT;
	s->lzma.rep0 = 0;
	s->lzma.rep1 = 0;
	s->lzma.rep2 = 0;
	s->lzma.rep3 = 0;

	/*
	 * All probabilities are initialized to the same value. This hack
	 * makes the code smaller by avoiding a separate loop for each
	 * probability array.
	 *
	 * This could be optimized so that only the part of the literal
	 * probabilities that is actually required gets initialized. In
	 * the common case we would write 12 KiB less.
	 */
	probs = s->lzma.is_match[0];
	for (i = 0; i < PROBS_TOTAL; ++i)
		probs[i] = RC_BIT_MODEL_TOTAL / 2;

	rc_reset(&s->rc);
}

/*
 * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
 * from the decoded lp and pb values. On success, the LZMA decoder state is
 * reset and true is returned.
 */
static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
{
	if (props > (4 * 5 + 4) * 9 + 8)
		return false;

	s->lzma.pos_mask = 0;
	while (props >= 9 * 5) {
		props -= 9 * 5;
		++s->lzma.pos_mask;
	}

	s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;

	s->lzma.literal_pos_mask = 0;
	while (props >= 9) {
		props -= 9;
		++s->lzma.literal_pos_mask;
	}

	s->lzma.lc = props;

	if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
		return false;

	s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;

	lzma_reset(s);

	return true;
}
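
/*
 * Worked example (illustrative): the props byte encodes
 * (pb * 5 + lp) * 9 + lc. For the default lc = 3, lp = 0, pb = 2 that is
 * (2 * 5 + 0) * 9 + 3 == 93: the first loop subtracts 45 twice (pb == 2,
 * pos_mask == 3), the second subtracts nothing (lp == 0,
 * literal_pos_mask == 0), and lc == 3 remains. The lc + lp <= 4 check
 * bounds the size of the literal probability array.
 */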

/*********
 * LZMA2 *
 *********/

/*
 * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
 * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
 * wrapper function takes care of making the LZMA decoder's assumption safe.
 *
 * As long as there is plenty of input left to be decoded in the current LZMA
 * chunk, we decode directly from the caller-supplied input buffer until
 * there are LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied
 * into s->temp.buf, which (hopefully) gets filled on the next call to this
 * function. We decode a few bytes from the temporary buffer so that we can
 * continue decoding from the caller-supplied input buffer again.
 */
static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	size_t in_avail;
	uint32_t tmp;

	in_avail = b->in_size - b->in_pos;
	if (s->temp.size > 0 || s->lzma2.compressed == 0) {
		tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
		if (tmp > s->lzma2.compressed - s->temp.size)
			tmp = s->lzma2.compressed - s->temp.size;
		if (tmp > in_avail)
			tmp = in_avail;

		memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);

		if (s->temp.size + tmp == s->lzma2.compressed) {
			memzero(s->temp.buf + s->temp.size + tmp,
					sizeof(s->temp.buf)
					- s->temp.size - tmp);
			s->rc.in_limit = s->temp.size + tmp;
		} else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
			s->temp.size += tmp;
			b->in_pos += tmp;
			return true;
		} else {
			s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
		}

		s->rc.in = s->temp.buf;
		s->rc.in_pos = 0;

		if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
			return false;

		s->lzma2.compressed -= s->rc.in_pos;

		if (s->rc.in_pos < s->temp.size) {
			s->temp.size -= s->rc.in_pos;
			memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
					s->temp.size);
			return true;
		}

		b->in_pos += s->rc.in_pos - s->temp.size;
		s->temp.size = 0;
	}

	in_avail = b->in_size - b->in_pos;
	if (in_avail >= LZMA_IN_REQUIRED) {
		s->rc.in = b->in;
		s->rc.in_pos = b->in_pos;

		if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
			s->rc.in_limit = b->in_pos + s->lzma2.compressed;
		else
			s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;

		if (!lzma_main(s))
			return false;

		in_avail = s->rc.in_pos - b->in_pos;
		if (in_avail > s->lzma2.compressed)
			return false;

		s->lzma2.compressed -= in_avail;
		b->in_pos = s->rc.in_pos;
	}

	in_avail = b->in_size - b->in_pos;
	if (in_avail < LZMA_IN_REQUIRED) {
		if (in_avail > s->lzma2.compressed)
			in_avail = s->lzma2.compressed;

		memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
		s->temp.size = in_avail;
		b->in_pos += in_avail;
	}

	return true;
}

/*
 * Take care of the LZMA2 control layer, and forward the job of actual LZMA
 * decoding or copying of uncompressed chunks to other functions.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run(
		struct xz_dec_lzma2 *s, struct xz_buf *b)
{
	uint32_t tmp;

	while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
		switch (s->lzma2.sequence) {
		case SEQ_CONTROL:
			/*
			 * LZMA2 control byte
			 *
			 * Exact values:
			 *   0x00   End marker
			 *   0x01   Dictionary reset followed by
			 *          an uncompressed chunk
			 *   0x02   Uncompressed chunk (no dictionary reset)
			 *
			 * Highest three bits (s->control & 0xE0):
			 *   0xE0   Dictionary reset, new properties and state
			 *          reset, followed by LZMA compressed chunk
			 *   0xC0   New properties and state reset, followed
			 *          by LZMA compressed chunk (no dictionary
			 *          reset)
			 *   0xA0   State reset using old properties,
			 *          followed by LZMA compressed chunk (no
			 *          dictionary reset)
			 *   0x80   LZMA chunk (no dictionary or state reset)
			 *
			 * For LZMA compressed chunks, the lowest five bits
			 * (s->control & 0x1F) are the highest bits of the
			 * uncompressed size (bits 16-20).
			 *
			 * A new LZMA2 stream must begin with a dictionary
			 * reset. The first LZMA chunk must set new
			 * properties and reset the LZMA state.
			 *
			 * Values that don't match anything described above
			 * are invalid and we return XZ_DATA_ERROR.
			 */
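			/*
			 * Worked example (illustrative, hypothetical bytes):
			 * the chunk header E0 12 33 00 FF 5D means control
			 * 0xE0 (dictionary reset, new props, state reset),
			 * uncompressed size (0x00 << 16) + (0x12 << 8) +
			 * 0x33 + 1 == 0x1234, compressed size (0x00 << 8) +
			 * 0xFF + 1 == 0x100, and props byte 0x5D (lc = 3,
			 * lp = 0, pb = 2), followed by the LZMA data itself.
			 */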
			tmp = b->in[b->in_pos++];

			if (tmp >= 0xE0 || tmp == 0x01) {
				s->lzma2.need_props = true;
				s->lzma2.need_dict_reset = false;
				dict_reset(&s->dict, b);
			} else if (s->lzma2.need_dict_reset) {
				return XZ_DATA_ERROR;
			}

			if (tmp >= 0x80) {
				s->lzma2.uncompressed = (tmp & 0x1F) << 16;
				s->lzma2.sequence = SEQ_UNCOMPRESSED_1;

				if (tmp >= 0xC0) {
					/*
					 * When there are new properties,
					 * state reset is done at
					 * SEQ_PROPERTIES.
					 */
					s->lzma2.need_props = false;
					s->lzma2.next_sequence
							= SEQ_PROPERTIES;

				} else if (s->lzma2.need_props) {
					return XZ_DATA_ERROR;

				} else {
					s->lzma2.next_sequence
							= SEQ_LZMA_PREPARE;
					if (tmp >= 0xA0)
						lzma_reset(s);
				}
			} else {
				if (tmp == 0x00)
					return XZ_STREAM_END;

				if (tmp > 0x02)
					return XZ_DATA_ERROR;

				s->lzma2.sequence = SEQ_COMPRESSED_0;
				s->lzma2.next_sequence = SEQ_COPY;
			}

			break;

		case SEQ_UNCOMPRESSED_1:
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
			break;

		case SEQ_UNCOMPRESSED_2:
			s->lzma2.uncompressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = SEQ_COMPRESSED_0;
			break;

		case SEQ_COMPRESSED_0:
			s->lzma2.compressed
					= (uint32_t)b->in[b->in_pos++] << 8;
			s->lzma2.sequence = SEQ_COMPRESSED_1;
			break;

		case SEQ_COMPRESSED_1:
			s->lzma2.compressed
					+= (uint32_t)b->in[b->in_pos++] + 1;
			s->lzma2.sequence = s->lzma2.next_sequence;
			break;

		case SEQ_PROPERTIES:
			if (!lzma_props(s, b->in[b->in_pos++]))
				return XZ_DATA_ERROR;

			s->lzma2.sequence = SEQ_LZMA_PREPARE;

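			/* Fall through */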
		case SEQ_LZMA_PREPARE:
			if (s->lzma2.compressed < RC_INIT_BYTES)
				return XZ_DATA_ERROR;

			if (!rc_read_init(&s->rc, b))
				return XZ_OK;

			s->lzma2.compressed -= RC_INIT_BYTES;
			s->lzma2.sequence = SEQ_LZMA_RUN;

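			/* Fall through */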
		case SEQ_LZMA_RUN:
			/*
			 * Set dictionary limit to indicate how much we want
			 * to be decoded at maximum. Decode new data into the
			 * dictionary. Flush the new data from dictionary to
			 * b->out. Check if we finished decoding this chunk.
			 * In case the dictionary got full but we didn't fill
			 * the output buffer yet, we may run this loop
			 * multiple times without changing s->lzma2.sequence.
			 */
			dict_limit(&s->dict, min_t(size_t,
					b->out_size - b->out_pos,
					s->lzma2.uncompressed));
			if (!lzma2_lzma(s, b))
				return XZ_DATA_ERROR;

			s->lzma2.uncompressed -= dict_flush(&s->dict, b);

			if (s->lzma2.uncompressed == 0) {
				if (s->lzma2.compressed > 0 || s->lzma.len > 0
						|| !rc_is_finished(&s->rc))
					return XZ_DATA_ERROR;

				rc_reset(&s->rc);
				s->lzma2.sequence = SEQ_CONTROL;

			} else if (b->out_pos == b->out_size
					|| (b->in_pos == b->in_size
						&& s->temp.size
						< s->lzma2.compressed)) {
				return XZ_OK;
			}

			break;

		case SEQ_COPY:
			dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
			if (s->lzma2.compressed > 0)
				return XZ_OK;

			s->lzma2.sequence = SEQ_CONTROL;
			break;
		}
	}

	return XZ_OK;
}

XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max)
{
	struct xz_dec_lzma2 *s;

	/*
	 * The maximum dictionary size supported by this implementation
	 * is 3 GiB.
	 */
	if (dict_max > ((uint32_t)3 << 30))
		return NULL;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return NULL;

	if (dict_max > 0) {
		s->dict.buf = vmalloc(dict_max);
		if (s->dict.buf == NULL) {
			kfree(s);
			return NULL;
		}
	}

	s->dict.allocated = dict_max;

	return s;
}

XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset(
		struct xz_dec_lzma2 *s, uint8_t props)
{
	/* This limits dictionary size to 3 GiB to keep parsing simpler. */
	if (props > 39) {
		XZ_DEBUG_MSG("props:%d", props);
		return XZ_OPTIONS_ERROR;
	}

	s->dict.size = 2 + (props & 1);
	s->dict.size <<= (props >> 1) + 11;
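	/*
	 * Illustrative examples: props == 0 gives 2 << 11 == 4 KiB,
	 * props == 16 gives 2 << 19 == 1 MiB, and the largest accepted
	 * value, props == 39, gives 3 << 30 == 3 GiB.
	 */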

	if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) {
#ifdef XZ_REALLOC_DICT_BUF
		s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size);
		if (!s->dict.buf)
			return XZ_MEMLIMIT_ERROR;
		s->dict.allocated = s->dict.size;
#else
		return XZ_MEMLIMIT_ERROR;
#endif
	}

	s->dict.end = s->dict.size;

	s->lzma.len = 0;

	s->lzma2.sequence = SEQ_CONTROL;
	s->lzma2.need_dict_reset = true;

	s->temp.size = 0;

	return XZ_OK;
}

XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
{
	if (s->dict.allocated > 0)
		vfree(s->dict.buf);

	kfree(s);
}
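
/*
 * Illustrative usage sketch (kept under "#if 0", never compiled): how a
 * caller such as the xz stream decoder drives this file's API in
 * multi-call mode. The buffer sizes and the dictionary-size props byte
 * (16 == 1 MiB, see xz_dec_lzma2_reset() above) are arbitrary example
 * values, and a real caller would loop refilling b.in until the end
 * marker yields XZ_STREAM_END.
 */
#if 0
static enum xz_ret decode_raw_lzma2(const uint8_t *in, size_t in_size,
		uint8_t *out, size_t out_size)
{
	struct xz_buf b = {
		.in = in, .in_pos = 0, .in_size = in_size,
		.out = out, .out_pos = 0, .out_size = out_size,
	};
	enum xz_ret ret;
	struct xz_dec_lzma2 *s = xz_dec_lzma2_create(1 << 20);

	if (s == NULL)
		return XZ_MEM_ERROR;

	ret = xz_dec_lzma2_reset(s, 16);
	if (ret == XZ_OK)
		ret = xz_dec_lzma2_run(s, &b);

	xz_dec_lzma2_end(s);
	return ret;
}
#endif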