unxz: update from XZ embedded git

function                                             old     new   delta
rc_reset                                               -      21     +21
unpack_xz_stream                                    2342    2357     +15
lzma_reset                                           102      64     -38
lzma_len                                             506     443     -63
xz_dec_lzma2_run                                    1438    1374     -64
xz_dec_reset                                          73       -     -73
lzma_main                                           2517    2183    -334
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 1/4 up/down: 36/-572)          Total: -536 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c
index 37de6fc..da71cb4 100644
--- a/archival/libunarchive/unxz/xz_dec_lzma2.c
+++ b/archival/libunarchive/unxz/xz_dec_lzma2.c
@@ -34,7 +34,8 @@
  *
  * In multi-call mode, also these are true:
  *    end == size
- *    size <= allocated
+ *    size <= size_max
+ *    allocated <= size
  *
  * Most of these variables are size_t to support single-call mode,
  * in which the dictionary variables address the actual output
@@ -74,11 +75,20 @@
 	uint32_t size;
 
 	/*
-	 * Amount of memory allocated for the dictionary. A special
-	 * value of zero indicates that we are in single-call mode,
-	 * where the output buffer works as the dictionary.
+	 * Maximum allowed dictionary size in multi-call mode.
+	 * This is ignored in single-call mode.
+	 */
+	uint32_t size_max;
+
+	/*
+	 * Amount of memory currently allocated for the dictionary.
+	 * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
+	 * size_max is always the same as the allocated size.)
 	 */
 	uint32_t allocated;
+
+	/* Operation mode */
+	enum xz_mode mode;
 };
 
 /* Range decoder */
@@ -120,6 +130,21 @@
 };
 
 struct lzma_dec {
+	/* Distances of latest four matches */
+	uint32_t rep0;
+	uint32_t rep1;
+	uint32_t rep2;
+	uint32_t rep3;
+
+	/* Types of the most recently seen LZMA symbols */
+	enum lzma_state state;
+
+	/*
+	 * Length of a match. This is updated so that dict_repeat can
+	 * be called again to finish repeating the whole match.
+	 */
+	uint32_t len;
+
 	/*
 	 * LZMA properties or related bit masks (number of literal
 	 * context bits, a mask dervied from the number of literal
@@ -130,21 +155,6 @@
 	uint32_t literal_pos_mask; /* (1 << lp) - 1 */
 	uint32_t pos_mask;         /* (1 << pb) - 1 */
 
-	/* Types of the most recently seen LZMA symbols */
-	enum lzma_state state;
-
-	/* Distances of latest four matches */
-	uint32_t rep0;
-	uint32_t rep1;
-	uint32_t rep2;
-	uint32_t rep3;
-
-	/*
-	 * Length of a match. This is updated so that dict_repeat can
-	 * be called again to finish repeating the whole match.
-	 */
-	uint32_t len;
-
 	/* If 1, it's a match. Otherwise it's a single 8-bit literal. */
 	uint16_t is_match[STATES][POS_STATES_MAX];
 
@@ -201,49 +211,59 @@
 	uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
 };
 
+struct lzma2_dec {
+	/* Position in xz_dec_lzma2_run(). */
+	enum lzma2_seq {
+		SEQ_CONTROL,
+		SEQ_UNCOMPRESSED_1,
+		SEQ_UNCOMPRESSED_2,
+		SEQ_COMPRESSED_0,
+		SEQ_COMPRESSED_1,
+		SEQ_PROPERTIES,
+		SEQ_LZMA_PREPARE,
+		SEQ_LZMA_RUN,
+		SEQ_COPY
+	} sequence;
+
+	/* Next position after decoding the compressed size of the chunk. */
+	enum lzma2_seq next_sequence;
+
+	/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
+	uint32_t uncompressed;
+
+	/*
+	 * Compressed size of LZMA chunk or compressed/uncompressed
+	 * size of uncompressed chunk (64 KiB at maximum)
+	 */
+	uint32_t compressed;
+
+	/*
+	 * True if dictionary reset is needed. This is false before
+	 * the first chunk (LZMA or uncompressed).
+	 */
+	bool need_dict_reset;
+
+	/*
+	 * True if new LZMA properties are needed. This is false
+	 * before the first LZMA chunk.
+	 */
+	bool need_props;
+};
+
 struct xz_dec_lzma2 {
-	/* LZMA2 */
-	struct {
-		/* Position in xz_dec_lzma2_run(). */
-		enum lzma2_seq {
-			SEQ_CONTROL,
-			SEQ_UNCOMPRESSED_1,
-			SEQ_UNCOMPRESSED_2,
-			SEQ_COMPRESSED_0,
-			SEQ_COMPRESSED_1,
-			SEQ_PROPERTIES,
-			SEQ_LZMA_PREPARE,
-			SEQ_LZMA_RUN,
-			SEQ_COPY
-		} sequence;
-
-		/*
-		 * Next position after decoding the compressed size of
-		 * the chunk.
-		 */
-		enum lzma2_seq next_sequence;
-
-		/* Uncompressed size of LZMA chunk (2 MiB at maximum) */
-		uint32_t uncompressed;
-
-		/*
-		 * Compressed size of LZMA chunk or compressed/uncompressed
-		 * size of uncompressed chunk (64 KiB at maximum)
-		 */
-		uint32_t compressed;
-
-		/*
-		 * True if dictionary reset is needed. This is false before
-		 * the first chunk (LZMA or uncompressed).
-		 */
-		bool need_dict_reset;
-
-		/*
-		 * True if new LZMA properties are needed. This is false
-		 * before the first LZMA chunk.
-		 */
-		bool need_props;
-	} lzma2;
+	/*
+	 * The order below is important on x86 to reduce code size and
+	 * it shouldn't hurt on other platforms. Everything up to and
+	 * including lzma.pos_mask are in the first 128 bytes on x86-32,
+	 * which allows using smaller instructions to access those
+	 * variables. On x86-64, fewer variables fit into the first 128
+	 * bytes, but this is still the best order without sacrificing
+	 * the readability by splitting the structures.
+	 */
+	struct rc_dec rc;
+	struct dictionary dict;
+	struct lzma2_dec lzma2;
+	struct lzma_dec lzma;
 
 	/*
 	 * Temporary buffer which holds small number of input bytes between
@@ -253,10 +273,6 @@
 		uint32_t size;
 		uint8_t buf[3 * LZMA_IN_REQUIRED];
 	} temp;
-
-	struct dictionary dict;
-	struct rc_dec rc;
-	struct lzma_dec lzma;
 };
 
 /**************
@@ -269,7 +285,7 @@
  */
 static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b)
 {
-	if (dict->allocated == 0) {
+	if (DEC_IS_SINGLE(dict->mode)) {
 		dict->buf = b->out + b->out_pos;
 		dict->end = b->out_size - b->out_pos;
 	}
@@ -379,7 +395,7 @@
 		if (dict->full < dict->pos)
 			dict->full = dict->pos;
 
-		if (dict->allocated != 0) {
+		if (DEC_IS_MULTI(dict->mode)) {
 			if (dict->pos == dict->end)
 				dict->pos = 0;
 
@@ -404,7 +420,7 @@
 {
 	size_t copy_size = dict->pos - dict->start;
 
-	if (dict->allocated != 0) {
+	if (DEC_IS_MULTI(dict->mode)) {
 		if (dict->pos == dict->end)
 			dict->pos = 0;
 
@@ -422,7 +438,7 @@
  *****************/
 
 /* Reset the range decoder. */
-static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc)
+static void XZ_FUNC rc_reset(struct rc_dec *rc)
 {
 	rc->range = (uint32_t)-1;
 	rc->code = 0;
@@ -1088,28 +1104,27 @@
 	return XZ_OK;
 }
 
-XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max)
+XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(
+		enum xz_mode mode, uint32_t dict_max)
 {
-	struct xz_dec_lzma2 *s;
-
-	/* Maximum supported dictionary by this implementation is 3 GiB. */
-	if (dict_max > ((uint32_t)3 << 30))
-		return NULL;
-
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (s == NULL)
 		return NULL;
 
-	if (dict_max > 0) {
+	s->dict.mode = mode;
+	s->dict.size_max = dict_max;
+
+	if (DEC_IS_PREALLOC(mode)) {
 		s->dict.buf = vmalloc(dict_max);
 		if (s->dict.buf == NULL) {
 			kfree(s);
 			return NULL;
 		}
+	} else if (DEC_IS_DYNALLOC(mode)) {
+		s->dict.buf = NULL;
+		s->dict.allocated = 0;
 	}
 
-	s->dict.allocated = dict_max;
-
 	return s;
 }
 
@@ -1123,18 +1138,23 @@
 	s->dict.size = 2 + (props & 1);
 	s->dict.size <<= (props >> 1) + 11;
 
-	if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) {
-#ifdef XZ_REALLOC_DICT_BUF
-		s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size);
-			if (!s->dict.buf)
-				return XZ_MEMLIMIT_ERROR;
-		s->dict.allocated = s->dict.size;
-#else
-		return XZ_MEMLIMIT_ERROR;
-#endif
-	}
+	if (DEC_IS_MULTI(s->dict.mode)) {
+		if (s->dict.size > s->dict.size_max)
+			return XZ_MEMLIMIT_ERROR;
 
-	s->dict.end = s->dict.size;
+		s->dict.end = s->dict.size;
+
+		if (DEC_IS_DYNALLOC(s->dict.mode)) {
+			if (s->dict.allocated < s->dict.size) {
+				vfree(s->dict.buf);
+				s->dict.buf = vmalloc(s->dict.size);
+				if (s->dict.buf == NULL) {
+					s->dict.allocated = 0;
+					return XZ_MEM_ERROR;
+				}
+			}
+		}
+	}
 
 	s->lzma.len = 0;
 
@@ -1148,7 +1168,7 @@
 
 XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
 {
-	if (s->dict.allocated > 0)
+	if (DEC_IS_MULTI(s->dict.mode))
 		vfree(s->dict.buf);
 
 	kfree(s);