Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 1 | /* |
| 2 | * This file uses XZ Embedded library code which is written |
| 3 | * by Lasse Collin <lasse.collin@tukaani.org> |
| 4 | * and Igor Pavlov <http://7-zip.org/> |
| 5 | * |
Denys Vlasenko | 6948f21 | 2010-05-30 04:18:13 +0200 | [diff] [blame] | 6 | * See README file in unxz/ directory for more information. |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 7 | * |
| 8 | * This file is: |
| 9 | * Copyright (C) 2010 Denys Vlasenko <vda.linux@googlemail.com> |
Denys Vlasenko | 0ef64bd | 2010-08-16 20:14:46 +0200 | [diff] [blame] | 10 | * Licensed under GPLv2, see file LICENSE in this source tree. |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 11 | */ |
| 12 | #include "libbb.h" |
Denys Vlasenko | d184a72 | 2011-09-22 12:45:14 +0200 | [diff] [blame] | 13 | #include "bb_archive.h" |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 14 | |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 15 | #define XZ_FUNC FAST_FUNC |
| 16 | #define XZ_EXTERN static |
| 17 | |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 18 | #define XZ_DEC_DYNALLOC |
| 19 | |
Denys Vlasenko | 716f3f6 | 2010-06-01 14:41:39 +0200 | [diff] [blame] | 20 | /* Skip check (rather than fail) of unsupported hash functions */ |
| 21 | #define XZ_DEC_ANY_CHECK 1 |
| 22 | |
| 23 | /* We use our own crc32 function */ |
| 24 | #define XZ_INTERNAL_CRC32 0 |
Denys Vlasenko | 716f3f6 | 2010-06-01 14:41:39 +0200 | [diff] [blame] | 25 | static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 26 | { |
Denys Vlasenko | 9ce642f | 2010-10-27 15:26:45 +0200 | [diff] [blame] | 27 | return ~crc32_block_endian0(~crc, buf, size, global_crc32_table); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 28 | } |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 29 | |
Denys Vlasenko | 2c1258c | 2017-07-15 20:22:25 +0200 | [diff] [blame] | 30 | /* We use arch-optimized unaligned fixed-endian accessors. |
| 31 | * They have been moved to libbb (proved to be useful elsewhere as well), |
| 32 | * just check that we have them defined: |
| 33 | */ |
| 34 | #if !defined(get_unaligned_le32) \ |
| 35 | || !defined(get_unaligned_be32) \ |
| 36 | || !defined(put_unaligned_le32) \ |
| 37 | || !defined(put_unaligned_be32) |
| 38 | # error get_unaligned_le32 accessors are not defined |
| 39 | #endif |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 40 | |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 41 | #include "unxz/xz_dec_bcj.c" |
| 42 | #include "unxz/xz_dec_lzma2.c" |
| 43 | #include "unxz/xz_dec_stream.c" |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 44 | |
| 45 | IF_DESKTOP(long long) int FAST_FUNC |
Denys Vlasenko | b4c11c1 | 2014-12-07 00:44:00 +0100 | [diff] [blame] | 46 | unpack_xz_stream(transformer_state_t *xstate) |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 47 | { |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 48 | enum xz_ret xz_result; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 49 | struct xz_buf iobuf; |
| 50 | struct xz_dec *state; |
| 51 | unsigned char *membuf; |
| 52 | IF_DESKTOP(long long) int total = 0; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 53 | |
Denys Vlasenko | 9ce642f | 2010-10-27 15:26:45 +0200 | [diff] [blame] | 54 | if (!global_crc32_table) |
Denys Vlasenko | ddacb03 | 2018-02-01 10:56:19 +0100 | [diff] [blame] | 55 | global_crc32_new_table_le(); |
Denys Vlasenko | 8376bfa | 2010-06-01 23:26:54 +0200 | [diff] [blame] | 56 | |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 57 | memset(&iobuf, 0, sizeof(iobuf)); |
Denys Vlasenko | 8a6a2f9 | 2012-03-06 16:27:48 +0100 | [diff] [blame] | 58 | membuf = xmalloc(2 * BUFSIZ); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 59 | iobuf.in = membuf; |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 60 | iobuf.out = membuf + BUFSIZ; |
| 61 | iobuf.out_size = BUFSIZ; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 62 | |
Denys Vlasenko | 984b0a6 | 2016-06-20 11:06:42 +0200 | [diff] [blame] | 63 | if (!xstate || xstate->signature_skipped) { |
Denys Vlasenko | 8a6a2f9 | 2012-03-06 16:27:48 +0100 | [diff] [blame] | 64 | /* Preload XZ file signature */ |
| 65 | strcpy((char*)membuf, HEADER_MAGIC); |
| 66 | iobuf.in_size = HEADER_MAGIC_SIZE; |
| 67 | } /* else: let xz code read & check it */ |
| 68 | |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 69 | /* Limit memory usage to about 64 MiB. */ |
| 70 | state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 71 | |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 72 | xz_result = X_OK; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 73 | while (1) { |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 74 | if (iobuf.in_pos == iobuf.in_size) { |
Denys Vlasenko | b4c11c1 | 2014-12-07 00:44:00 +0100 | [diff] [blame] | 75 | int rd = safe_read(xstate->src_fd, membuf, BUFSIZ); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 76 | if (rd < 0) { |
James Byrne | 6937487 | 2019-07-02 11:35:03 +0200 | [diff] [blame] | 77 | bb_simple_error_msg(bb_msg_read_error); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 78 | total = -1; |
| 79 | break; |
| 80 | } |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 81 | if (rd == 0 && xz_result == XZ_STREAM_END) |
| 82 | break; |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 83 | iobuf.in_size = rd; |
| 84 | iobuf.in_pos = 0; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 85 | } |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 86 | if (xz_result == XZ_STREAM_END) { |
| 87 | /* |
| 88 | * Try to start decoding next concatenated stream. |
| 89 | * Stream padding must always be a multiple of four |
| 90 | * bytes to preserve four-byte alignment. To keep the |
| 91 | * code slightly smaller, we aren't as strict here as |
| 92 | * the .xz spec requires. We just skip all zero-bytes |
| 93 | * without checking the alignment and thus can accept |
| 94 | * files that aren't valid, e.g. the XZ utils test |
| 95 | * files bad-0pad-empty.xz and bad-0catpad-empty.xz. |
| 96 | */ |
| 97 | do { |
| 98 | if (membuf[iobuf.in_pos] != 0) { |
Denys Vlasenko | 42f454b | 2019-10-11 14:11:44 +0200 | [diff] [blame] | 99 | /* There is more data, but is it XZ data? |
| 100 | * Example: dpkg-deb -f busybox_1.30.1-4_amd64.deb |
| 101 | * reads control.tar.xz "control" file |
| 102 | * inside the ar archive, but tar.xz |
| 103 | * extraction code reaches end of xz data, |
| 104 | * reached this code and reads the beginning |
| 105 | * of data.tar.xz's ar header, which isn't xz data, |
| 106 | * and prints "corrupted data". |
| 107 | * The correct solution is to not read |
| 108 | * past nested archive (to simulate EOF). |
| 109 | * This is a workaround: |
| 110 | */ |
| 111 | if (membuf[iobuf.in_pos] != 0xfd) { |
| 112 | /* It's definitely not a xz signature |
| 113 | * (which is 0xfd,"7zXZ",0x00). |
| 114 | */ |
| 115 | goto end; |
| 116 | } |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 117 | xz_dec_reset(state); |
| 118 | goto do_run; |
| 119 | } |
| 120 | iobuf.in_pos++; |
| 121 | } while (iobuf.in_pos < iobuf.in_size); |
| 122 | } |
| 123 | do_run: |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 124 | // bb_error_msg(">in pos:%d size:%d out pos:%d size:%d", |
| 125 | // iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size); |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 126 | xz_result = xz_dec_run(state, &iobuf); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 127 | // bb_error_msg("<in pos:%d size:%d out pos:%d size:%d r:%d", |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 128 | // iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size, xz_result); |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 129 | if (iobuf.out_pos) { |
Denys Vlasenko | b4c11c1 | 2014-12-07 00:44:00 +0100 | [diff] [blame] | 130 | xtransformer_write(xstate, iobuf.out, iobuf.out_pos); |
Denys Vlasenko | ba73cfd | 2010-06-20 02:40:56 +0200 | [diff] [blame] | 131 | IF_DESKTOP(total += iobuf.out_pos;) |
| 132 | iobuf.out_pos = 0; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 133 | } |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 134 | if (xz_result == XZ_STREAM_END) { |
| 135 | /* |
| 136 | * Can just "break;" here, if not for concatenated |
| 137 | * .xz streams. |
| 138 | * Checking for padding may require buffer |
| 139 | * replenishment. Can't do it here. |
| 140 | */ |
| 141 | continue; |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 142 | } |
Lasse Collin | 380c8a0 | 2013-02-27 17:26:40 +0100 | [diff] [blame] | 143 | if (xz_result != XZ_OK && xz_result != XZ_UNSUPPORTED_CHECK) { |
James Byrne | 6937487 | 2019-07-02 11:35:03 +0200 | [diff] [blame] | 144 | bb_simple_error_msg("corrupted data"); |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 145 | total = -1; |
| 146 | break; |
| 147 | } |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 148 | } |
Denys Vlasenko | 42f454b | 2019-10-11 14:11:44 +0200 | [diff] [blame] | 149 | end: |
Denys Vlasenko | fb6c76c | 2010-05-30 03:47:40 +0200 | [diff] [blame] | 150 | xz_dec_end(state); |
| 151 | free(membuf); |
| 152 | |
| 153 | return total; |
| 154 | } |