/*
 * Branch/Call/Jump (BCJ) filter decoders
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

#include "xz_private.h"

struct xz_dec_bcj {
	/* Type of the BCJ filter being used */
	enum {
		BCJ_X86 = 4,		/* x86 or x86-64 */
		BCJ_POWERPC = 5,	/* Big endian only */
		BCJ_IA64 = 6,		/* Big or little endian */
		BCJ_ARM = 7,		/* Little endian only */
		BCJ_ARMTHUMB = 8,	/* Little endian only */
		BCJ_SPARC = 9		/* Big or little endian */
	} type;

	/*
	 * Return value of the next filter in the chain. We need to preserve
	 * this information across calls, because we must not call the next
	 * filter anymore once it has returned XZ_STREAM_END.
	 */
	enum xz_ret ret;

	/* True if we are operating in single-call mode. */
	bool single_call;

	/*
	 * Absolute position relative to the beginning of the uncompressed
	 * data (in a single .xz Block). We care only about the lowest 32
	 * bits so this doesn't need to be uint64_t even with big files.
	 */
	uint32_t pos;

	/* x86 filter state */
	uint32_t x86_prev_mask;

	/* Temporary space to hold the variables from struct xz_buf */
	uint8_t *out;
	size_t out_pos;
	size_t out_size;

	struct {
		/* Amount of already filtered data in the beginning of buf */
		size_t filtered;

		/* Total amount of data currently stored in buf */
		size_t size;

		/*
		 * Buffer to hold a mix of filtered and unfiltered data. This
		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
		 *
		 * Type         Alignment   Look-ahead
		 * x86              1           4
		 * PowerPC          4           0
		 * IA-64           16           0
		 * ARM              4           0
		 * ARM-Thumb        2           2
		 * SPARC            4           0
		 */
		uint8_t buf[16];
	} temp;
};

#ifdef XZ_DEC_X86
/*
 * This is a macro used to test the most significant byte of a memory address
 * in an x86 instruction.
 */
#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF)

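/*
 * Decode the x86 BCJ filter. On x86, near CALL (opcode 0xE8) and near JMP
 * (opcode 0xE9) are five bytes long: the opcode followed by a 32-bit little
 * endian displacement relative to the next instruction. The encoder replaced
 * that displacement with an absolute address; this function undoes it by
 * subtracting the instruction's position in the uncompressed data
 * (s->pos + i) plus the five-byte instruction length. prev_mask, together
 * with the most-significant-byte test, is a heuristic that tracks nearby
 * 0xE8/0xE9 bytes so that byte sequences unlikely to be real branch
 * instructions are left alone.
 */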
static noinline_for_stack size_t XZ_FUNC bcj_x86(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	static const bool mask_to_allowed_status[8]
		= { true, true, true, false, true, false, false, false };

	static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 };

	size_t i;
	size_t prev_pos = (size_t)-1;
	uint32_t prev_mask = s->x86_prev_mask;
	uint32_t src;
	uint32_t dest;
	uint32_t j;
	uint8_t b;

	if (size <= 4)
		return 0;

	size -= 4;
	for (i = 0; i < size; ++i) {
		if ((buf[i] & 0xFE) != 0xE8)
			continue;

		prev_pos = i - prev_pos;
		if (prev_pos > 3) {
			prev_mask = 0;
		} else {
			prev_mask = (prev_mask << (prev_pos - 1)) & 7;
			if (prev_mask != 0) {
				b = buf[i + 4 - mask_to_bit_num[prev_mask]];
				if (!mask_to_allowed_status[prev_mask]
						|| bcj_x86_test_msbyte(b)) {
					prev_pos = i;
					prev_mask = (prev_mask << 1) | 1;
					continue;
				}
			}
		}

		prev_pos = i;

		if (bcj_x86_test_msbyte(buf[i + 4])) {
			src = get_unaligned_le32(buf + i + 1);
			while (true) {
				dest = src - (s->pos + (uint32_t)i + 5);
				if (prev_mask == 0)
					break;

				j = mask_to_bit_num[prev_mask] * 8;
				b = (uint8_t)(dest >> (24 - j));
				if (!bcj_x86_test_msbyte(b))
					break;

				src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
			}

			dest &= 0x01FFFFFF;
			dest |= (uint32_t)0 - (dest & 0x01000000);
			put_unaligned_le32(dest, buf + i + 1);
			i += 4;
		} else {
			prev_mask = (prev_mask << 1) | 1;
		}
	}

	prev_pos = i - prev_pos;
	s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
	return i;
}
#endif

#ifdef XZ_DEC_POWERPC
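/*
 * Decode the PowerPC BCJ filter. Instructions are fixed-width 32-bit big
 * endian words. The value 0x48000001 under mask 0xFC000003 matches the
 * "bl" instruction (primary opcode 18, AA = 0, LK = 1); its 24-bit
 * word-aligned displacement field is converted from an absolute address
 * back to a relative one by subtracting the instruction's position.
 */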
static noinline_for_stack size_t XZ_FUNC bcj_powerpc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t instr;

	for (i = 0; i + 4 <= size; i += 4) {
		instr = get_unaligned_be32(buf + i);
		if ((instr & 0xFC000003) == 0x48000001) {
			instr &= 0x03FFFFFC;
			instr -= s->pos + (uint32_t)i;
			instr &= 0x03FFFFFC;
			instr |= 0x48000001;
			put_unaligned_be32(instr, buf + i);
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_IA64
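/*
 * Decode the IA-64 BCJ filter. IA-64 code consists of 128-bit bundles:
 * a 5-bit template field followed by three 41-bit instruction slots.
 * branch_table maps the template value to a bitmask of the slots that may
 * hold an IP-relative branch; for those slots, the 21-bit, 16-byte-aligned
 * address field (20 bits at bit 13 plus one more bit at bit 36 of the
 * slot) is converted from absolute back to relative form.
 */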
static noinline_for_stack size_t XZ_FUNC bcj_ia64(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	static const uint8_t branch_table[32] = {
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		4, 4, 6, 6, 0, 0, 7, 7,
		4, 4, 0, 0, 4, 4, 0, 0
	};

	/*
	 * The local variables take a little bit of stack space, but it's
	 * less than what the LZMA2 decoder takes, so it doesn't make sense
	 * to reduce stack usage here without doing that for the LZMA2
	 * decoder too.
	 */

	/* Loop counters */
	size_t i;
	size_t j;

	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
	uint32_t slot;

	/* Bitwise offset of the instruction indicated by slot */
	uint32_t bit_pos;

	/* bit_pos split into byte and bit parts */
	uint32_t byte_pos;
	uint32_t bit_res;

	/* Address part of an instruction */
	uint32_t addr;

	/* Mask used to detect which instructions to convert */
	uint32_t mask;

	/* 41-bit instruction stored somewhere in the lowest 48 bits */
	uint64_t instr;

	/* Instruction normalized with bit_res for easier manipulation */
	uint64_t norm;

	for (i = 0; i + 16 <= size; i += 16) {
		mask = branch_table[buf[i] & 0x1F];
		for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
			if (((mask >> slot) & 1) == 0)
				continue;

			byte_pos = bit_pos >> 3;
			bit_res = bit_pos & 7;
			instr = 0;
			for (j = 0; j < 6; ++j)
				instr |= (uint64_t)(buf[i + j + byte_pos])
						<< (8 * j);

			norm = instr >> bit_res;

			if (((norm >> 37) & 0x0F) == 0x05
					&& ((norm >> 9) & 0x07) == 0) {
				addr = (norm >> 13) & 0x0FFFFF;
				addr |= ((uint32_t)(norm >> 36) & 1) << 20;
				addr <<= 4;
				addr -= s->pos + (uint32_t)i;
				addr >>= 4;

				norm &= ~((uint64_t)0x8FFFFF << 13);
				norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
				norm |= (uint64_t)(addr & 0x100000)
						<< (36 - 20);

				instr &= (1 << bit_res) - 1;
				instr |= norm << bit_res;

				for (j = 0; j < 6; j++)
					buf[i + j + byte_pos]
						= (uint8_t)(instr >> (8 * j));
			}
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_ARM
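/*
 * Decode the ARM BCJ filter. Instructions are 32-bit little endian; a top
 * byte of 0xEB is the BL instruction with the "always" condition code. The
 * low three bytes hold a 24-bit word-aligned displacement that is relative
 * to the current instruction plus 8 bytes (the ARM PC offset), hence the
 * "+ 8" in the position adjustment below.
 */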
static noinline_for_stack size_t XZ_FUNC bcj_arm(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t addr;

	for (i = 0; i + 4 <= size; i += 4) {
		if (buf[i + 3] == 0xEB) {
			addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
					| ((uint32_t)buf[i + 2] << 16);
			addr <<= 2;
			addr -= s->pos + (uint32_t)i + 8;
			addr >>= 2;
			buf[i] = (uint8_t)addr;
			buf[i + 1] = (uint8_t)(addr >> 8);
			buf[i + 2] = (uint8_t)(addr >> 16);
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_ARMTHUMB
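/*
 * Decode the ARM-Thumb BCJ filter. A Thumb BL instruction is a pair of
 * 16-bit halfwords, 0xF0xx followed by 0xF8xx, which together carry a
 * 22-bit halfword-aligned displacement relative to the current instruction
 * plus 4 bytes. The scan advances two bytes at a time because Thumb code
 * is only halfword aligned.
 */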
static noinline_for_stack size_t XZ_FUNC bcj_armthumb(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t addr;

	for (i = 0; i + 4 <= size; i += 2) {
		if ((buf[i + 1] & 0xF8) == 0xF0
				&& (buf[i + 3] & 0xF8) == 0xF8) {
			addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
					| ((uint32_t)buf[i] << 11)
					| (((uint32_t)buf[i + 3] & 0x07) << 8)
					| (uint32_t)buf[i + 2];
			addr <<= 1;
			addr -= s->pos + (uint32_t)i + 4;
			addr >>= 1;
			buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
			buf[i] = (uint8_t)(addr >> 11);
			buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
			buf[i + 2] = (uint8_t)addr;
			i += 2;
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_SPARC
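/*
 * Decode the SPARC BCJ filter. Instructions are 32-bit big endian words.
 * Checking the top ten bits for 0x100 or 0x1FF matches CALL instructions
 * (op = 01) whose 30-bit word displacement has its high bits all zeros or
 * all ones, i.e. plausible near calls; the displacement is then converted
 * from absolute back to relative form.
 */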
static noinline_for_stack size_t XZ_FUNC bcj_sparc(
		struct xz_dec_bcj *s, uint8_t *buf, size_t size)
{
	size_t i;
	uint32_t instr;

	for (i = 0; i + 4 <= size; i += 4) {
		instr = get_unaligned_be32(buf + i);
		if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
			instr <<= 2;
			instr -= s->pos + (uint32_t)i;
			instr >>= 2;
			instr = ((uint32_t)0x40000000 - (instr & 0x400000))
					| 0x40000000 | (instr & 0x3FFFFF);
			put_unaligned_be32(instr, buf + i);
		}
	}

	return i;
}
#endif

#ifdef XZ_DEC_BCJ
/*
 * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
 * of data that got filtered.
 *
 * NOTE: This is implemented as a switch statement to avoid using function
 * pointers, which could be problematic in the kernel boot code, where
 * pointers to static data must be avoided (at least on x86).
 */
static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
		uint8_t *buf, size_t *pos, size_t size)
{
	size_t filtered;

	buf += *pos;
	size -= *pos;

	switch (s->type) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
		filtered = bcj_x86(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
		filtered = bcj_powerpc(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
		filtered = bcj_ia64(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
		filtered = bcj_arm(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
		filtered = bcj_armthumb(s, buf, size);
		break;
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
		filtered = bcj_sparc(s, buf, size);
		break;
#endif
	default:
		/* Never reached but silence compiler warnings. */
		filtered = 0;
		break;
	}

	*pos += filtered;
	s->pos += filtered;
}
#endif

#ifdef XZ_DEC_BCJ
/*
 * Flush pending filtered data from temp to the output buffer.
 * Move the remaining mixture of possibly filtered and unfiltered
 * data to the beginning of temp.
 */
static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
{
	size_t copy_size;

	copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
	memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
	b->out_pos += copy_size;

	s->temp.filtered -= copy_size;
	s->temp.size -= copy_size;
	memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
}

/*
 * The BCJ filter functions are primitive in the sense that they process
 * the data in chunks of 1-16 bytes. To hide this issue, this function does
 * some buffering.
 */
XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
		struct xz_dec_lzma2 *lzma2, struct xz_buf *b)
{
	size_t out_start;

	/*
	 * Flush pending already filtered data to the output buffer. Return
	 * immediately if we couldn't flush everything, or if the next
	 * filter in the chain had already returned XZ_STREAM_END.
	 */
	if (s->temp.filtered > 0) {
		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;

		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;
	}

	/*
	 * If we have more output space than what is currently pending in
	 * temp, copy the unfiltered data from temp to the output buffer
	 * and try to fill the output buffer by decoding more data from the
	 * next filter in the chain. Apply the BCJ filter on the new data
	 * in the output buffer. If not everything can be filtered, copy the
	 * unfiltered tail to temp and rewind the output buffer position
	 * accordingly.
	 */
	if (s->temp.size < b->out_size - b->out_pos) {
		out_start = b->out_pos;
		memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
		b->out_pos += s->temp.size;

		s->ret = xz_dec_lzma2_run(lzma2, b);
		if (s->ret != XZ_STREAM_END
				&& (s->ret != XZ_OK || s->single_call))
			return s->ret;

		bcj_apply(s, b->out, &out_start, b->out_pos);

		/*
		 * As an exception, if the next filter returned XZ_STREAM_END,
		 * we can return XZ_STREAM_END too, since the last few bytes
		 * that remain unfiltered are meant to stay unfiltered.
		 */
		if (s->ret == XZ_STREAM_END)
			return XZ_STREAM_END;

		s->temp.size = b->out_pos - out_start;
		b->out_pos -= s->temp.size;
		memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
	}

	/*
	 * If we have unfiltered data in temp, try to fill it by decoding
	 * more data from the next filter. Apply the BCJ filter on temp.
	 * Then we hopefully can fill the actual output buffer by copying
	 * filtered data from temp. A mix of filtered and unfiltered data
	 * may be left in temp; it will be taken care of on the next call
	 * to this function.
	 */
	if (s->temp.size > 0) {
		/* Make b->out{,_pos,_size} temporarily point to s->temp. */
		s->out = b->out;
		s->out_pos = b->out_pos;
		s->out_size = b->out_size;
		b->out = s->temp.buf;
		b->out_pos = s->temp.size;
		b->out_size = sizeof(s->temp.buf);

		s->ret = xz_dec_lzma2_run(lzma2, b);

		s->temp.size = b->out_pos;
		b->out = s->out;
		b->out_pos = s->out_pos;
		b->out_size = s->out_size;

		if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
			return s->ret;

		bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);

		/*
		 * If the next filter returned XZ_STREAM_END, we mark that
		 * everything is filtered, since the last unfiltered bytes
		 * of the stream are meant to be left as is.
		 */
		if (s->ret == XZ_STREAM_END)
			s->temp.filtered = s->temp.size;

		bcj_flush(s, b);
		if (s->temp.filtered > 0)
			return XZ_OK;
	}

	return s->ret;
}
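
/*
 * Rough usage sketch of how the containing .xz stream decoder is expected
 * to drive this module; the variable names below are illustrative only:
 *
 *	s = xz_dec_bcj_create(single_call);
 *	ret = xz_dec_bcj_reset(s, filter_id);      (filter_id comes from the Block Header)
 *	while (ret == XZ_OK)
 *		ret = xz_dec_bcj_run(s, lzma2, b); (refill b->in / drain b->out between calls)
 */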

XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call)
{
	struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s != NULL)
		s->single_call = single_call;

	return s;
}

XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
		struct xz_dec_bcj *s, uint8_t id)
{
	switch (id) {
#ifdef XZ_DEC_X86
	case BCJ_X86:
#endif
#ifdef XZ_DEC_POWERPC
	case BCJ_POWERPC:
#endif
#ifdef XZ_DEC_IA64
	case BCJ_IA64:
#endif
#ifdef XZ_DEC_ARM
	case BCJ_ARM:
#endif
#ifdef XZ_DEC_ARMTHUMB
	case BCJ_ARMTHUMB:
#endif
#ifdef XZ_DEC_SPARC
	case BCJ_SPARC:
#endif
		break;

	default:
		/* Unsupported Filter ID */
		return XZ_OPTIONS_ERROR;
	}

	s->type = id;
	s->ret = XZ_OK;
	s->pos = 0;
	s->x86_prev_mask = 0;
	s->temp.filtered = 0;
	s->temp.size = 0;

	return XZ_OK;
}
#endif