1diff --git a/xen/common/Makefile b/xen/common/Makefile
2--- a/xen/common/Makefile
3+++ b/xen/common/Makefile
4@@ -43,7 +43,7 @@
5 obj-y += rbtree.o
6 obj-y += lzo.o
7
8-obj-$(CONFIG_X86) += decompress.o bunzip2.o unlzma.o unlzo.o
9+obj-$(CONFIG_X86) += decompress.o bunzip2.o unxz.o unlzma.o unlzo.o
10
11 obj-$(perfc) += perfc.o
12 obj-$(crash_debug) += gdbstub.o
13diff --git a/xen/common/decompress.c b/xen/common/decompress.c
14--- a/xen/common/decompress.c
15+++ b/xen/common/decompress.c
16@@ -20,6 +20,9 @@
17 if ( len >= 3 && !memcmp(inbuf, "\x42\x5a\x68", 3) )
18 return bunzip2(inbuf, len, NULL, NULL, outbuf, NULL, error);
19
20+ if ( len >= 6 && !memcmp(inbuf, "\3757zXZ", 6) )
21+ return unxz(inbuf, len, NULL, NULL, outbuf, NULL, error);
22+
23 if ( len >= 2 && !memcmp(inbuf, "\135\000", 2) )
24 return unlzma(inbuf, len, NULL, NULL, outbuf, NULL, error);
25
26diff --git a/xen/common/decompress.h b/xen/common/decompress.h
27--- a/xen/common/decompress.h
28+++ b/xen/common/decompress.h
29@@ -8,6 +8,7 @@
30
31 #define STATIC
32 #define INIT __init
33+#define INITDATA __initdata
34
35 static void(*__initdata error)(const char *);
36 #define set_error_fn(x) error = x;
37diff --git a/xen/common/unxz.c b/xen/common/unxz.c
38new file mode 100644
39--- /dev/null
40+++ b/xen/common/unxz.c
41@@ -0,0 +1,306 @@
42+/*
43+ * Wrapper for decompressing XZ-compressed kernel, initramfs, and initrd
44+ *
45+ * Author: Lasse Collin <lasse.collin@tukaani.org>
46+ *
47+ * This file has been put into the public domain.
48+ * You can do whatever you want with this file.
49+ */
50+
51+/*
52+ * Important notes about in-place decompression
53+ *
54+ * At least on x86, the kernel is decompressed in place: the compressed data
55+ * is placed at the end of the output buffer, and the decompressor overwrites
56+ * most of the compressed data. There must be enough safety margin to
57+ * guarantee that the write position is always behind the read position.
58+ *
59+ * The safety margin for XZ with LZMA2 or BCJ+LZMA2 is calculated below.
60+ * Note that the margin with XZ is bigger than with Deflate (gzip)!
61+ *
62+ * The worst case for in-place decompression is that the beginning of
63+ * the file is compressed extremely well, and the rest of the file is
64+ * uncompressible. Thus, we must look for worst-case expansion when the
65+ * compressor is encoding uncompressible data.
66+ *
67+ * The structure of the .xz file in the case of a compressed kernel is as
68+ * follows. Sizes (in bytes) of the fields are in parentheses.
69+ *
70+ * Stream Header (12)
71+ * Block Header:
72+ * Block Header (8-12)
73+ * Compressed Data (N)
74+ * Block Padding (0-3)
75+ * CRC32 (4)
76+ * Index (8-20)
77+ * Stream Footer (12)
78+ *
79+ * Normally there is exactly one Block, but let's assume that there are
80+ * 2-4 Blocks just in case. Because Stream Header and also Block Header
81+ * of the first Block don't make the decompressor produce any uncompressed
82+ * data, we can ignore them from our calculations. Block Headers of possible
83+ * additional Blocks have to be taken into account still. With these
84+ * assumptions, it is safe to assume that the total header overhead is
85+ * less than 128 bytes.
86+ *
87+ * Compressed Data contains LZMA2 or BCJ+LZMA2 encoded data. Since BCJ
88+ * doesn't change the size of the data, it is enough to calculate the
89+ * safety margin for LZMA2.
90+ *
91+ * LZMA2 stores the data in chunks. Each chunk has a header whose size is
92+ * a maximum of 6 bytes, but to get round 2^n numbers, let's assume that
93+ * the maximum chunk header size is 8 bytes. After the chunk header, there
94+ * may be up to 64 KiB of actual payload in the chunk. Often the payload is
95+ * quite a bit smaller though; to be safe, let's assume that an average
96+ * chunk has only 32 KiB of payload.
97+ *
98+ * The maximum uncompressed size of the payload is 2 MiB. The minimum
99+ * uncompressed size of the payload is in practice never less than the
100+ * payload size itself. The LZMA2 format would allow uncompressed size
101+ * to be less than the payload size, but no sane compressor creates such
102+ * files. LZMA2 supports storing uncompressible data in uncompressed form,
103+ * so there's never a need to create payloads whose uncompressed size is
104+ * smaller than the compressed size.
105+ *
106+ * The assumption that the uncompressed size of the payload is never
107+ * smaller than the payload itself is valid only when talking about
108+ * the payload as a whole. It is possible that the payload has parts where
109+ * the decompressor consumes more input than it produces output. Calculating
110+ * the worst case for this would be tricky. Instead of trying to do that,
111+ * let's simply make sure that the decompressor never overwrites any bytes
112+ * of the payload which it is currently reading.
113+ *
114+ * Now we have enough information to calculate the safety margin. We need
115+ * - 128 bytes for the .xz file format headers;
116+ * - 8 bytes per every 32 KiB of uncompressed size (one LZMA2 chunk header
117+ * per chunk, each chunk having average payload size of 32 KiB); and
118+ * - 64 KiB (biggest possible LZMA2 chunk payload size) to make sure that
119+ * the decompressor never overwrites anything from the LZMA2 chunk
120+ * payload it is currently reading.
121+ *
122+ * We get the following formula:
123+ *
124+ * safety_margin = 128 + uncompressed_size * 8 / 32768 + 65536
125+ * = 128 + (uncompressed_size >> 12) + 65536
126+ *
127+ * For comparison, according to arch/x86/boot/compressed/misc.c, the
128+ * equivalent formula for Deflate is this:
129+ *
130+ * safety_margin = 18 + (uncompressed_size >> 12) + 32768
131+ *
132+ * Thus, when updating Deflate-only in-place kernel decompressor to
133+ * support XZ, the fixed overhead has to be increased from 18+32768 bytes
134+ * to 128+65536 bytes.
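+ *
+ * For example, a 16 MiB kernel needs a safety margin of
+ * 128 + (16 MiB >> 12) + 65536 = 128 + 4096 + 65536 = 69760 bytes.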
135+ */
136+
137+#include "decompress.h"
138+
139+#define XZ_EXTERN STATIC
140+
141+/*
142+ * For boot time use, we enable only the BCJ filter of the current
143+ * architecture or none if no BCJ filter is available for the architecture.
144+ */
145+#ifdef CONFIG_X86
146+# define XZ_DEC_X86
147+#endif
148+#ifdef CONFIG_PPC
149+# define XZ_DEC_POWERPC
150+#endif
151+#ifdef CONFIG_ARM
152+# define XZ_DEC_ARM
153+#endif
154+#ifdef CONFIG_IA64
155+# define XZ_DEC_IA64
156+#endif
157+#ifdef CONFIG_SPARC
158+# define XZ_DEC_SPARC
159+#endif
160+
161+/*
162+ * This will get the basic headers so that memeq() and others
163+ * can be defined.
164+ */
165+#include "xz/private.h"
166+
167+/*
168+ * memeq and memzero are not used much and any remotely sane implementation
169+ * is fast enough. memcpy/memmove speed matters in multi-call mode, but
170+ * the kernel image is decompressed in single-call mode, in which only
171+ * memcpy speed can matter and only if there is a lot of uncompressible data
172+ * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the
173+ * functions below should just be kept small; it's probably not worth
174+ * optimizing for speed.
175+ */
176+
177+#ifndef memeq
178+#define memeq(p1, p2, sz) (memcmp(p1, p2, sz) == 0)
179+#endif
180+
181+#ifndef memzero
182+#define memzero(p, sz) memset(p, 0, sz)
183+#endif
184+
185+#include "xz/crc32.c"
186+#include "xz/dec_stream.c"
187+#include "xz/dec_lzma2.c"
188+#include "xz/dec_bcj.c"
189+
190+/* Size of the input and output buffers in multi-call mode */
191+#define XZ_IOBUF_SIZE 4096
192+
193+/*
194+ * This function implements the API defined in <linux/decompress/generic.h>.
195+ *
196+ * This wrapper will automatically choose single-call or multi-call mode
197+ * of the native XZ decoder API. The single-call mode can be used only when
198+ * both input and output buffers are available as a single chunk, i.e. when
199+ * fill() and flush() won't be used.
200+ */
201+STATIC int INIT unxz(unsigned char *in, unsigned int in_size,
202+ int (*fill)(void *dest, unsigned int size),
203+ int (*flush)(void *src, unsigned int size),
204+ unsigned char *out, unsigned int *in_used,
205+ void (*error_fn)(const char *x))
206+{
207+ struct xz_buf b;
208+ struct xz_dec *s;
209+ enum xz_ret ret;
210+ bool_t must_free_in = false;
211+
212+ set_error_fn(error_fn);
213+
214+ xz_crc32_init();
215+
216+ if (in_used != NULL)
217+ *in_used = 0;
218+
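+ /*
+  * When there are no fill() and flush() callbacks, both buffers are
+  * available as single chunks, so the decoder's single-call mode can
+  * be used; otherwise allocate the dictionary dynamically, with
+  * (uint32_t)-1 meaning no practical limit on its size.
+  */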
219+ if (fill == NULL && flush == NULL)
220+ s = xz_dec_init(XZ_SINGLE, 0);
221+ else
222+ s = xz_dec_init(XZ_DYNALLOC, (uint32_t)-1);
223+
224+ if (s == NULL)
225+ goto error_alloc_state;
226+
227+ if (flush == NULL) {
228+ b.out = out;
229+ b.out_size = (size_t)-1;
230+ } else {
231+ b.out_size = XZ_IOBUF_SIZE;
232+ b.out = malloc(XZ_IOBUF_SIZE);
233+ if (b.out == NULL)
234+ goto error_alloc_out;
235+ }
236+
237+ if (in == NULL) {
238+ must_free_in = true;
239+ in = malloc(XZ_IOBUF_SIZE);
240+ if (in == NULL)
241+ goto error_alloc_in;
242+ }
243+
244+ b.in = in;
245+ b.in_pos = 0;
246+ b.in_size = in_size;
247+ b.out_pos = 0;
248+
249+ if (fill == NULL && flush == NULL) {
250+ ret = xz_dec_run(s, &b);
251+ } else {
252+ do {
253+ if (b.in_pos == b.in_size && fill != NULL) {
254+ if (in_used != NULL)
255+ *in_used += b.in_pos;
256+
257+ b.in_pos = 0;
258+
259+ in_size = fill(in, XZ_IOBUF_SIZE);
260+ if ((int) in_size < 0) {
261+ /*
262+ * This isn't an optimal error code
263+ * but it probably isn't worth making
264+ * a new one either.
265+ */
266+ ret = XZ_BUF_ERROR;
267+ break;
268+ }
269+
270+ b.in_size = in_size;
271+ }
272+
273+ ret = xz_dec_run(s, &b);
274+
275+ if (flush != NULL && (b.out_pos == b.out_size
276+ || (ret != XZ_OK && b.out_pos > 0))) {
277+ /*
278+ * Setting ret here may hide an error
279+ * returned by xz_dec_run(), but probably
280+ * it's not too bad.
281+ */
282+ if (flush(b.out, b.out_pos) != (int)b.out_pos)
283+ ret = XZ_BUF_ERROR;
284+
285+ b.out_pos = 0;
286+ }
287+ } while (ret == XZ_OK);
288+
289+ if (must_free_in)
290+ free(in);
291+
292+ if (flush != NULL)
293+ free(b.out);
294+ }
295+
296+ if (in_used != NULL)
297+ *in_used += b.in_pos;
298+
299+ xz_dec_end(s);
300+
301+ switch (ret) {
302+ case XZ_STREAM_END:
303+ return 0;
304+
305+ case XZ_MEM_ERROR:
306+ /* This can occur only in multi-call mode. */
307+ error("XZ decompressor ran out of memory");
308+ break;
309+
310+ case XZ_FORMAT_ERROR:
311+ error("Input is not in the XZ format (wrong magic bytes)");
312+ break;
313+
314+ case XZ_OPTIONS_ERROR:
315+ error("Input was encoded with settings that are not "
316+ "supported by this XZ decoder");
317+ break;
318+
319+ case XZ_DATA_ERROR:
320+ case XZ_BUF_ERROR:
321+ error("XZ-compressed data is corrupt");
322+ break;
323+
324+ default:
325+ error("Bug in the XZ decompressor");
326+ break;
327+ }
328+
329+ return -1;
330+
331+error_alloc_in:
332+ if (flush != NULL)
333+ free(b.out);
334+
335+error_alloc_out:
336+ xz_dec_end(s);
337+
338+error_alloc_state:
339+ error("XZ decompressor ran out of memory");
340+ return -1;
341+}
342+
343+/*
344+ * This macro is used by architecture-specific files to decompress
345+ * the kernel image.
346+ */
347+#define decompress unxz
348diff --git a/xen/common/xz/crc32.c b/xen/common/xz/crc32.c
349new file mode 100644
350--- /dev/null
351+++ b/xen/common/xz/crc32.c
352@@ -0,0 +1,51 @@
353+/*
354+ * CRC32 using the polynomial from IEEE-802.3
355+ *
356+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
357+ * Igor Pavlov <http://7-zip.org/>
358+ *
359+ * This file has been put into the public domain.
360+ * You can do whatever you want with this file.
361+ */
362+
363+/*
364+ * This is not the fastest implementation, but it is pretty compact.
365+ * The fastest versions of xz_crc32() on modern CPUs without hardware
366+ * accelerated CRC instruction are 3-5 times as fast as this version,
367+ * but they are bigger and use more memory for the lookup table.
368+ */
369+
370+#include "private.h"
371+
372+XZ_EXTERN uint32_t INITDATA xz_crc32_table[256];
373+
374+XZ_EXTERN void INIT xz_crc32_init(void)
375+{
376+ const uint32_t poly = 0xEDB88320;
377+
378+ uint32_t i;
379+ uint32_t j;
380+ uint32_t r;
381+
382+ for (i = 0; i < 256; ++i) {
383+ r = i;
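+ /*
+  * XOR in the polynomial only when the bit shifted out is set:
+  * ~((r & 1) - 1) is all ones when the low bit of r is 1 and zero
+  * when it is 0.
+  */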
384+ for (j = 0; j < 8; ++j)
385+ r = (r >> 1) ^ (poly & ~((r & 1) - 1));
386+
387+ xz_crc32_table[i] = r;
388+ }
389+
390+ return;
391+}
392+
393+XZ_EXTERN uint32_t INIT xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
394+{
395+ crc = ~crc;
396+
397+ while (size != 0) {
398+ crc = xz_crc32_table[*buf++ ^ (crc & 0xFF)] ^ (crc >> 8);
399+ --size;
400+ }
401+
402+ return ~crc;
403+}
404diff --git a/xen/common/xz/dec_bcj.c b/xen/common/xz/dec_bcj.c
405new file mode 100644
406--- /dev/null
407+++ b/xen/common/xz/dec_bcj.c
408@@ -0,0 +1,575 @@
409+/*
410+ * Branch/Call/Jump (BCJ) filter decoders
411+ *
412+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
413+ * Igor Pavlov <http://7-zip.org/>
414+ *
415+ * This file has been put into the public domain.
416+ * You can do whatever you want with this file.
417+ */
418+
419+#include "private.h"
420+
421+/*
422+ * The rest of the file is inside this ifdef. It makes things a little more
423+ * convenient when building without support for any BCJ filters.
424+ */
425+#ifdef XZ_DEC_BCJ
426+
427+struct xz_dec_bcj {
428+ /* Type of the BCJ filter being used */
429+ enum {
430+ BCJ_X86 = 4, /* x86 or x86-64 */
431+ BCJ_POWERPC = 5, /* Big endian only */
432+ BCJ_IA64 = 6, /* Big or little endian */
433+ BCJ_ARM = 7, /* Little endian only */
434+ BCJ_ARMTHUMB = 8, /* Little endian only */
435+ BCJ_SPARC = 9 /* Big or little endian */
436+ } type;
437+
438+ /*
439+ * Return value of the next filter in the chain. We need to preserve
440+ * this information across calls, because we must not call the next
441+ * filter anymore once it has returned XZ_STREAM_END.
442+ */
443+ enum xz_ret ret;
444+
445+ /* True if we are operating in single-call mode. */
446+ bool_t single_call;
447+
448+ /*
449+ * Absolute position relative to the beginning of the uncompressed
450+ * data (in a single .xz Block). We care only about the lowest 32
451+ * bits so this doesn't need to be uint64_t even with big files.
452+ */
453+ uint32_t pos;
454+
455+ /* x86 filter state */
456+ uint32_t x86_prev_mask;
457+
458+ /* Temporary space to hold the variables from struct xz_buf */
459+ uint8_t *out;
460+ size_t out_pos;
461+ size_t out_size;
462+
463+ struct {
464+ /* Amount of already filtered data in the beginning of buf */
465+ size_t filtered;
466+
467+ /* Total amount of data currently stored in buf */
468+ size_t size;
469+
470+ /*
471+ * Buffer to hold a mix of filtered and unfiltered data. This
472+ * needs to be big enough to hold Alignment + 2 * Look-ahead:
473+ *
474+ * Type Alignment Look-ahead
475+ * x86 1 4
476+ * PowerPC 4 0
477+ * IA-64 16 0
478+ * ARM 4 0
479+ * ARM-Thumb 2 2
480+ * SPARC 4 0
481+ */
482+ uint8_t buf[16];
483+ } temp;
484+};
485+
486+#ifdef XZ_DEC_X86
487+/*
488+ * This is used to test the most significant byte of a memory address
489+ * in an x86 instruction.
490+ */
491+static inline int INIT bcj_x86_test_msbyte(uint8_t b)
492+{
493+ return b == 0x00 || b == 0xFF;
494+}
495+
496+static size_t INIT bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
497+{
498+ static /*const*/ bool_t INITDATA mask_to_allowed_status[8]
499+ = { true, true, true, false, true, false, false, false };
500+
501+ static /*const*/ uint8_t INITDATA mask_to_bit_num[8]
502+ = { 0, 1, 2, 2, 3, 3, 3, 3 };
503+
504+ size_t i;
505+ size_t prev_pos = (size_t)-1;
506+ uint32_t prev_mask = s->x86_prev_mask;
507+ uint32_t src;
508+ uint32_t dest;
509+ uint32_t j;
510+ uint8_t b;
511+
512+ if (size <= 4)
513+ return 0;
514+
515+ size -= 4;
516+ for (i = 0; i < size; ++i) {
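+ /* 0xE8 is the x86 CALL rel32 opcode and 0xE9 is JMP rel32. */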
517+ if ((buf[i] & 0xFE) != 0xE8)
518+ continue;
519+
520+ prev_pos = i - prev_pos;
521+ if (prev_pos > 3) {
522+ prev_mask = 0;
523+ } else {
524+ prev_mask = (prev_mask << (prev_pos - 1)) & 7;
525+ if (prev_mask != 0) {
526+ b = buf[i + 4 - mask_to_bit_num[prev_mask]];
527+ if (!mask_to_allowed_status[prev_mask]
528+ || bcj_x86_test_msbyte(b)) {
529+ prev_pos = i;
530+ prev_mask = (prev_mask << 1) | 1;
531+ continue;
532+ }
533+ }
534+ }
535+
536+ prev_pos = i;
537+
538+ if (bcj_x86_test_msbyte(buf[i + 4])) {
539+ src = get_unaligned_le32(buf + i + 1);
540+ while (true) {
541+ dest = src - (s->pos + (uint32_t)i + 5);
542+ if (prev_mask == 0)
543+ break;
544+
545+ j = mask_to_bit_num[prev_mask] * 8;
546+ b = (uint8_t)(dest >> (24 - j));
547+ if (!bcj_x86_test_msbyte(b))
548+ break;
549+
550+ src = dest ^ (((uint32_t)1 << (32 - j)) - 1);
551+ }
552+
553+ dest &= 0x01FFFFFF;
554+ dest |= (uint32_t)0 - (dest & 0x01000000);
555+ put_unaligned_le32(dest, buf + i + 1);
556+ i += 4;
557+ } else {
558+ prev_mask = (prev_mask << 1) | 1;
559+ }
560+ }
561+
562+ prev_pos = i - prev_pos;
563+ s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1);
564+ return i;
565+}
566+#endif
567+
568+#ifdef XZ_DEC_POWERPC
569+static size_t INIT bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
570+{
571+ size_t i;
572+ uint32_t instr;
573+
574+ for (i = 0; i + 4 <= size; i += 4) {
575+ instr = get_unaligned_be32(buf + i);
576+ if ((instr & 0xFC000003) == 0x48000001) {
577+ instr &= 0x03FFFFFC;
578+ instr -= s->pos + (uint32_t)i;
579+ instr &= 0x03FFFFFC;
580+ instr |= 0x48000001;
581+ put_unaligned_be32(instr, buf + i);
582+ }
583+ }
584+
585+ return i;
586+}
587+#endif
588+
589+#ifdef XZ_DEC_IA64
590+static size_t INIT bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
591+{
592+ static const uint8_t branch_table[32] = {
593+ 0, 0, 0, 0, 0, 0, 0, 0,
594+ 0, 0, 0, 0, 0, 0, 0, 0,
595+ 4, 4, 6, 6, 0, 0, 7, 7,
596+ 4, 4, 0, 0, 4, 4, 0, 0
597+ };
598+
599+ /*
600+ * The local variables take a little bit of stack space, but it's less
601+ * than what the LZMA2 decoder takes, so it doesn't make sense to reduce
602+ * stack usage here without doing that for the LZMA2 decoder too.
603+ */
604+
605+ /* Loop counters */
606+ size_t i;
607+ size_t j;
608+
609+ /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
610+ uint32_t slot;
611+
612+ /* Bitwise offset of the instruction indicated by slot */
613+ uint32_t bit_pos;
614+
615+ /* bit_pos split into byte and bit parts */
616+ uint32_t byte_pos;
617+ uint32_t bit_res;
618+
619+ /* Address part of an instruction */
620+ uint32_t addr;
621+
622+ /* Mask used to detect which instructions to convert */
623+ uint32_t mask;
624+
625+ /* 41-bit instruction stored somewhere in the lowest 48 bits */
626+ uint64_t instr;
627+
628+ /* Instruction normalized with bit_res for easier manipulation */
629+ uint64_t norm;
630+
631+ for (i = 0; i + 16 <= size; i += 16) {
632+ mask = branch_table[buf[i] & 0x1F];
633+ for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) {
634+ if (((mask >> slot) & 1) == 0)
635+ continue;
636+
637+ byte_pos = bit_pos >> 3;
638+ bit_res = bit_pos & 7;
639+ instr = 0;
640+ for (j = 0; j < 6; ++j)
641+ instr |= (uint64_t)(buf[i + j + byte_pos])
642+ << (8 * j);
643+
644+ norm = instr >> bit_res;
645+
646+ if (((norm >> 37) & 0x0F) == 0x05
647+ && ((norm >> 9) & 0x07) == 0) {
648+ addr = (norm >> 13) & 0x0FFFFF;
649+ addr |= ((uint32_t)(norm >> 36) & 1) << 20;
650+ addr <<= 4;
651+ addr -= s->pos + (uint32_t)i;
652+ addr >>= 4;
653+
654+ norm &= ~((uint64_t)0x8FFFFF << 13);
655+ norm |= (uint64_t)(addr & 0x0FFFFF) << 13;
656+ norm |= (uint64_t)(addr & 0x100000)
657+ << (36 - 20);
658+
659+ instr &= (1 << bit_res) - 1;
660+ instr |= norm << bit_res;
661+
662+ for (j = 0; j < 6; j++)
663+ buf[i + j + byte_pos]
664+ = (uint8_t)(instr >> (8 * j));
665+ }
666+ }
667+ }
668+
669+ return i;
670+}
671+#endif
672+
673+#ifdef XZ_DEC_ARM
674+static size_t INIT bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
675+{
676+ size_t i;
677+ uint32_t addr;
678+
679+ for (i = 0; i + 4 <= size; i += 4) {
680+ if (buf[i + 3] == 0xEB) {
681+ addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8)
682+ | ((uint32_t)buf[i + 2] << 16);
683+ addr <<= 2;
684+ addr -= s->pos + (uint32_t)i + 8;
685+ addr >>= 2;
686+ buf[i] = (uint8_t)addr;
687+ buf[i + 1] = (uint8_t)(addr >> 8);
688+ buf[i + 2] = (uint8_t)(addr >> 16);
689+ }
690+ }
691+
692+ return i;
693+}
694+#endif
695+
696+#ifdef XZ_DEC_ARMTHUMB
697+static size_t INIT bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
698+{
699+ size_t i;
700+ uint32_t addr;
701+
702+ for (i = 0; i + 4 <= size; i += 2) {
703+ if ((buf[i + 1] & 0xF8) == 0xF0
704+ && (buf[i + 3] & 0xF8) == 0xF8) {
705+ addr = (((uint32_t)buf[i + 1] & 0x07) << 19)
706+ | ((uint32_t)buf[i] << 11)
707+ | (((uint32_t)buf[i + 3] & 0x07) << 8)
708+ | (uint32_t)buf[i + 2];
709+ addr <<= 1;
710+ addr -= s->pos + (uint32_t)i + 4;
711+ addr >>= 1;
712+ buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07));
713+ buf[i] = (uint8_t)(addr >> 11);
714+ buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07));
715+ buf[i + 2] = (uint8_t)addr;
716+ i += 2;
717+ }
718+ }
719+
720+ return i;
721+}
722+#endif
723+
724+#ifdef XZ_DEC_SPARC
725+static size_t INIT bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size)
726+{
727+ size_t i;
728+ uint32_t instr;
729+
730+ for (i = 0; i + 4 <= size; i += 4) {
731+ instr = get_unaligned_be32(buf + i);
732+ if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) {
733+ instr <<= 2;
734+ instr -= s->pos + (uint32_t)i;
735+ instr >>= 2;
736+ instr = ((uint32_t)0x40000000 - (instr & 0x400000))
737+ | 0x40000000 | (instr & 0x3FFFFF);
738+ put_unaligned_be32(instr, buf + i);
739+ }
740+ }
741+
742+ return i;
743+}
744+#endif
745+
746+/*
747+ * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
748+ * of data that got filtered.
749+ *
750+ * NOTE: This is implemented as a switch statement to avoid using function
751+ * pointers, which could be problematic in the kernel boot code, where
752+ * pointers to static data must be avoided (at least on x86).
753+ */
754+static void INIT bcj_apply(struct xz_dec_bcj *s,
755+ uint8_t *buf, size_t *pos, size_t size)
756+{
757+ size_t filtered;
758+
759+ buf += *pos;
760+ size -= *pos;
761+
762+ switch (s->type) {
763+#ifdef XZ_DEC_X86
764+ case BCJ_X86:
765+ filtered = bcj_x86(s, buf, size);
766+ break;
767+#endif
768+#ifdef XZ_DEC_POWERPC
769+ case BCJ_POWERPC:
770+ filtered = bcj_powerpc(s, buf, size);
771+ break;
772+#endif
773+#ifdef XZ_DEC_IA64
774+ case BCJ_IA64:
775+ filtered = bcj_ia64(s, buf, size);
776+ break;
777+#endif
778+#ifdef XZ_DEC_ARM
779+ case BCJ_ARM:
780+ filtered = bcj_arm(s, buf, size);
781+ break;
782+#endif
783+#ifdef XZ_DEC_ARMTHUMB
784+ case BCJ_ARMTHUMB:
785+ filtered = bcj_armthumb(s, buf, size);
786+ break;
787+#endif
788+#ifdef XZ_DEC_SPARC
789+ case BCJ_SPARC:
790+ filtered = bcj_sparc(s, buf, size);
791+ break;
792+#endif
793+ default:
794+ /* Never reached but silence compiler warnings. */
795+ filtered = 0;
796+ break;
797+ }
798+
799+ *pos += filtered;
800+ s->pos += filtered;
801+}
802+
803+/*
804+ * Flush pending filtered data from temp to the output buffer.
805+ * Move the remaining mixture of possibly filtered and unfiltered
806+ * data to the beginning of temp.
807+ */
808+static void INIT bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b)
809+{
810+ size_t copy_size;
811+
812+ copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos);
813+ memcpy(b->out + b->out_pos, s->temp.buf, copy_size);
814+ b->out_pos += copy_size;
815+
816+ s->temp.filtered -= copy_size;
817+ s->temp.size -= copy_size;
818+ memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size);
819+}
820+
821+/*
822+ * The BCJ filter functions are primitive in the sense that they process the
823+ * data in chunks of 1-16 bytes. To hide this issue, this function does
824+ * some buffering.
825+ */
826+XZ_EXTERN enum xz_ret INIT xz_dec_bcj_run(struct xz_dec_bcj *s,
827+ struct xz_dec_lzma2 *lzma2,
828+ struct xz_buf *b)
829+{
830+ size_t out_start;
831+
832+ /*
833+ * Flush pending already filtered data to the output buffer. Return
834+ * immediately if we couldn't flush everything, or if the next
835+ * filter in the chain had already returned XZ_STREAM_END.
836+ */
837+ if (s->temp.filtered > 0) {
838+ bcj_flush(s, b);
839+ if (s->temp.filtered > 0)
840+ return XZ_OK;
841+
842+ if (s->ret == XZ_STREAM_END)
843+ return XZ_STREAM_END;
844+ }
845+
846+ /*
847+ * If we have more output space than what is currently pending in
848+ * temp, copy the unfiltered data from temp to the output buffer
849+ * and try to fill the output buffer by decoding more data from the
850+ * next filter in the chain. Apply the BCJ filter on the new data
851+ * in the output buffer. If everything cannot be filtered, copy it
852+ * to temp and rewind the output buffer position accordingly.
853+ *
854+ * This needs to be always run when temp.size == 0 to handle a special
855+ * case where the output buffer is full and the next filter has no
856+ * more output coming but hasn't returned XZ_STREAM_END yet.
857+ */
858+ if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) {
859+ out_start = b->out_pos;
860+ memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size);
861+ b->out_pos += s->temp.size;
862+
863+ s->ret = xz_dec_lzma2_run(lzma2, b);
864+ if (s->ret != XZ_STREAM_END
865+ && (s->ret != XZ_OK || s->single_call))
866+ return s->ret;
867+
868+ bcj_apply(s, b->out, &out_start, b->out_pos);
869+
870+ /*
871+ * As an exception, if the next filter returned XZ_STREAM_END,
872+ * we can do that too, since the last few bytes that remain
873+ * unfiltered are meant to remain unfiltered.
874+ */
875+ if (s->ret == XZ_STREAM_END)
876+ return XZ_STREAM_END;
877+
878+ s->temp.size = b->out_pos - out_start;
879+ b->out_pos -= s->temp.size;
880+ memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size);
881+
882+ /*
883+ * If there wasn't enough input to the next filter to fill
884+ * the output buffer with unfiltered data, there's no point
885+ * to try decoding more data to temp.
886+ */
887+ if (b->out_pos + s->temp.size < b->out_size)
888+ return XZ_OK;
889+ }
890+
891+ /*
892+ * We have unfiltered data in temp. If the output buffer isn't full
893+ * yet, try to fill the temp buffer by decoding more data from the
894+ * next filter. Apply the BCJ filter on temp. Then we hopefully can
895+ * fill the actual output buffer by copying filtered data from temp.
896+ * A mix of filtered and unfiltered data may be left in temp; it will
897+ * be taken care of on the next call to this function.
898+ */
899+ if (b->out_pos < b->out_size) {
900+ /* Make b->out{,_pos,_size} temporarily point to s->temp. */
901+ s->out = b->out;
902+ s->out_pos = b->out_pos;
903+ s->out_size = b->out_size;
904+ b->out = s->temp.buf;
905+ b->out_pos = s->temp.size;
906+ b->out_size = sizeof(s->temp.buf);
907+
908+ s->ret = xz_dec_lzma2_run(lzma2, b);
909+
910+ s->temp.size = b->out_pos;
911+ b->out = s->out;
912+ b->out_pos = s->out_pos;
913+ b->out_size = s->out_size;
914+
915+ if (s->ret != XZ_OK && s->ret != XZ_STREAM_END)
916+ return s->ret;
917+
918+ bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size);
919+
920+ /*
921+ * If the next filter returned XZ_STREAM_END, we mark that
922+ * everything is filtered, since the last unfiltered bytes
923+ * of the stream are meant to be left as is.
924+ */
925+ if (s->ret == XZ_STREAM_END)
926+ s->temp.filtered = s->temp.size;
927+
928+ bcj_flush(s, b);
929+ if (s->temp.filtered > 0)
930+ return XZ_OK;
931+ }
932+
933+ return s->ret;
934+}
935+
936+XZ_EXTERN struct xz_dec_bcj *INIT xz_dec_bcj_create(bool_t single_call)
937+{
938+ struct xz_dec_bcj *s = malloc(sizeof(*s));
939+ if (s != NULL)
940+ s->single_call = single_call;
941+
942+ return s;
943+}
944+
945+XZ_EXTERN enum xz_ret INIT xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id)
946+{
947+ switch (id) {
948+#ifdef XZ_DEC_X86
949+ case BCJ_X86:
950+#endif
951+#ifdef XZ_DEC_POWERPC
952+ case BCJ_POWERPC:
953+#endif
954+#ifdef XZ_DEC_IA64
955+ case BCJ_IA64:
956+#endif
957+#ifdef XZ_DEC_ARM
958+ case BCJ_ARM:
959+#endif
960+#ifdef XZ_DEC_ARMTHUMB
961+ case BCJ_ARMTHUMB:
962+#endif
963+#ifdef XZ_DEC_SPARC
964+ case BCJ_SPARC:
965+#endif
966+ break;
967+
968+ default:
969+ /* Unsupported Filter ID */
970+ return XZ_OPTIONS_ERROR;
971+ }
972+
973+ s->type = id;
974+ s->ret = XZ_OK;
975+ s->pos = 0;
976+ s->x86_prev_mask = 0;
977+ s->temp.filtered = 0;
978+ s->temp.size = 0;
979+
980+ return XZ_OK;
981+}
982+
983+#endif
984diff --git a/xen/common/xz/dec_lzma2.c b/xen/common/xz/dec_lzma2.c
985new file mode 100644
986--- /dev/null
987+++ b/xen/common/xz/dec_lzma2.c
988@@ -0,0 +1,1171 @@
989+/*
990+ * LZMA2 decoder
991+ *
992+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
993+ * Igor Pavlov <http://7-zip.org/>
994+ *
995+ * This file has been put into the public domain.
996+ * You can do whatever you want with this file.
997+ */
998+
999+#include "private.h"
1000+#include "lzma2.h"
1001+
1002+/*
1003+ * Range decoder initialization eats the first five bytes of each LZMA chunk.
1004+ */
1005+#define RC_INIT_BYTES 5
1006+
1007+/*
1008+ * Minimum amount of usable input buffer needed to safely decode one LZMA
1009+ * symbol. The worst case is that we decode 22 bits using probabilities
1010+ * and 26 direct bits, which consumes at most 20 bytes of input. However,
1011+ * lzma_main() does an extra normalization before returning, thus we
1012+ * need to put 21 here.
1013+ */
1014+#define LZMA_IN_REQUIRED 21
1015+
1016+/*
1017+ * Dictionary (history buffer)
1018+ *
1019+ * These are always true:
1020+ * start <= pos <= full <= end
1021+ * pos <= limit <= end
1022+ *
1023+ * In multi-call mode, also these are true:
1024+ * end == size
1025+ * size <= size_max
1026+ * allocated <= size
1027+ *
1028+ * Most of these variables are size_t to support single-call mode,
1029+ * in which the dictionary variables address the actual output
1030+ * buffer directly.
1031+ */
1032+struct dictionary {
1033+ /* Beginning of the history buffer */
1034+ uint8_t *buf;
1035+
1036+ /* Old position in buf (before decoding more data) */
1037+ size_t start;
1038+
1039+ /* Position in buf */
1040+ size_t pos;
1041+
1042+ /*
1043+ * How full the dictionary is. This is used to detect corrupt input that
1044+ * would read beyond the beginning of the uncompressed stream.
1045+ */
1046+ size_t full;
1047+
1048+ /* Write limit; we don't write to buf[limit] or later bytes. */
1049+ size_t limit;
1050+
1051+ /*
1052+ * End of the dictionary buffer. In multi-call mode, this is
1053+ * the same as the dictionary size. In single-call mode, this
1054+ * indicates the size of the output buffer.
1055+ */
1056+ size_t end;
1057+
1058+ /*
1059+ * Size of the dictionary as specified in Block Header. This is used
1060+ * together with "full" to detect corrupt input that would make us
1061+ * read beyond the beginning of the uncompressed stream.
1062+ */
1063+ uint32_t size;
1064+
1065+ /*
1066+ * Maximum allowed dictionary size in multi-call mode.
1067+ * This is ignored in single-call mode.
1068+ */
1069+ uint32_t size_max;
1070+
1071+ /*
1072+ * Amount of memory currently allocated for the dictionary.
1073+ * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC,
1074+ * size_max is always the same as the allocated size.)
1075+ */
1076+ uint32_t allocated;
1077+
1078+ /* Operation mode */
1079+ enum xz_mode mode;
1080+};
1081+
1082+/* Range decoder */
1083+struct rc_dec {
1084+ uint32_t range;
1085+ uint32_t code;
1086+
1087+ /*
1088+ * Number of initializing bytes remaining to be read
1089+ * by rc_read_init().
1090+ */
1091+ uint32_t init_bytes_left;
1092+
1093+ /*
1094+ * Buffer from which we read our input. It can be either
1095+ * temp.buf or the caller-provided input buffer.
1096+ */
1097+ const uint8_t *in;
1098+ size_t in_pos;
1099+ size_t in_limit;
1100+};
1101+
1102+/* Probabilities for a length decoder. */
1103+struct lzma_len_dec {
1104+ /* Probability of match length being at least 10 */
1105+ uint16_t choice;
1106+
1107+ /* Probability of match length being at least 18 */
1108+ uint16_t choice2;
1109+
1110+ /* Probabilities for match lengths 2-9 */
1111+ uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS];
1112+
1113+ /* Probabilities for match lengths 10-17 */
1114+ uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS];
1115+
1116+ /* Probabilities for match lengths 18-273 */
1117+ uint16_t high[LEN_HIGH_SYMBOLS];
1118+};
1119+
1120+struct lzma_dec {
1121+ /* Distances of latest four matches */
1122+ uint32_t rep0;
1123+ uint32_t rep1;
1124+ uint32_t rep2;
1125+ uint32_t rep3;
1126+
1127+ /* Types of the most recently seen LZMA symbols */
1128+ enum lzma_state state;
1129+
1130+ /*
1131+ * Length of a match. This is updated so that dict_repeat can
1132+ * be called again to finish repeating the whole match.
1133+ */
1134+ uint32_t len;
1135+
1136+ /*
1137+ * LZMA properties or related bit masks (number of literal
1138+ * context bits, a mask derived from the number of literal
1139+ * position bits, and a mask derived from the number of
1140+ * position bits)
1141+ */
1142+ uint32_t lc;
1143+ uint32_t literal_pos_mask; /* (1 << lp) - 1 */
1144+ uint32_t pos_mask; /* (1 << pb) - 1 */
1145+
1146+ /* If 1, it's a match. Otherwise it's a single 8-bit literal. */
1147+ uint16_t is_match[STATES][POS_STATES_MAX];
1148+
1149+ /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */
1150+ uint16_t is_rep[STATES];
1151+
1152+ /*
1153+ * If 0, distance of a repeated match is rep0.
1154+ * Otherwise check is_rep1.
1155+ */
1156+ uint16_t is_rep0[STATES];
1157+
1158+ /*
1159+ * If 0, distance of a repeated match is rep1.
1160+ * Otherwise check is_rep2.
1161+ */
1162+ uint16_t is_rep1[STATES];
1163+
1164+ /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */
1165+ uint16_t is_rep2[STATES];
1166+
1167+ /*
1168+ * If 1, the repeated match has length of one byte. Otherwise
1169+ * the length is decoded from rep_len_decoder.
1170+ */
1171+ uint16_t is_rep0_long[STATES][POS_STATES_MAX];
1172+
1173+ /*
1174+ * Probability tree for the highest two bits of the match
1175+ * distance. There is a separate probability tree for match
1176+ * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273].
1177+ */
1178+ uint16_t dist_slot[DIST_STATES][DIST_SLOTS];
1179+
1180+ /*
1181+ * Probability trees for additional bits for match distance
1182+ * when the distance is in the range [4, 127].
1183+ */
1184+ uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END];
1185+
1186+ /*
1187+ * Probability tree for the lowest four bits of a match
1188+ * distance that is equal to or greater than 128.
1189+ */
1190+ uint16_t dist_align[ALIGN_SIZE];
1191+
1192+ /* Length of a normal match */
1193+ struct lzma_len_dec match_len_dec;
1194+
1195+ /* Length of a repeated match */
1196+ struct lzma_len_dec rep_len_dec;
1197+
1198+ /* Probabilities of literals */
1199+ uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
1200+};
1201+
1202+struct lzma2_dec {
1203+ /* Position in xz_dec_lzma2_run(). */
1204+ enum lzma2_seq {
1205+ SEQ_CONTROL,
1206+ SEQ_UNCOMPRESSED_1,
1207+ SEQ_UNCOMPRESSED_2,
1208+ SEQ_COMPRESSED_0,
1209+ SEQ_COMPRESSED_1,
1210+ SEQ_PROPERTIES,
1211+ SEQ_LZMA_PREPARE,
1212+ SEQ_LZMA_RUN,
1213+ SEQ_COPY
1214+ } sequence;
1215+
1216+ /* Next position after decoding the compressed size of the chunk. */
1217+ enum lzma2_seq next_sequence;
1218+
1219+ /* Uncompressed size of LZMA chunk (2 MiB at maximum) */
1220+ uint32_t uncompressed;
1221+
1222+ /*
1223+ * Compressed size of LZMA chunk or compressed/uncompressed
1224+ * size of uncompressed chunk (64 KiB at maximum)
1225+ */
1226+ uint32_t compressed;
1227+
1228+ /*
1229+ * True if dictionary reset is needed. This is false before
1230+ * the first chunk (LZMA or uncompressed).
1231+ */
1232+ bool_t need_dict_reset;
1233+
1234+ /*
1235+ * True if new LZMA properties are needed. This is false
1236+ * before the first LZMA chunk.
1237+ */
1238+ bool_t need_props;
1239+};
1240+
1241+struct xz_dec_lzma2 {
1242+ /*
1243+ * The order below is important on x86 to reduce code size and
1244+ * it shouldn't hurt on other platforms. Everything up to and
1245+ * including lzma.pos_mask are in the first 128 bytes on x86-32,
1246+ * which allows using smaller instructions to access those
1247+ * variables. On x86-64, fewer variables fit into the first 128
1248+ * bytes, but this is still the best order without sacrificing
1249+ * the readability by splitting the structures.
1250+ */
1251+ struct rc_dec rc;
1252+ struct dictionary dict;
1253+ struct lzma2_dec lzma2;
1254+ struct lzma_dec lzma;
1255+
1256+ /*
1257+ * Temporary buffer which holds a small number of input bytes between
1258+ * decoder calls. See lzma2_lzma() for details.
1259+ */
1260+ struct {
1261+ uint32_t size;
1262+ uint8_t buf[3 * LZMA_IN_REQUIRED];
1263+ } temp;
1264+};
1265+
1266+/**************
1267+ * Dictionary *
1268+ **************/
1269+
1270+/*
1271+ * Reset the dictionary state. When in single-call mode, set up the beginning
1272+ * of the dictionary to point to the actual output buffer.
1273+ */
1274+static void INIT dict_reset(struct dictionary *dict, struct xz_buf *b)
1275+{
1276+ if (DEC_IS_SINGLE(dict->mode)) {
1277+ dict->buf = b->out + b->out_pos;
1278+ dict->end = b->out_size - b->out_pos;
1279+ }
1280+
1281+ dict->start = 0;
1282+ dict->pos = 0;
1283+ dict->limit = 0;
1284+ dict->full = 0;
1285+}
1286+
1287+/* Set dictionary write limit */
1288+static void INIT dict_limit(struct dictionary *dict, size_t out_max)
1289+{
1290+ if (dict->end - dict->pos <= out_max)
1291+ dict->limit = dict->end;
1292+ else
1293+ dict->limit = dict->pos + out_max;
1294+}
1295+
1296+/* Return true if at least one byte can be written into the dictionary. */
1297+static inline bool_t INIT dict_has_space(const struct dictionary *dict)
1298+{
1299+ return dict->pos < dict->limit;
1300+}
1301+
1302+/*
1303+ * Get a byte from the dictionary at the given distance. The distance is
1304+ * assumed to be valid, or as a special case, zero when the dictionary is
1305+ * still empty. This special case is needed for single-call decoding to
1306+ * avoid writing a '\0' to the end of the destination buffer.
1307+ */
1308+static inline uint32_t INIT dict_get(const struct dictionary *dict, uint32_t dist)
1309+{
1310+ size_t offset = dict->pos - dist - 1;
1311+
1312+ if (dist >= dict->pos)
1313+ offset += dict->end;
1314+
1315+ return dict->full > 0 ? dict->buf[offset] : 0;
1316+}
1317+
1318+/*
1319+ * Put one byte into the dictionary. It is assumed that there is space for it.
1320+ */
1321+static inline void INIT dict_put(struct dictionary *dict, uint8_t byte)
1322+{
1323+ dict->buf[dict->pos++] = byte;
1324+
1325+ if (dict->full < dict->pos)
1326+ dict->full = dict->pos;
1327+}
1328+
1329+/*
1330+ * Repeat given number of bytes from the given distance. If the distance is
1331+ * invalid, false is returned. On success, true is returned and *len is
1332+ * updated to indicate how many bytes were left to be repeated.
1333+ */
1334+static bool_t INIT dict_repeat(struct dictionary *dict, uint32_t *len, uint32_t dist)
1335+{
1336+ size_t back;
1337+ uint32_t left;
1338+
1339+ if (dist >= dict->full || dist >= dict->size)
1340+ return false;
1341+
1342+ left = min_t(size_t, dict->limit - dict->pos, *len);
1343+ *len -= left;
1344+
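+ /* The dictionary is a ring buffer; a distance reaching behind the buffer start wraps around dict->end. */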
1345+ back = dict->pos - dist - 1;
1346+ if (dist >= dict->pos)
1347+ back += dict->end;
1348+
1349+ do {
1350+ dict->buf[dict->pos++] = dict->buf[back++];
1351+ if (back == dict->end)
1352+ back = 0;
1353+ } while (--left > 0);
1354+
1355+ if (dict->full < dict->pos)
1356+ dict->full = dict->pos;
1357+
1358+ return true;
1359+}
1360+
1361+/* Copy uncompressed data as is from input to dictionary and output buffers. */
1362+static void INIT dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
1363+ uint32_t *left)
1364+{
1365+ size_t copy_size;
1366+
1367+ while (*left > 0 && b->in_pos < b->in_size
1368+ && b->out_pos < b->out_size) {
1369+ copy_size = min(b->in_size - b->in_pos,
1370+ b->out_size - b->out_pos);
1371+ if (copy_size > dict->end - dict->pos)
1372+ copy_size = dict->end - dict->pos;
1373+ if (copy_size > *left)
1374+ copy_size = *left;
1375+
1376+ *left -= copy_size;
1377+
1378+ memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size);
1379+ dict->pos += copy_size;
1380+
1381+ if (dict->full < dict->pos)
1382+ dict->full = dict->pos;
1383+
1384+ if (DEC_IS_MULTI(dict->mode)) {
1385+ if (dict->pos == dict->end)
1386+ dict->pos = 0;
1387+
1388+ memcpy(b->out + b->out_pos, b->in + b->in_pos,
1389+ copy_size);
1390+ }
1391+
1392+ dict->start = dict->pos;
1393+
1394+ b->out_pos += copy_size;
1395+ b->in_pos += copy_size;
1396+ }
1397+}
1398+
1399+/*
1400+ * Flush pending data from dictionary to b->out. It is assumed that there is
1401+ * enough space in b->out. This is guaranteed because the caller uses dict_limit()
1402+ * before decoding data into the dictionary.
1403+ */
1404+static uint32_t INIT dict_flush(struct dictionary *dict, struct xz_buf *b)
1405+{
1406+ size_t copy_size = dict->pos - dict->start;
1407+
1408+ if (DEC_IS_MULTI(dict->mode)) {
1409+ if (dict->pos == dict->end)
1410+ dict->pos = 0;
1411+
1412+ memcpy(b->out + b->out_pos, dict->buf + dict->start,
1413+ copy_size);
1414+ }
1415+
1416+ dict->start = dict->pos;
1417+ b->out_pos += copy_size;
1418+ return copy_size;
1419+}
1420+
1421+/*****************
1422+ * Range decoder *
1423+ *****************/
1424+
1425+/* Reset the range decoder. */
1426+static void INIT rc_reset(struct rc_dec *rc)
1427+{
1428+ rc->range = (uint32_t)-1;
1429+ rc->code = 0;
1430+ rc->init_bytes_left = RC_INIT_BYTES;
1431+}
1432+
1433+/*
1434+ * Read the first five initial bytes into rc->code if they haven't been
1435+ * read already. (Yes, the first byte gets completely ignored.)
1436+ */
1437+static bool_t INIT rc_read_init(struct rc_dec *rc, struct xz_buf *b)
1438+{
1439+ while (rc->init_bytes_left > 0) {
1440+ if (b->in_pos == b->in_size)
1441+ return false;
1442+
1443+ rc->code = (rc->code << 8) + b->in[b->in_pos++];
1444+ --rc->init_bytes_left;
1445+ }
1446+
1447+ return true;
1448+}
1449+
1450+/* Return true if there may not be enough input for the next decoding loop. */
1451+static inline bool_t INIT rc_limit_exceeded(const struct rc_dec *rc)
1452+{
1453+ return rc->in_pos > rc->in_limit;
1454+}
1455+
1456+/*
1457+ * Return true if it is possible (from the point of view of the range
1458+ * decoder) that we have reached the end of the LZMA chunk.
1459+ */
1460+static inline bool_t INIT rc_is_finished(const struct rc_dec *rc)
1461+{
1462+ return rc->code == 0;
1463+}
1464+
1465+/* Read the next input byte if needed. */
1466+static always_inline void rc_normalize(struct rc_dec *rc)
1467+{
1468+ if (rc->range < RC_TOP_VALUE) {
1469+ rc->range <<= RC_SHIFT_BITS;
1470+ rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++];
1471+ }
1472+}
1473+
1474+/*
1475+ * Decode one bit. In some versions, this function has been split into three
1476+ * functions so that the compiler is supposed to be able to more easily avoid
1477+ * an extra branch. In this particular version of the LZMA decoder, this
1478+ * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3
1479+ * on x86). Using a non-split version results in nicer-looking code too.
1480+ *
1481+ * NOTE: This must return an int. Do not make it return a bool or the speed
1482+ * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care,
1483+ * and it generates 10-20 % faster code than GCC 3.x from this file anyway.)
1484+ */
1485+static always_inline int rc_bit(struct rc_dec *rc, uint16_t *prob)
1486+{
1487+ uint32_t bound;
1488+ int bit;
1489+
1490+ rc_normalize(rc);
1491+ bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob;
1492+ if (rc->code < bound) {
1493+ rc->range = bound;
1494+ *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS;
1495+ bit = 0;
1496+ } else {
1497+ rc->range -= bound;
1498+ rc->code -= bound;
1499+ *prob -= *prob >> RC_MOVE_BITS;
1500+ bit = 1;
1501+ }
1502+
1503+ return bit;
1504+}
1505+
1506+/* Decode a bittree starting from the most significant bit. */
1507+static always_inline uint32_t rc_bittree(struct rc_dec *rc,
1508+ uint16_t *probs, uint32_t limit)
1509+{
1510+ uint32_t symbol = 1;
1511+
1512+ do {
1513+ if (rc_bit(rc, &probs[symbol]))
1514+ symbol = (symbol << 1) + 1;
1515+ else
1516+ symbol <<= 1;
1517+ } while (symbol < limit);
1518+
1519+ return symbol;
1520+}
1521+
1522+/* Decode a bittree starting from the least significant bit. */
1523+static always_inline void rc_bittree_reverse(struct rc_dec *rc,
1524+ uint16_t *probs,
1525+ uint32_t *dest, uint32_t limit)
1526+{
1527+ uint32_t symbol = 1;
1528+ uint32_t i = 0;
1529+
1530+ do {
1531+ if (rc_bit(rc, &probs[symbol])) {
1532+ symbol = (symbol << 1) + 1;
1533+ *dest += 1 << i;
1534+ } else {
1535+ symbol <<= 1;
1536+ }
1537+ } while (++i < limit);
1538+}
1539+
1540+/* Decode direct bits (fixed fifty-fifty probability) */
1541+static inline void INIT rc_direct(struct rc_dec *rc, uint32_t *dest, uint32_t limit)
1542+{
1543+ uint32_t mask;
1544+
1545+ do {
1546+ rc_normalize(rc);
1547+ rc->range >>= 1;
1548+ rc->code -= rc->range;
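+ /*
+  * mask is all ones if the subtraction wrapped around (the decoded
+  * bit is 0) and zero otherwise (the bit is 1); a wrapped rc->code
+  * is restored by adding rc->range back.
+  */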
1549+ mask = (uint32_t)0 - (rc->code >> 31);
1550+ rc->code += rc->range & mask;
1551+ *dest = (*dest << 1) + (mask + 1);
1552+ } while (--limit > 0);
1553+}
1554+
1555+/********
1556+ * LZMA *
1557+ ********/
1558+
1559+/* Get pointer to literal coder probability array. */
1560+static uint16_t *INIT lzma_literal_probs(struct xz_dec_lzma2 *s)
1561+{
1562+ uint32_t prev_byte = dict_get(&s->dict, 0);
1563+ uint32_t low = prev_byte >> (8 - s->lzma.lc);
1564+ uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc;
1565+ return s->lzma.literal[low + high];
1566+}
1567+
1568+/* Decode a literal (one 8-bit byte) */
1569+static void INIT lzma_literal(struct xz_dec_lzma2 *s)
1570+{
1571+ uint16_t *probs;
1572+ uint32_t symbol;
1573+ uint32_t match_byte;
1574+ uint32_t match_bit;
1575+ uint32_t offset;
1576+ uint32_t i;
1577+
1578+ probs = lzma_literal_probs(s);
1579+
1580+ if (lzma_state_is_literal(s->lzma.state)) {
1581+ symbol = rc_bittree(&s->rc, probs, 0x100);
1582+ } else {
1583+ symbol = 1;
1584+ match_byte = dict_get(&s->dict, s->lzma.rep0) << 1;
1585+ offset = 0x100;
1586+
1587+ do {
1588+ match_bit = match_byte & offset;
1589+ match_byte <<= 1;
1590+ i = offset + match_bit + symbol;
1591+
1592+ if (rc_bit(&s->rc, &probs[i])) {
1593+ symbol = (symbol << 1) + 1;
1594+ offset &= match_bit;
1595+ } else {
1596+ symbol <<= 1;
1597+ offset &= ~match_bit;
1598+ }
1599+ } while (symbol < 0x100);
1600+ }
1601+
1602+ dict_put(&s->dict, (uint8_t)symbol);
1603+ lzma_state_literal(&s->lzma.state);
1604+}
1605+
1606+/* Decode the length of the match into s->lzma.len. */
1607+static void INIT lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l,
1608+ uint32_t pos_state)
1609+{
1610+ uint16_t *probs;
1611+ uint32_t limit;
1612+
1613+ if (!rc_bit(&s->rc, &l->choice)) {
1614+ probs = l->low[pos_state];
1615+ limit = LEN_LOW_SYMBOLS;
1616+ s->lzma.len = MATCH_LEN_MIN;
1617+ } else {
1618+ if (!rc_bit(&s->rc, &l->choice2)) {
1619+ probs = l->mid[pos_state];
1620+ limit = LEN_MID_SYMBOLS;
1621+ s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS;
1622+ } else {
1623+ probs = l->high;
1624+ limit = LEN_HIGH_SYMBOLS;
1625+ s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS
1626+ + LEN_MID_SYMBOLS;
1627+ }
1628+ }
1629+
1630+ s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit;
1631+}
1632+
1633+/* Decode a match. The distance will be stored in s->lzma.rep0. */
1634+static void INIT lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1635+{
1636+ uint16_t *probs;
1637+ uint32_t dist_slot;
1638+ uint32_t limit;
1639+
1640+ lzma_state_match(&s->lzma.state);
1641+
1642+ s->lzma.rep3 = s->lzma.rep2;
1643+ s->lzma.rep2 = s->lzma.rep1;
1644+ s->lzma.rep1 = s->lzma.rep0;
1645+
1646+ lzma_len(s, &s->lzma.match_len_dec, pos_state);
1647+
1648+ probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)];
1649+ dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS;
1650+
1651+ if (dist_slot < DIST_MODEL_START) {
1652+ s->lzma.rep0 = dist_slot;
1653+ } else {
1654+ limit = (dist_slot >> 1) - 1;
1655+ s->lzma.rep0 = 2 + (dist_slot & 1);
1656+
1657+ if (dist_slot < DIST_MODEL_END) {
1658+ s->lzma.rep0 <<= limit;
1659+ probs = s->lzma.dist_special + s->lzma.rep0
1660+ - dist_slot - 1;
1661+ rc_bittree_reverse(&s->rc, probs,
1662+ &s->lzma.rep0, limit);
1663+ } else {
1664+ rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS);
1665+ s->lzma.rep0 <<= ALIGN_BITS;
1666+ rc_bittree_reverse(&s->rc, s->lzma.dist_align,
1667+ &s->lzma.rep0, ALIGN_BITS);
1668+ }
1669+ }
1670+}
1671+
1672+/*
1673+ * Decode a repeated match. The distance is one of the four most recently
1674+ * seen matches. The distance will be stored in s->lzma.rep0.
1675+ */
1676+static void INIT lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state)
1677+{
1678+ uint32_t tmp;
1679+
1680+ if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) {
1681+ if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[
1682+ s->lzma.state][pos_state])) {
1683+ lzma_state_short_rep(&s->lzma.state);
1684+ s->lzma.len = 1;
1685+ return;
1686+ }
1687+ } else {
1688+ if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) {
1689+ tmp = s->lzma.rep1;
1690+ } else {
1691+ if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) {
1692+ tmp = s->lzma.rep2;
1693+ } else {
1694+ tmp = s->lzma.rep3;
1695+ s->lzma.rep3 = s->lzma.rep2;
1696+ }
1697+
1698+ s->lzma.rep2 = s->lzma.rep1;
1699+ }
1700+
1701+ s->lzma.rep1 = s->lzma.rep0;
1702+ s->lzma.rep0 = tmp;
1703+ }
1704+
1705+ lzma_state_long_rep(&s->lzma.state);
1706+ lzma_len(s, &s->lzma.rep_len_dec, pos_state);
1707+}
1708+
1709+/* LZMA decoder core */
1710+static bool_t INIT lzma_main(struct xz_dec_lzma2 *s)
1711+{
1712+ uint32_t pos_state;
1713+
1714+ /*
1715+ * If the dictionary limit was reached during the previous call, try to
1716+ * finish the possibly pending repeat in the dictionary.
1717+ */
1718+ if (dict_has_space(&s->dict) && s->lzma.len > 0)
1719+ dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0);
1720+
1721+ /*
1722+ * Decode more LZMA symbols. One iteration may consume up to
1723+ * LZMA_IN_REQUIRED - 1 bytes.
1724+ */
1725+ while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) {
1726+ pos_state = s->dict.pos & s->lzma.pos_mask;
1727+
1728+ if (!rc_bit(&s->rc, &s->lzma.is_match[
1729+ s->lzma.state][pos_state])) {
1730+ lzma_literal(s);
1731+ } else {
1732+ if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state]))
1733+ lzma_rep_match(s, pos_state);
1734+ else
1735+ lzma_match(s, pos_state);
1736+
1737+ if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0))
1738+ return false;
1739+ }
1740+ }
1741+
1742+ /*
1743+ * Having the range decoder always normalized when we are outside
1744+ * this function makes it easier to correctly handle the end of the chunk.
1745+ */
1746+ rc_normalize(&s->rc);
1747+
1748+ return true;
1749+}
1750+
1751+/*
1752+ * Reset the LZMA decoder and range decoder state. The dictionary is not reset
1753+ * here, because LZMA state may be reset without resetting the dictionary.
1754+ */
1755+static void INIT lzma_reset(struct xz_dec_lzma2 *s)
1756+{
1757+ uint16_t *probs;
1758+ size_t i;
1759+
1760+ s->lzma.state = STATE_LIT_LIT;
1761+ s->lzma.rep0 = 0;
1762+ s->lzma.rep1 = 0;
1763+ s->lzma.rep2 = 0;
1764+ s->lzma.rep3 = 0;
1765+
1766+ /*
1767+ * All probabilities are initialized to the same value. This hack
1768+ * makes the code smaller by avoiding a separate loop for each
1769+ * probability array.
1770+ *
1771+ * This could be optimized so that only the part of the literal
1772+ * probabilities that is actually required gets initialized. In the
1773+ * common case we would write 12 KiB less.
1774+ */
1775+ probs = s->lzma.is_match[0];
1776+ for (i = 0; i < PROBS_TOTAL; ++i)
1777+ probs[i] = RC_BIT_MODEL_TOTAL / 2;
1778+
1779+ rc_reset(&s->rc);
1780+}
1781+
1782+/*
1783+ * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks
1784+ * from the decoded lp and pb values. On success, the LZMA decoder state is
1785+ * reset and true is returned.
1786+ */
1787+static bool_t INIT lzma_props(struct xz_dec_lzma2 *s, uint8_t props)
1788+{
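+ /* The properties byte encodes (pb * 5 + lp) * 9 + lc, so pb <= 4, lp <= 4, and lc <= 8. */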
1789+ if (props > (4 * 5 + 4) * 9 + 8)
1790+ return false;
1791+
1792+ s->lzma.pos_mask = 0;
1793+ while (props >= 9 * 5) {
1794+ props -= 9 * 5;
1795+ ++s->lzma.pos_mask;
1796+ }
1797+
1798+ s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1;
1799+
1800+ s->lzma.literal_pos_mask = 0;
1801+ while (props >= 9) {
1802+ props -= 9;
1803+ ++s->lzma.literal_pos_mask;
1804+ }
1805+
1806+ s->lzma.lc = props;
1807+
1808+ if (s->lzma.lc + s->lzma.literal_pos_mask > 4)
1809+ return false;
1810+
1811+ s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1;
1812+
1813+ lzma_reset(s);
1814+
1815+ return true;
1816+}
1817+
1818+/*********
1819+ * LZMA2 *
1820+ *********/
1821+
1822+/*
1823+ * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't
1824+ * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This
1825+ * wrapper function takes care of making the LZMA decoder's assumption safe.
1826+ *
1827+ * As long as there is plenty of input left to be decoded in the current LZMA
1828+ * chunk, we decode directly from the caller-supplied input buffer until
1829+ * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into
1830+ * s->temp.buf, which (hopefully) gets filled on the next call to this
1831+ * function. We decode a few bytes from the temporary buffer so that we can
1832+ * continue decoding from the caller-supplied input buffer again.
1833+ */
1834+static bool_t INIT lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b)
1835+{
1836+ size_t in_avail;
1837+ uint32_t tmp;
1838+
1839+ in_avail = b->in_size - b->in_pos;
1840+ if (s->temp.size > 0 || s->lzma2.compressed == 0) {
1841+ tmp = 2 * LZMA_IN_REQUIRED - s->temp.size;
1842+ if (tmp > s->lzma2.compressed - s->temp.size)
1843+ tmp = s->lzma2.compressed - s->temp.size;
1844+ if (tmp > in_avail)
1845+ tmp = in_avail;
1846+
1847+ memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp);
1848+
1849+ if (s->temp.size + tmp == s->lzma2.compressed) {
1850+ memzero(s->temp.buf + s->temp.size + tmp,
1851+ sizeof(s->temp.buf)
1852+ - s->temp.size - tmp);
1853+ s->rc.in_limit = s->temp.size + tmp;
1854+ } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) {
1855+ s->temp.size += tmp;
1856+ b->in_pos += tmp;
1857+ return true;
1858+ } else {
1859+ s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED;
1860+ }
1861+
1862+ s->rc.in = s->temp.buf;
1863+ s->rc.in_pos = 0;
1864+
1865+ if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp)
1866+ return false;
1867+
1868+ s->lzma2.compressed -= s->rc.in_pos;
1869+
1870+ if (s->rc.in_pos < s->temp.size) {
1871+ s->temp.size -= s->rc.in_pos;
1872+ memmove(s->temp.buf, s->temp.buf + s->rc.in_pos,
1873+ s->temp.size);
1874+ return true;
1875+ }
1876+
1877+ b->in_pos += s->rc.in_pos - s->temp.size;
1878+ s->temp.size = 0;
1879+ }
1880+
1881+ in_avail = b->in_size - b->in_pos;
1882+ if (in_avail >= LZMA_IN_REQUIRED) {
1883+ s->rc.in = b->in;
1884+ s->rc.in_pos = b->in_pos;
1885+
1886+ if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED)
1887+ s->rc.in_limit = b->in_pos + s->lzma2.compressed;
1888+ else
1889+ s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED;
1890+
1891+ if (!lzma_main(s))
1892+ return false;
1893+
1894+ in_avail = s->rc.in_pos - b->in_pos;
1895+ if (in_avail > s->lzma2.compressed)
1896+ return false;
1897+
1898+ s->lzma2.compressed -= in_avail;
1899+ b->in_pos = s->rc.in_pos;
1900+ }
1901+
1902+ in_avail = b->in_size - b->in_pos;
1903+ if (in_avail < LZMA_IN_REQUIRED) {
1904+ if (in_avail > s->lzma2.compressed)
1905+ in_avail = s->lzma2.compressed;
1906+
1907+ memcpy(s->temp.buf, b->in + b->in_pos, in_avail);
1908+ s->temp.size = in_avail;
1909+ b->in_pos += in_avail;
1910+ }
1911+
1912+ return true;
1913+}
1914+
1915+/*
1916+ * Take care of the LZMA2 control layer, and forward the job of actual LZMA
1917+ * decoding or copying of uncompressed chunks to other functions.
1918+ */
1919+XZ_EXTERN enum xz_ret INIT xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
1920+ struct xz_buf *b)
1921+{
1922+ uint32_t tmp;
1923+
1924+ while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) {
1925+ switch (s->lzma2.sequence) {
1926+ case SEQ_CONTROL:
1927+ /*
1928+ * LZMA2 control byte
1929+ *
1930+ * Exact values:
1931+ * 0x00 End marker
1932+ * 0x01 Dictionary reset followed by
1933+ * an uncompressed chunk
1934+ * 0x02 Uncompressed chunk (no dictionary reset)
1935+ *
1936+ * Highest three bits (s->control & 0xE0):
1937+ * 0xE0 Dictionary reset, new properties and state
1938+ * reset, followed by LZMA compressed chunk
1939+ * 0xC0 New properties and state reset, followed
1940+ * by LZMA compressed chunk (no dictionary
1941+ * reset)
1942+ * 0xA0 State reset using old properties,
1943+ * followed by LZMA compressed chunk (no
1944+ * dictionary reset)
1945+ * 0x80 LZMA chunk (no dictionary or state reset)
1946+ *
1947+ * For LZMA compressed chunks, the lowest five bits
 1948+ * (s->control & 0x1F) are the highest bits of the
1949+ * uncompressed size (bits 16-20).
1950+ *
1951+ * A new LZMA2 stream must begin with a dictionary
1952+ * reset. The first LZMA chunk must set new
1953+ * properties and reset the LZMA state.
1954+ *
1955+ * Values that don't match anything described above
1956+ * are invalid and we return XZ_DATA_ERROR.
1957+ */
1958+ tmp = b->in[b->in_pos++];
1959+
1960+ if (tmp == 0x00)
1961+ return XZ_STREAM_END;
1962+
1963+ if (tmp >= 0xE0 || tmp == 0x01) {
1964+ s->lzma2.need_props = true;
1965+ s->lzma2.need_dict_reset = false;
1966+ dict_reset(&s->dict, b);
1967+ } else if (s->lzma2.need_dict_reset) {
1968+ return XZ_DATA_ERROR;
1969+ }
1970+
1971+ if (tmp >= 0x80) {
1972+ s->lzma2.uncompressed = (tmp & 0x1F) << 16;
1973+ s->lzma2.sequence = SEQ_UNCOMPRESSED_1;
1974+
1975+ if (tmp >= 0xC0) {
1976+ /*
1977+ * When there are new properties,
1978+ * state reset is done at
1979+ * SEQ_PROPERTIES.
1980+ */
1981+ s->lzma2.need_props = false;
1982+ s->lzma2.next_sequence
1983+ = SEQ_PROPERTIES;
1984+
1985+ } else if (s->lzma2.need_props) {
1986+ return XZ_DATA_ERROR;
1987+
1988+ } else {
1989+ s->lzma2.next_sequence
1990+ = SEQ_LZMA_PREPARE;
1991+ if (tmp >= 0xA0)
1992+ lzma_reset(s);
1993+ }
1994+ } else {
1995+ if (tmp > 0x02)
1996+ return XZ_DATA_ERROR;
1997+
1998+ s->lzma2.sequence = SEQ_COMPRESSED_0;
1999+ s->lzma2.next_sequence = SEQ_COPY;
2000+ }
2001+
2002+ break;
2003+
2004+ case SEQ_UNCOMPRESSED_1:
2005+ s->lzma2.uncompressed
2006+ += (uint32_t)b->in[b->in_pos++] << 8;
2007+ s->lzma2.sequence = SEQ_UNCOMPRESSED_2;
2008+ break;
2009+
2010+ case SEQ_UNCOMPRESSED_2:
2011+ s->lzma2.uncompressed
2012+ += (uint32_t)b->in[b->in_pos++] + 1;
2013+ s->lzma2.sequence = SEQ_COMPRESSED_0;
2014+ break;
2015+
2016+ case SEQ_COMPRESSED_0:
2017+ s->lzma2.compressed
2018+ = (uint32_t)b->in[b->in_pos++] << 8;
2019+ s->lzma2.sequence = SEQ_COMPRESSED_1;
2020+ break;
2021+
2022+ case SEQ_COMPRESSED_1:
2023+ s->lzma2.compressed
2024+ += (uint32_t)b->in[b->in_pos++] + 1;
2025+ s->lzma2.sequence = s->lzma2.next_sequence;
2026+ break;
2027+
2028+ case SEQ_PROPERTIES:
2029+ if (!lzma_props(s, b->in[b->in_pos++]))
2030+ return XZ_DATA_ERROR;
2031+
2032+ s->lzma2.sequence = SEQ_LZMA_PREPARE;
2033+
2034+ case SEQ_LZMA_PREPARE:
2035+ if (s->lzma2.compressed < RC_INIT_BYTES)
2036+ return XZ_DATA_ERROR;
2037+
2038+ if (!rc_read_init(&s->rc, b))
2039+ return XZ_OK;
2040+
2041+ s->lzma2.compressed -= RC_INIT_BYTES;
2042+ s->lzma2.sequence = SEQ_LZMA_RUN;
2043+
2044+ case SEQ_LZMA_RUN:
2045+ /*
 2046+ * Set the dictionary limit to indicate how much we
 2047+ * want decoded at most. Decode new data into the
2048+ * dictionary. Flush the new data from dictionary to
2049+ * b->out. Check if we finished decoding this chunk.
2050+ * In case the dictionary got full but we didn't fill
2051+ * the output buffer yet, we may run this loop
2052+ * multiple times without changing s->lzma2.sequence.
2053+ */
2054+ dict_limit(&s->dict, min_t(size_t,
2055+ b->out_size - b->out_pos,
2056+ s->lzma2.uncompressed));
2057+ if (!lzma2_lzma(s, b))
2058+ return XZ_DATA_ERROR;
2059+
2060+ s->lzma2.uncompressed -= dict_flush(&s->dict, b);
2061+
2062+ if (s->lzma2.uncompressed == 0) {
2063+ if (s->lzma2.compressed > 0 || s->lzma.len > 0
2064+ || !rc_is_finished(&s->rc))
2065+ return XZ_DATA_ERROR;
2066+
2067+ rc_reset(&s->rc);
2068+ s->lzma2.sequence = SEQ_CONTROL;
2069+
2070+ } else if (b->out_pos == b->out_size
2071+ || (b->in_pos == b->in_size
2072+ && s->temp.size
2073+ < s->lzma2.compressed)) {
2074+ return XZ_OK;
2075+ }
2076+
2077+ break;
2078+
2079+ case SEQ_COPY:
2080+ dict_uncompressed(&s->dict, b, &s->lzma2.compressed);
2081+ if (s->lzma2.compressed > 0)
2082+ return XZ_OK;
2083+
2084+ s->lzma2.sequence = SEQ_CONTROL;
2085+ break;
2086+ }
2087+ }
2088+
2089+ return XZ_OK;
2090+}
2091+
2092+XZ_EXTERN struct xz_dec_lzma2 *INIT xz_dec_lzma2_create(enum xz_mode mode,
2093+ uint32_t dict_max)
2094+{
2095+ struct xz_dec_lzma2 *s = malloc(sizeof(*s));
2096+ if (s == NULL)
2097+ return NULL;
2098+
2099+ s->dict.mode = mode;
2100+ s->dict.size_max = dict_max;
2101+
2102+ if (DEC_IS_PREALLOC(mode)) {
2103+ s->dict.buf = large_malloc(dict_max);
2104+ if (s->dict.buf == NULL) {
2105+ free(s);
2106+ return NULL;
2107+ }
2108+ } else if (DEC_IS_DYNALLOC(mode)) {
2109+ s->dict.buf = NULL;
2110+ s->dict.allocated = 0;
2111+ }
2112+
2113+ return s;
2114+}
2115+
2116+XZ_EXTERN enum xz_ret INIT xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
2117+{
2118+ /* This limits dictionary size to 3 GiB to keep parsing simpler. */
2119+ if (props > 39)
2120+ return XZ_OPTIONS_ERROR;
2121+
2122+ s->dict.size = 2 + (props & 1);
2123+ s->dict.size <<= (props >> 1) + 11;
2124+
2125+ if (DEC_IS_MULTI(s->dict.mode)) {
2126+ if (s->dict.size > s->dict.size_max)
2127+ return XZ_MEMLIMIT_ERROR;
2128+
2129+ s->dict.end = s->dict.size;
2130+
2131+ if (DEC_IS_DYNALLOC(s->dict.mode)) {
2132+ if (s->dict.allocated < s->dict.size) {
2133+ large_free(s->dict.buf);
2134+ s->dict.buf = large_malloc(s->dict.size);
2135+ if (s->dict.buf == NULL) {
2136+ s->dict.allocated = 0;
2137+ return XZ_MEM_ERROR;
2138+ }
2139+ }
2140+ }
2141+ }
2142+
2143+ s->lzma.len = 0;
2144+
2145+ s->lzma2.sequence = SEQ_CONTROL;
2146+ s->lzma2.need_dict_reset = true;
2147+
2148+ s->temp.size = 0;
2149+
2150+ return XZ_OK;
2151+}
2152+
2153+XZ_EXTERN void INIT xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
2154+{
2155+ if (DEC_IS_MULTI(s->dict.mode))
2156+ large_free(s->dict.buf);
2157+
2158+ free(s);
2159+}
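
As a reading aid, here is a minimal standalone sketch (not part of the patch) of two
pieces of LZMA2 bookkeeping implemented above: classifying a control byte per the
table in xz_dec_lzma2_run(), and the properties-byte arithmetic from
xz_dec_lzma2_reset(). The enum and helper names are illustrative only.

    #include <stdint.h>

    /* Kinds of LZMA2 chunk, per the control-byte table above. */
    enum lzma2_chunk {
        LZMA2_END,          /* 0x00: end marker */
        LZMA2_UNCOMPRESSED, /* 0x01 (with dict reset) or 0x02 */
        LZMA2_LZMA,         /* 0x80-0xFF: LZMA compressed chunk */
        LZMA2_INVALID       /* 0x03-0x7F: not allowed */
    };

    static enum lzma2_chunk classify_control(uint8_t c)
    {
        if (c == 0x00)
            return LZMA2_END;
        if (c == 0x01 || c == 0x02)
            return LZMA2_UNCOMPRESSED;
        return c >= 0x80 ? LZMA2_LZMA : LZMA2_INVALID;
    }

    /* Dictionary size encoded in the properties byte, mirroring
     * xz_dec_lzma2_reset(): 4 KiB for props = 0 up to 3 GiB for
     * props = 39; larger values are rejected (XZ_OPTIONS_ERROR). */
    static uint64_t lzma2_dict_size(uint8_t props)
    {
        uint64_t size = 2 + (props & 1);   /* mantissa: 2 or 3 */

        return props > 39 ? 0 : size << ((props >> 1) + 11);
    }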
2160diff --git a/xen/common/xz/dec_stream.c b/xen/common/xz/dec_stream.c
2161new file mode 100644
2162--- /dev/null
2163+++ b/xen/common/xz/dec_stream.c
2164@@ -0,0 +1,821 @@
2165+/*
2166+ * .xz Stream decoder
2167+ *
2168+ * Author: Lasse Collin <lasse.collin@tukaani.org>
2169+ *
2170+ * This file has been put into the public domain.
2171+ * You can do whatever you want with this file.
2172+ */
2173+
2174+#include "private.h"
2175+#include "stream.h"
2176+
2177+/* Hash used to validate the Index field */
2178+struct xz_dec_hash {
2179+ vli_type unpadded;
2180+ vli_type uncompressed;
2181+ uint32_t crc32;
2182+};
2183+
2184+struct xz_dec {
2185+ /* Position in dec_main() */
2186+ enum {
2187+ SEQ_STREAM_HEADER,
2188+ SEQ_BLOCK_START,
2189+ SEQ_BLOCK_HEADER,
2190+ SEQ_BLOCK_UNCOMPRESS,
2191+ SEQ_BLOCK_PADDING,
2192+ SEQ_BLOCK_CHECK,
2193+ SEQ_INDEX,
2194+ SEQ_INDEX_PADDING,
2195+ SEQ_INDEX_CRC32,
2196+ SEQ_STREAM_FOOTER
2197+ } sequence;
2198+
2199+ /* Position in variable-length integers and Check fields */
2200+ uint32_t pos;
2201+
2202+ /* Variable-length integer decoded by dec_vli() */
2203+ vli_type vli;
2204+
2205+ /* Saved in_pos and out_pos */
2206+ size_t in_start;
2207+ size_t out_start;
2208+
2209+ /* CRC32 value in Block or Index */
2210+ uint32_t crc32;
2211+
2212+ /* Type of the integrity check calculated from uncompressed data */
2213+ enum xz_check check_type;
2214+
2215+ /* Operation mode */
2216+ enum xz_mode mode;
2217+
2218+ /*
2219+ * True if the next call to xz_dec_run() is allowed to return
2220+ * XZ_BUF_ERROR.
2221+ */
2222+ bool_t allow_buf_error;
2223+
2224+ /* Information stored in Block Header */
2225+ struct {
2226+ /*
2227+ * Value stored in the Compressed Size field, or
2228+ * VLI_UNKNOWN if Compressed Size is not present.
2229+ */
2230+ vli_type compressed;
2231+
2232+ /*
2233+ * Value stored in the Uncompressed Size field, or
2234+ * VLI_UNKNOWN if Uncompressed Size is not present.
2235+ */
2236+ vli_type uncompressed;
2237+
2238+ /* Size of the Block Header field */
2239+ uint32_t size;
2240+ } block_header;
2241+
2242+ /* Information collected when decoding Blocks */
2243+ struct {
2244+ /* Observed compressed size of the current Block */
2245+ vli_type compressed;
2246+
2247+ /* Observed uncompressed size of the current Block */
2248+ vli_type uncompressed;
2249+
2250+ /* Number of Blocks decoded so far */
2251+ vli_type count;
2252+
2253+ /*
2254+ * Hash calculated from the Block sizes. This is used to
2255+ * validate the Index field.
2256+ */
2257+ struct xz_dec_hash hash;
2258+ } block;
2259+
2260+ /* Variables needed when verifying the Index field */
2261+ struct {
2262+ /* Position in dec_index() */
2263+ enum {
2264+ SEQ_INDEX_COUNT,
2265+ SEQ_INDEX_UNPADDED,
2266+ SEQ_INDEX_UNCOMPRESSED
2267+ } sequence;
2268+
2269+ /* Size of the Index in bytes */
2270+ vli_type size;
2271+
2272+ /* Number of Records (matches block.count in valid files) */
2273+ vli_type count;
2274+
2275+ /*
2276+ * Hash calculated from the Records (matches block.hash in
2277+ * valid files).
2278+ */
2279+ struct xz_dec_hash hash;
2280+ } index;
2281+
2282+ /*
2283+ * Temporary buffer needed to hold Stream Header, Block Header,
2284+ * and Stream Footer. The Block Header is the biggest (1 KiB)
2285+ * so we reserve space according to that. buf[] has to be aligned
2286+ * to a multiple of four bytes; the size_t variables before it
2287+ * should guarantee this.
2288+ */
2289+ struct {
2290+ size_t pos;
2291+ size_t size;
2292+ uint8_t buf[1024];
2293+ } temp;
2294+
2295+ struct xz_dec_lzma2 *lzma2;
2296+
2297+#ifdef XZ_DEC_BCJ
2298+ struct xz_dec_bcj *bcj;
2299+ bool_t bcj_active;
2300+#endif
2301+};
2302+
2303+#ifdef XZ_DEC_ANY_CHECK
2304+/* Sizes of the Check field with different Check IDs */
2305+static const uint8_t check_sizes[16] = {
2306+ 0,
2307+ 4, 4, 4,
2308+ 8, 8, 8,
2309+ 16, 16, 16,
2310+ 32, 32, 32,
2311+ 64, 64, 64
2312+};
2313+#endif
2314+
2315+/*
2316+ * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
 2317+ * must have set s->temp.size to indicate how much data we are supposed
2318+ * to copy into s->temp.buf. Return true once s->temp.pos has reached
2319+ * s->temp.size.
2320+ */
2321+static bool_t INIT fill_temp(struct xz_dec *s, struct xz_buf *b)
2322+{
2323+ size_t copy_size = min_t(size_t,
2324+ b->in_size - b->in_pos, s->temp.size - s->temp.pos);
2325+
2326+ memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size);
2327+ b->in_pos += copy_size;
2328+ s->temp.pos += copy_size;
2329+
2330+ if (s->temp.pos == s->temp.size) {
2331+ s->temp.pos = 0;
2332+ return true;
2333+ }
2334+
2335+ return false;
2336+}
2337+
2338+/* Decode a variable-length integer (little-endian base-128 encoding) */
2339+static enum xz_ret INIT dec_vli(struct xz_dec *s, const uint8_t *in,
2340+ size_t *in_pos, size_t in_size)
2341+{
2342+ uint8_t byte;
2343+
2344+ if (s->pos == 0)
2345+ s->vli = 0;
2346+
2347+ while (*in_pos < in_size) {
2348+ byte = in[*in_pos];
2349+ ++*in_pos;
2350+
2351+ s->vli |= (vli_type)(byte & 0x7F) << s->pos;
2352+
2353+ if ((byte & 0x80) == 0) {
2354+ /* Don't allow non-minimal encodings. */
2355+ if (byte == 0 && s->pos != 0)
2356+ return XZ_DATA_ERROR;
2357+
2358+ s->pos = 0;
2359+ return XZ_STREAM_END;
2360+ }
2361+
2362+ s->pos += 7;
2363+ if (s->pos == 7 * VLI_BYTES_MAX)
2364+ return XZ_DATA_ERROR;
2365+ }
2366+
2367+ return XZ_OK;
2368+}
2369+
2370+/*
2371+ * Decode the Compressed Data field from a Block. Update and validate
2372+ * the observed compressed and uncompressed sizes of the Block so that
2373+ * they don't exceed the values possibly stored in the Block Header
2374+ * (validation assumes that no integer overflow occurs, since vli_type
2375+ * is normally uint64_t). Update the CRC32 if presence of the CRC32
2376+ * field was indicated in Stream Header.
2377+ *
2378+ * Once the decoding is finished, validate that the observed sizes match
2379+ * the sizes possibly stored in the Block Header. Update the hash and
2380+ * Block count, which are later used to validate the Index field.
2381+ */
2382+static enum xz_ret INIT dec_block(struct xz_dec *s, struct xz_buf *b)
2383+{
2384+ enum xz_ret ret;
2385+
2386+ s->in_start = b->in_pos;
2387+ s->out_start = b->out_pos;
2388+
2389+#ifdef XZ_DEC_BCJ
2390+ if (s->bcj_active)
2391+ ret = xz_dec_bcj_run(s->bcj, s->lzma2, b);
2392+ else
2393+#endif
2394+ ret = xz_dec_lzma2_run(s->lzma2, b);
2395+
2396+ s->block.compressed += b->in_pos - s->in_start;
2397+ s->block.uncompressed += b->out_pos - s->out_start;
2398+
2399+ /*
2400+ * There is no need to separately check for VLI_UNKNOWN, since
2401+ * the observed sizes are always smaller than VLI_UNKNOWN.
2402+ */
2403+ if (s->block.compressed > s->block_header.compressed
2404+ || s->block.uncompressed
2405+ > s->block_header.uncompressed)
2406+ return XZ_DATA_ERROR;
2407+
2408+ if (s->check_type == XZ_CHECK_CRC32)
2409+ s->crc32 = xz_crc32(b->out + s->out_start,
2410+ b->out_pos - s->out_start, s->crc32);
2411+
2412+ if (ret == XZ_STREAM_END) {
2413+ if (s->block_header.compressed != VLI_UNKNOWN
2414+ && s->block_header.compressed
2415+ != s->block.compressed)
2416+ return XZ_DATA_ERROR;
2417+
2418+ if (s->block_header.uncompressed != VLI_UNKNOWN
2419+ && s->block_header.uncompressed
2420+ != s->block.uncompressed)
2421+ return XZ_DATA_ERROR;
2422+
2423+ s->block.hash.unpadded += s->block_header.size
2424+ + s->block.compressed;
2425+
2426+#ifdef XZ_DEC_ANY_CHECK
2427+ s->block.hash.unpadded += check_sizes[s->check_type];
2428+#else
2429+ if (s->check_type == XZ_CHECK_CRC32)
2430+ s->block.hash.unpadded += 4;
2431+#endif
2432+
2433+ s->block.hash.uncompressed += s->block.uncompressed;
2434+ s->block.hash.crc32 = xz_crc32(
2435+ (const uint8_t *)&s->block.hash,
2436+ sizeof(s->block.hash), s->block.hash.crc32);
2437+
2438+ ++s->block.count;
2439+ }
2440+
2441+ return ret;
2442+}
2443+
2444+/* Update the Index size and the CRC32 value. */
2445+static void INIT index_update(struct xz_dec *s, const struct xz_buf *b)
2446+{
2447+ size_t in_used = b->in_pos - s->in_start;
2448+ s->index.size += in_used;
2449+ s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
2450+}
2451+
2452+/*
2453+ * Decode the Number of Records, Unpadded Size, and Uncompressed Size
2454+ * fields from the Index field. That is, Index Padding and CRC32 are not
2455+ * decoded by this function.
2456+ *
2457+ * This can return XZ_OK (more input needed), XZ_STREAM_END (everything
2458+ * successfully decoded), or XZ_DATA_ERROR (input is corrupt).
2459+ */
2460+static enum xz_ret INIT dec_index(struct xz_dec *s, struct xz_buf *b)
2461+{
2462+ enum xz_ret ret;
2463+
2464+ do {
2465+ ret = dec_vli(s, b->in, &b->in_pos, b->in_size);
2466+ if (ret != XZ_STREAM_END) {
2467+ index_update(s, b);
2468+ return ret;
2469+ }
2470+
2471+ switch (s->index.sequence) {
2472+ case SEQ_INDEX_COUNT:
2473+ s->index.count = s->vli;
2474+
2475+ /*
2476+ * Validate that the Number of Records field
2477+ * indicates the same number of Records as
2478+ * there were Blocks in the Stream.
2479+ */
2480+ if (s->index.count != s->block.count)
2481+ return XZ_DATA_ERROR;
2482+
2483+ s->index.sequence = SEQ_INDEX_UNPADDED;
2484+ break;
2485+
2486+ case SEQ_INDEX_UNPADDED:
2487+ s->index.hash.unpadded += s->vli;
2488+ s->index.sequence = SEQ_INDEX_UNCOMPRESSED;
2489+ break;
2490+
2491+ case SEQ_INDEX_UNCOMPRESSED:
2492+ s->index.hash.uncompressed += s->vli;
2493+ s->index.hash.crc32 = xz_crc32(
2494+ (const uint8_t *)&s->index.hash,
2495+ sizeof(s->index.hash),
2496+ s->index.hash.crc32);
2497+ --s->index.count;
2498+ s->index.sequence = SEQ_INDEX_UNPADDED;
2499+ break;
2500+ }
2501+ } while (s->index.count > 0);
2502+
2503+ return XZ_STREAM_END;
2504+}
2505+
2506+/*
2507+ * Validate that the next four input bytes match the value of s->crc32.
2508+ * s->pos must be zero when starting to validate the first byte.
2509+ */
2510+static enum xz_ret INIT crc32_validate(struct xz_dec *s, struct xz_buf *b)
2511+{
2512+ do {
2513+ if (b->in_pos == b->in_size)
2514+ return XZ_OK;
2515+
2516+ if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++])
2517+ return XZ_DATA_ERROR;
2518+
2519+ s->pos += 8;
2520+
2521+ } while (s->pos < 32);
2522+
2523+ s->crc32 = 0;
2524+ s->pos = 0;
2525+
2526+ return XZ_STREAM_END;
2527+}
2528+
2529+#ifdef XZ_DEC_ANY_CHECK
2530+/*
2531+ * Skip over the Check field when the Check ID is not supported.
2532+ * Returns true once the whole Check field has been skipped over.
2533+ */
2534+static bool_t INIT check_skip(struct xz_dec *s, struct xz_buf *b)
2535+{
2536+ while (s->pos < check_sizes[s->check_type]) {
2537+ if (b->in_pos == b->in_size)
2538+ return false;
2539+
2540+ ++b->in_pos;
2541+ ++s->pos;
2542+ }
2543+
2544+ s->pos = 0;
2545+
2546+ return true;
2547+}
2548+#endif
2549+
2550+/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
2551+static enum xz_ret INIT dec_stream_header(struct xz_dec *s)
2552+{
2553+ if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
2554+ return XZ_FORMAT_ERROR;
2555+
2556+ if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
2557+ != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
2558+ return XZ_DATA_ERROR;
2559+
2560+ if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
2561+ return XZ_OPTIONS_ERROR;
2562+
2563+ /*
2564+ * Of integrity checks, we support only none (Check ID = 0) and
2565+ * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
2566+ * we will accept other check types too, but then the check won't
2567+ * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
2568+ */
2569+ s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
2570+
2571+#ifdef XZ_DEC_ANY_CHECK
2572+ if (s->check_type > XZ_CHECK_MAX)
2573+ return XZ_OPTIONS_ERROR;
2574+
2575+ if (s->check_type > XZ_CHECK_CRC32)
2576+ return XZ_UNSUPPORTED_CHECK;
2577+#else
2578+ if (s->check_type > XZ_CHECK_CRC32)
2579+ return XZ_OPTIONS_ERROR;
2580+#endif
2581+
2582+ return XZ_OK;
2583+}
2584+
2585+/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */
2586+static enum xz_ret INIT dec_stream_footer(struct xz_dec *s)
2587+{
2588+ if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
2589+ return XZ_DATA_ERROR;
2590+
2591+ if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
2592+ return XZ_DATA_ERROR;
2593+
2594+ /*
2595+ * Validate Backward Size. Note that we never added the size of the
2596+ * Index CRC32 field to s->index.size, thus we use s->index.size / 4
2597+ * instead of s->index.size / 4 - 1.
2598+ */
2599+ if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
2600+ return XZ_DATA_ERROR;
2601+
2602+ if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
2603+ return XZ_DATA_ERROR;
2604+
2605+ /*
2606+ * Use XZ_STREAM_END instead of XZ_OK to be more convenient
2607+ * for the caller.
2608+ */
2609+ return XZ_STREAM_END;
2610+}
2611+
2612+/* Decode the Block Header and initialize the filter chain. */
2613+static enum xz_ret INIT dec_block_header(struct xz_dec *s)
2614+{
2615+ enum xz_ret ret;
2616+
2617+ /*
2618+ * Validate the CRC32. We know that the temp buffer is at least
2619+ * eight bytes so this is safe.
2620+ */
2621+ s->temp.size -= 4;
2622+ if (xz_crc32(s->temp.buf, s->temp.size, 0)
2623+ != get_le32(s->temp.buf + s->temp.size))
2624+ return XZ_DATA_ERROR;
2625+
2626+ s->temp.pos = 2;
2627+
2628+ /*
2629+ * Catch unsupported Block Flags. We support only one or two filters
2630+ * in the chain, so we catch that with the same test.
2631+ */
2632+#ifdef XZ_DEC_BCJ
2633+ if (s->temp.buf[1] & 0x3E)
2634+#else
2635+ if (s->temp.buf[1] & 0x3F)
2636+#endif
2637+ return XZ_OPTIONS_ERROR;
2638+
2639+ /* Compressed Size */
2640+ if (s->temp.buf[1] & 0x40) {
2641+ if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2642+ != XZ_STREAM_END)
2643+ return XZ_DATA_ERROR;
2644+
2645+ s->block_header.compressed = s->vli;
2646+ } else {
2647+ s->block_header.compressed = VLI_UNKNOWN;
2648+ }
2649+
2650+ /* Uncompressed Size */
2651+ if (s->temp.buf[1] & 0x80) {
2652+ if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size)
2653+ != XZ_STREAM_END)
2654+ return XZ_DATA_ERROR;
2655+
2656+ s->block_header.uncompressed = s->vli;
2657+ } else {
2658+ s->block_header.uncompressed = VLI_UNKNOWN;
2659+ }
2660+
2661+#ifdef XZ_DEC_BCJ
2662+ /* If there are two filters, the first one must be a BCJ filter. */
2663+ s->bcj_active = s->temp.buf[1] & 0x01;
2664+ if (s->bcj_active) {
2665+ if (s->temp.size - s->temp.pos < 2)
2666+ return XZ_OPTIONS_ERROR;
2667+
2668+ ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]);
2669+ if (ret != XZ_OK)
2670+ return ret;
2671+
2672+ /*
2673+ * We don't support custom start offset,
2674+ * so Size of Properties must be zero.
2675+ */
2676+ if (s->temp.buf[s->temp.pos++] != 0x00)
2677+ return XZ_OPTIONS_ERROR;
2678+ }
2679+#endif
2680+
2681+ /* Valid Filter Flags always take at least two bytes. */
2682+ if (s->temp.size - s->temp.pos < 2)
2683+ return XZ_DATA_ERROR;
2684+
2685+ /* Filter ID = LZMA2 */
2686+ if (s->temp.buf[s->temp.pos++] != 0x21)
2687+ return XZ_OPTIONS_ERROR;
2688+
2689+ /* Size of Properties = 1-byte Filter Properties */
2690+ if (s->temp.buf[s->temp.pos++] != 0x01)
2691+ return XZ_OPTIONS_ERROR;
2692+
2693+ /* Filter Properties contains LZMA2 dictionary size. */
2694+ if (s->temp.size - s->temp.pos < 1)
2695+ return XZ_DATA_ERROR;
2696+
2697+ ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]);
2698+ if (ret != XZ_OK)
2699+ return ret;
2700+
2701+ /* The rest must be Header Padding. */
2702+ while (s->temp.pos < s->temp.size)
2703+ if (s->temp.buf[s->temp.pos++] != 0x00)
2704+ return XZ_OPTIONS_ERROR;
2705+
2706+ s->temp.pos = 0;
2707+ s->block.compressed = 0;
2708+ s->block.uncompressed = 0;
2709+
2710+ return XZ_OK;
2711+}
2712+
2713+static enum xz_ret INIT dec_main(struct xz_dec *s, struct xz_buf *b)
2714+{
2715+ enum xz_ret ret;
2716+
2717+ /*
2718+ * Store the start position for the case when we are in the middle
2719+ * of the Index field.
2720+ */
2721+ s->in_start = b->in_pos;
2722+
2723+ while (true) {
2724+ switch (s->sequence) {
2725+ case SEQ_STREAM_HEADER:
2726+ /*
2727+ * Stream Header is copied to s->temp, and then
2728+ * decoded from there. This way if the caller
 2729+ * gives us only a little input at a time, we can
 2730+ * still keep the Stream Header decoding code
 2731+ * simple. A similar approach is used in many places
2732+ * in this file.
2733+ */
2734+ if (!fill_temp(s, b))
2735+ return XZ_OK;
2736+
2737+ /*
2738+ * If dec_stream_header() returns
2739+ * XZ_UNSUPPORTED_CHECK, it is still possible
2740+ * to continue decoding if working in multi-call
2741+ * mode. Thus, update s->sequence before calling
2742+ * dec_stream_header().
2743+ */
2744+ s->sequence = SEQ_BLOCK_START;
2745+
2746+ ret = dec_stream_header(s);
2747+ if (ret != XZ_OK)
2748+ return ret;
2749+
2750+ case SEQ_BLOCK_START:
2751+ /* We need one byte of input to continue. */
2752+ if (b->in_pos == b->in_size)
2753+ return XZ_OK;
2754+
2755+ /* See if this is the beginning of the Index field. */
2756+ if (b->in[b->in_pos] == 0) {
2757+ s->in_start = b->in_pos++;
2758+ s->sequence = SEQ_INDEX;
2759+ break;
2760+ }
2761+
2762+ /*
2763+ * Calculate the size of the Block Header and
2764+ * prepare to decode it.
2765+ */
2766+ s->block_header.size
2767+ = ((uint32_t)b->in[b->in_pos] + 1) * 4;
2768+
2769+ s->temp.size = s->block_header.size;
2770+ s->temp.pos = 0;
2771+ s->sequence = SEQ_BLOCK_HEADER;
2772+
2773+ case SEQ_BLOCK_HEADER:
2774+ if (!fill_temp(s, b))
2775+ return XZ_OK;
2776+
2777+ ret = dec_block_header(s);
2778+ if (ret != XZ_OK)
2779+ return ret;
2780+
2781+ s->sequence = SEQ_BLOCK_UNCOMPRESS;
2782+
2783+ case SEQ_BLOCK_UNCOMPRESS:
2784+ ret = dec_block(s, b);
2785+ if (ret != XZ_STREAM_END)
2786+ return ret;
2787+
2788+ s->sequence = SEQ_BLOCK_PADDING;
2789+
2790+ case SEQ_BLOCK_PADDING:
2791+ /*
2792+ * Size of Compressed Data + Block Padding
2793+ * must be a multiple of four. We don't need
2794+ * s->block.compressed for anything else
2795+ * anymore, so we use it here to test the size
2796+ * of the Block Padding field.
2797+ */
2798+ while (s->block.compressed & 3) {
2799+ if (b->in_pos == b->in_size)
2800+ return XZ_OK;
2801+
2802+ if (b->in[b->in_pos++] != 0)
2803+ return XZ_DATA_ERROR;
2804+
2805+ ++s->block.compressed;
2806+ }
2807+
2808+ s->sequence = SEQ_BLOCK_CHECK;
2809+
2810+ case SEQ_BLOCK_CHECK:
2811+ if (s->check_type == XZ_CHECK_CRC32) {
2812+ ret = crc32_validate(s, b);
2813+ if (ret != XZ_STREAM_END)
2814+ return ret;
2815+ }
2816+#ifdef XZ_DEC_ANY_CHECK
2817+ else if (!check_skip(s, b)) {
2818+ return XZ_OK;
2819+ }
2820+#endif
2821+
2822+ s->sequence = SEQ_BLOCK_START;
2823+ break;
2824+
2825+ case SEQ_INDEX:
2826+ ret = dec_index(s, b);
2827+ if (ret != XZ_STREAM_END)
2828+ return ret;
2829+
2830+ s->sequence = SEQ_INDEX_PADDING;
2831+
2832+ case SEQ_INDEX_PADDING:
2833+ while ((s->index.size + (b->in_pos - s->in_start))
2834+ & 3) {
2835+ if (b->in_pos == b->in_size) {
2836+ index_update(s, b);
2837+ return XZ_OK;
2838+ }
2839+
2840+ if (b->in[b->in_pos++] != 0)
2841+ return XZ_DATA_ERROR;
2842+ }
2843+
2844+ /* Finish the CRC32 value and Index size. */
2845+ index_update(s, b);
2846+
2847+ /* Compare the hashes to validate the Index field. */
2848+ if (!memeq(&s->block.hash, &s->index.hash,
2849+ sizeof(s->block.hash)))
2850+ return XZ_DATA_ERROR;
2851+
2852+ s->sequence = SEQ_INDEX_CRC32;
2853+
2854+ case SEQ_INDEX_CRC32:
2855+ ret = crc32_validate(s, b);
2856+ if (ret != XZ_STREAM_END)
2857+ return ret;
2858+
2859+ s->temp.size = STREAM_HEADER_SIZE;
2860+ s->sequence = SEQ_STREAM_FOOTER;
2861+
2862+ case SEQ_STREAM_FOOTER:
2863+ if (!fill_temp(s, b))
2864+ return XZ_OK;
2865+
2866+ return dec_stream_footer(s);
2867+ }
2868+ }
2869+
2870+ /* Never reached */
2871+}
2872+
2873+XZ_EXTERN void INIT xz_dec_reset(struct xz_dec *s)
2874+{
2875+ s->sequence = SEQ_STREAM_HEADER;
2876+ s->allow_buf_error = false;
2877+ s->pos = 0;
2878+ s->crc32 = 0;
2879+ memzero(&s->block, sizeof(s->block));
2880+ memzero(&s->index, sizeof(s->index));
2881+ s->temp.pos = 0;
2882+ s->temp.size = STREAM_HEADER_SIZE;
2883+}
2884+
2885+/*
2886+ * xz_dec_run() is a wrapper for dec_main() to handle some special cases in
2887+ * multi-call and single-call decoding.
2888+ *
2889+ * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we
2890+ * are not going to make any progress anymore. This is to prevent the caller
2891+ * from calling us infinitely when the input file is truncated or otherwise
 2892+ * corrupt. Since the zlib-style API allows the caller to fill the input buffer
2893+ * only when the decoder doesn't produce any new output, we have to be careful
2894+ * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only
2895+ * after the second consecutive call to xz_dec_run() that makes no progress.
2896+ *
2897+ * In single-call mode, if we couldn't decode everything and no error
2898+ * occurred, either the input is truncated or the output buffer is too small.
2899+ * Since we know that the last input byte never produces any output, we know
2900+ * that if all the input was consumed and decoding wasn't finished, the file
2901+ * must be corrupt. Otherwise the output buffer has to be too small or the
 2902+ * file is corrupt in a way that decoding it produces too much output.
2903+ *
2904+ * If single-call decoding fails, we reset b->in_pos and b->out_pos back to
2905+ * their original values. This is because with some filter chains there won't
2906+ * be any valid uncompressed data in the output buffer unless the decoding
2907+ * actually succeeds (that's the price to pay of using the output buffer as
2908+ * the workspace).
2909+ */
2910+XZ_EXTERN enum xz_ret INIT xz_dec_run(struct xz_dec *s, struct xz_buf *b)
2911+{
2912+ size_t in_start;
2913+ size_t out_start;
2914+ enum xz_ret ret;
2915+
2916+ if (DEC_IS_SINGLE(s->mode))
2917+ xz_dec_reset(s);
2918+
2919+ in_start = b->in_pos;
2920+ out_start = b->out_pos;
2921+ ret = dec_main(s, b);
2922+
2923+ if (DEC_IS_SINGLE(s->mode)) {
2924+ if (ret == XZ_OK)
2925+ ret = b->in_pos == b->in_size
2926+ ? XZ_DATA_ERROR : XZ_BUF_ERROR;
2927+
2928+ if (ret != XZ_STREAM_END) {
2929+ b->in_pos = in_start;
2930+ b->out_pos = out_start;
2931+ }
2932+
2933+ } else if (ret == XZ_OK && in_start == b->in_pos
2934+ && out_start == b->out_pos) {
2935+ if (s->allow_buf_error)
2936+ ret = XZ_BUF_ERROR;
2937+
2938+ s->allow_buf_error = true;
2939+ } else {
2940+ s->allow_buf_error = false;
2941+ }
2942+
2943+ return ret;
2944+}
2945+
2946+XZ_EXTERN struct xz_dec *INIT xz_dec_init(enum xz_mode mode, uint32_t dict_max)
2947+{
2948+ struct xz_dec *s = malloc(sizeof(*s));
2949+ if (s == NULL)
2950+ return NULL;
2951+
2952+ s->mode = mode;
2953+
2954+#ifdef XZ_DEC_BCJ
2955+ s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode));
2956+ if (s->bcj == NULL)
2957+ goto error_bcj;
2958+#endif
2959+
2960+ s->lzma2 = xz_dec_lzma2_create(mode, dict_max);
2961+ if (s->lzma2 == NULL)
2962+ goto error_lzma2;
2963+
2964+ xz_dec_reset(s);
2965+ return s;
2966+
2967+error_lzma2:
2968+#ifdef XZ_DEC_BCJ
2969+ xz_dec_bcj_end(s->bcj);
2970+error_bcj:
2971+#endif
2972+ free(s);
2973+ return NULL;
2974+}
2975+
2976+XZ_EXTERN void INIT xz_dec_end(struct xz_dec *s)
2977+{
2978+ if (s != NULL) {
2979+ xz_dec_lzma2_end(s->lzma2);
2980+#ifdef XZ_DEC_BCJ
2981+ xz_dec_bcj_end(s->bcj);
2982+#endif
2983+ free(s);
2984+ }
2985+}
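
The VLI format that dec_vli() above handles incrementally is easier to see in a
one-shot form. The following sketch (not part of the patch; the function name is
made up) assumes the whole integer is already in the buffer:

    #include <stddef.h>
    #include <stdint.h>

    /* Decode one variable-length integer: 7 data bits per byte, least
     * significant group first, high bit set on all but the last byte.
     * At most 9 bytes (63 bits), matching VLI_BYTES_MAX for uint64_t.
     * Returns bytes consumed, or 0 on a truncated or invalid encoding. */
    static size_t vli_decode(const uint8_t *in, size_t in_size, uint64_t *out)
    {
        uint64_t value = 0;
        size_t i;

        for (i = 0; i < in_size && i < 9; ++i) {
            value |= (uint64_t)(in[i] & 0x7F) << (7 * i);

            if ((in[i] & 0x80) == 0) {
                /* Reject non-minimal encodings such as 0x80 0x00,
                 * exactly as dec_vli() does. */
                if (in[i] == 0 && i != 0)
                    return 0;
                *out = value;
                return i + 1;
            }
        }

        return 0;
    }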
2986diff --git a/xen/common/xz/lzma2.h b/xen/common/xz/lzma2.h
2987new file mode 100644
2988--- /dev/null
2989+++ b/xen/common/xz/lzma2.h
2990@@ -0,0 +1,204 @@
2991+/*
2992+ * LZMA2 definitions
2993+ *
2994+ * Authors: Lasse Collin <lasse.collin@tukaani.org>
2995+ * Igor Pavlov <http://7-zip.org/>
2996+ *
2997+ * This file has been put into the public domain.
2998+ * You can do whatever you want with this file.
2999+ */
3000+
3001+#ifndef XZ_LZMA2_H
3002+#define XZ_LZMA2_H
3003+
3004+/* Range coder constants */
3005+#define RC_SHIFT_BITS 8
3006+#define RC_TOP_BITS 24
3007+#define RC_TOP_VALUE (1 << RC_TOP_BITS)
3008+#define RC_BIT_MODEL_TOTAL_BITS 11
3009+#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
3010+#define RC_MOVE_BITS 5
3011+
3012+/*
3013+ * Maximum number of position states. A position state is the lowest pb
3014+ * number of bits of the current uncompressed offset. In some places there
3015+ * are different sets of probabilities for different position states.
3016+ */
3017+#define POS_STATES_MAX (1 << 4)
3018+
3019+/*
3020+ * This enum is used to track which LZMA symbols have occurred most recently
3021+ * and in which order. This information is used to predict the next symbol.
3022+ *
3023+ * Symbols:
3024+ * - Literal: One 8-bit byte
3025+ * - Match: Repeat a chunk of data at some distance
3026+ * - Long repeat: Multi-byte match at a recently seen distance
3027+ * - Short repeat: One-byte repeat at a recently seen distance
3028+ *
 3029+ * The symbol names are in the form STATE_oldest_older_previous. REP means
3030+ * either short or long repeated match, and NONLIT means any non-literal.
3031+ */
3032+enum lzma_state {
3033+ STATE_LIT_LIT,
3034+ STATE_MATCH_LIT_LIT,
3035+ STATE_REP_LIT_LIT,
3036+ STATE_SHORTREP_LIT_LIT,
3037+ STATE_MATCH_LIT,
3038+ STATE_REP_LIT,
3039+ STATE_SHORTREP_LIT,
3040+ STATE_LIT_MATCH,
3041+ STATE_LIT_LONGREP,
3042+ STATE_LIT_SHORTREP,
3043+ STATE_NONLIT_MATCH,
3044+ STATE_NONLIT_REP
3045+};
3046+
3047+/* Total number of states */
3048+#define STATES 12
3049+
3050+/* The lowest 7 states indicate that the previous state was a literal. */
3051+#define LIT_STATES 7
3052+
3053+/* Indicate that the latest symbol was a literal. */
3054+static inline void INIT lzma_state_literal(enum lzma_state *state)
3055+{
3056+ if (*state <= STATE_SHORTREP_LIT_LIT)
3057+ *state = STATE_LIT_LIT;
3058+ else if (*state <= STATE_LIT_SHORTREP)
3059+ *state -= 3;
3060+ else
3061+ *state -= 6;
3062+}
3063+
3064+/* Indicate that the latest symbol was a match. */
3065+static inline void INIT lzma_state_match(enum lzma_state *state)
3066+{
3067+ *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
3068+}
3069+
3070+/* Indicate that the latest state was a long repeated match. */
3071+static inline void INIT lzma_state_long_rep(enum lzma_state *state)
3072+{
3073+ *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
3074+}
3075+
3076+/* Indicate that the latest symbol was a short match. */
3077+static inline void INIT lzma_state_short_rep(enum lzma_state *state)
3078+{
3079+ *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
3080+}
3081+
3082+/* Test if the previous symbol was a literal. */
3083+static inline bool_t INIT lzma_state_is_literal(enum lzma_state state)
3084+{
3085+ return state < LIT_STATES;
3086+}
3087+
 3088+/* Each literal coder is divided into three sections:
3089+ * - 0x001-0x0FF: Without match byte
3090+ * - 0x101-0x1FF: With match byte; match bit is 0
3091+ * - 0x201-0x2FF: With match byte; match bit is 1
3092+ *
 3093+ * The match byte is used when the previous LZMA symbol was something other
 3094+ * than a literal (that is, it was some kind of match).
3095+ */
3096+#define LITERAL_CODER_SIZE 0x300
3097+
3098+/* Maximum number of literal coders */
3099+#define LITERAL_CODERS_MAX (1 << 4)
3100+
3101+/* Minimum length of a match is two bytes. */
3102+#define MATCH_LEN_MIN 2
3103+
3104+/* Match length is encoded with 4, 5, or 10 bits.
3105+ *
3106+ * Length Bits
3107+ * 2-9 4 = Choice=0 + 3 bits
3108+ * 10-17 5 = Choice=1 + Choice2=0 + 3 bits
3109+ * 18-273 10 = Choice=1 + Choice2=1 + 8 bits
3110+ */
3111+#define LEN_LOW_BITS 3
3112+#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
3113+#define LEN_MID_BITS 3
3114+#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
3115+#define LEN_HIGH_BITS 8
3116+#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
3117+#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)
3118+
3119+/*
 3120+ * The maximum length of a match is 273, which is a result of the encoding
3121+ * described above.
3122+ */
3123+#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)
3124+
3125+/*
3126+ * Different sets of probabilities are used for match distances that have
3127+ * very short match length: Lengths of 2, 3, and 4 bytes have a separate
3128+ * set of probabilities for each length. The matches with longer length
3129+ * use a shared set of probabilities.
3130+ */
3131+#define DIST_STATES 4
3132+
3133+/*
3134+ * Get the index of the appropriate probability array for decoding
3135+ * the distance slot.
3136+ */
3137+static inline uint32_t INIT lzma_get_dist_state(uint32_t len)
3138+{
3139+ return len < DIST_STATES + MATCH_LEN_MIN
3140+ ? len - MATCH_LEN_MIN : DIST_STATES - 1;
3141+}
3142+
3143+/*
3144+ * The highest two bits of a 32-bit match distance are encoded using six bits.
3145+ * This six-bit value is called a distance slot. This way encoding a 32-bit
3146+ * value takes 6-36 bits, larger values taking more bits.
3147+ */
3148+#define DIST_SLOT_BITS 6
3149+#define DIST_SLOTS (1 << DIST_SLOT_BITS)
3150+
3151+/* Match distances up to 127 are fully encoded using probabilities. Since
3152+ * the highest two bits (distance slot) are always encoded using six bits,
3153+ * the distances 0-3 don't need any additional bits to encode, since the
3154+ * distance slot itself is the same as the actual distance. DIST_MODEL_START
3155+ * indicates the first distance slot where at least one additional bit is
3156+ * needed.
3157+ */
3158+#define DIST_MODEL_START 4
3159+
3160+/*
3161+ * Match distances greater than 127 are encoded in three pieces:
3162+ * - distance slot: the highest two bits
3163+ * - direct bits: 2-26 bits below the highest two bits
3164+ * - alignment bits: four lowest bits
3165+ *
3166+ * Direct bits don't use any probabilities.
3167+ *
3168+ * The distance slot value of 14 is for distances 128-191.
3169+ */
3170+#define DIST_MODEL_END 14
3171+
3172+/* Distance slots that indicate a distance <= 127. */
3173+#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
3174+#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)
3175+
3176+/*
3177+ * For match distances greater than 127, only the highest two bits and the
3178+ * lowest four bits (alignment) is encoded using probabilities.
3179+ */
3180+#define ALIGN_BITS 4
3181+#define ALIGN_SIZE (1 << ALIGN_BITS)
3182+#define ALIGN_MASK (ALIGN_SIZE - 1)
3183+
3184+/* Total number of all probability variables */
3185+#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)
3186+
3187+/*
3188+ * LZMA remembers the four most recent match distances. Reusing these
3189+ * distances tends to take less space than re-encoding the actual
3190+ * distance value.
3191+ */
3192+#define REPS 4
3193+
3194+#endif
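
The length-coder table in the comments above can be cross-checked with a few lines
of arithmetic. A sketch, not part of the patch; lzma_len_bits() is a hypothetical
helper mirroring the LEN_* constants:

    /* Bits spent by the LZMA length coder on a match length. */
    static unsigned int lzma_len_bits(unsigned int len)
    {
        if (len < 2 || len > 273)   /* MATCH_LEN_MIN..MATCH_LEN_MAX */
            return 0;
        if (len <= 9)
            return 1 + 3;           /* Choice=0 + 3 low bits   ->  4 */
        if (len <= 17)
            return 2 + 3;           /* Choice=1, Choice2=0 + 3 ->  5 */
        return 2 + 8;               /* Choice=1, Choice2=1 + 8 -> 10 */
    }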
3195diff --git a/xen/common/xz/private.h b/xen/common/xz/private.h
3196new file mode 100644
3197--- /dev/null
3198+++ b/xen/common/xz/private.h
3199@@ -0,0 +1,271 @@
3200+/*
3201+ * Private includes and definitions
3202+ *
3203+ * Author: Lasse Collin <lasse.collin@tukaani.org>
3204+ *
3205+ * This file has been put into the public domain.
3206+ * You can do whatever you want with this file.
3207+ */
3208+
3209+#ifndef XZ_PRIVATE_H
3210+#define XZ_PRIVATE_H
3211+
3212+#include <xen/kernel.h>
3213+#include <asm/byteorder.h>
3214+#define get_le32(p) le32_to_cpup((const uint32_t *)(p))
3215+
3216+#if 1 /* ndef CONFIG_??? */
3217+static inline u32 INIT get_unaligned_le32(void *p)
3218+{
3219+ return le32_to_cpup(p);
3220+}
3221+
3222+static inline void INIT put_unaligned_le32(u32 val, void *p)
3223+{
3224+ *(__force __le32*)p = cpu_to_le32(val);
3225+}
3226+#else
3227+#include <asm/unaligned.h>
3228+
3229+static inline u32 INIT get_unaligned_le32(void *p)
3230+{
3231+ return le32_to_cpu(__get_unaligned(p, 4));
3232+}
3233+
3234+static inline void INIT put_unaligned_le32(u32 val, void *p)
3235+{
3236+ __put_unaligned(cpu_to_le32(val), p, 4);
3237+}
3238+#endif
3239+
3240+#define false 0
3241+#define true 1
3242+
3243+/**
3244+ * enum xz_mode - Operation mode
3245+ *
3246+ * @XZ_SINGLE: Single-call mode. This uses less RAM than
 3247+ * multi-call modes, because the LZMA2
3248+ * dictionary doesn't need to be allocated as
3249+ * part of the decoder state. All required data
3250+ * structures are allocated at initialization,
3251+ * so xz_dec_run() cannot return XZ_MEM_ERROR.
3252+ * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2
3253+ * dictionary buffer. All data structures are
3254+ * allocated at initialization, so xz_dec_run()
3255+ * cannot return XZ_MEM_ERROR.
3256+ * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is
3257+ * allocated once the required size has been
3258+ * parsed from the stream headers. If the
3259+ * allocation fails, xz_dec_run() will return
3260+ * XZ_MEM_ERROR.
3261+ *
3262+ * It is possible to enable support only for a subset of the above
3263+ * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC,
3264+ * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled
3265+ * with support for all operation modes, but the preboot code may
3266+ * be built with fewer features to minimize code size.
3267+ */
3268+enum xz_mode {
3269+ XZ_SINGLE,
3270+ XZ_PREALLOC,
3271+ XZ_DYNALLOC
3272+};
3273+
3274+/**
3275+ * enum xz_ret - Return codes
3276+ * @XZ_OK: Everything is OK so far. More input or more
3277+ * output space is required to continue. This
3278+ * return code is possible only in multi-call mode
3279+ * (XZ_PREALLOC or XZ_DYNALLOC).
3280+ * @XZ_STREAM_END: Operation finished successfully.
3281+ * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding
3282+ * is still possible in multi-call mode by simply
3283+ * calling xz_dec_run() again.
3284+ * Note that this return value is used only if
3285+ * XZ_DEC_ANY_CHECK was defined at build time,
3286+ * which is not used in the kernel. Unsupported
3287+ * check types return XZ_OPTIONS_ERROR if
3288+ * XZ_DEC_ANY_CHECK was not defined at build time.
3289+ * @XZ_MEM_ERROR: Allocating memory failed. This return code is
3290+ * possible only if the decoder was initialized
3291+ * with XZ_DYNALLOC. The amount of memory that was
3292+ * tried to be allocated was no more than the
3293+ * dict_max argument given to xz_dec_init().
3294+ * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than
3295+ * allowed by the dict_max argument given to
3296+ * xz_dec_init(). This return value is possible
3297+ * only in multi-call mode (XZ_PREALLOC or
3298+ * XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
3299+ * ignores the dict_max argument.
3300+ * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic
3301+ * bytes).
3302+ * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested
3303+ * compression options. In the decoder this means
3304+ * that the header CRC32 matches, but the header
3305+ * itself specifies something that we don't support.
3306+ * @XZ_DATA_ERROR: Compressed data is corrupt.
3307+ * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly
3308+ * different between multi-call and single-call
3309+ * mode; more information below.
3310+ *
3311+ * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
3312+ * to XZ code cannot consume any input and cannot produce any new output.
3313+ * This happens when there is no new input available, or the output buffer
3314+ * is full while at least one output byte is still pending. Assuming your
3315+ * code is not buggy, you can get this error only when decoding a compressed
3316+ * stream that is truncated or otherwise corrupt.
3317+ *
3318+ * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
3319+ * is too small or the compressed input is corrupt in a way that makes the
3320+ * decoder produce more output than the caller expected. When it is
3321+ * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
3322+ * is used instead of XZ_BUF_ERROR.
3323+ */
3324+enum xz_ret {
3325+ XZ_OK,
3326+ XZ_STREAM_END,
3327+ XZ_UNSUPPORTED_CHECK,
3328+ XZ_MEM_ERROR,
3329+ XZ_MEMLIMIT_ERROR,
3330+ XZ_FORMAT_ERROR,
3331+ XZ_OPTIONS_ERROR,
3332+ XZ_DATA_ERROR,
3333+ XZ_BUF_ERROR
3334+};
3335+
3336+/**
3337+ * struct xz_buf - Passing input and output buffers to XZ code
3338+ * @in: Beginning of the input buffer. This may be NULL if and only
3339+ * if in_pos is equal to in_size.
3340+ * @in_pos: Current position in the input buffer. This must not exceed
3341+ * in_size.
3342+ * @in_size: Size of the input buffer
3343+ * @out: Beginning of the output buffer. This may be NULL if and only
3344+ * if out_pos is equal to out_size.
3345+ * @out_pos: Current position in the output buffer. This must not exceed
3346+ * out_size.
3347+ * @out_size: Size of the output buffer
3348+ *
3349+ * Only the contents of the output buffer from out[out_pos] onward, and
3350+ * the variables in_pos and out_pos are modified by the XZ code.
3351+ */
3352+struct xz_buf {
3353+ const uint8_t *in;
3354+ size_t in_pos;
3355+ size_t in_size;
3356+
3357+ uint8_t *out;
3358+ size_t out_pos;
3359+ size_t out_size;
3360+};
3361+
3362+/**
3363+ * struct xz_dec - Opaque type to hold the XZ decoder state
3364+ */
3365+struct xz_dec;
3366+
3367+/* If no specific decoding mode is requested, enable support for all modes. */
3368+#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \
3369+ && !defined(XZ_DEC_DYNALLOC)
3370+# define XZ_DEC_SINGLE
3371+# define XZ_DEC_PREALLOC
3372+# define XZ_DEC_DYNALLOC
3373+#endif
3374+
3375+/*
3376+ * The DEC_IS_foo(mode) macros are used in "if" statements. If only some
3377+ * of the supported modes are enabled, these macros will evaluate to true or
3378+ * false at compile time and thus allow the compiler to omit unneeded code.
3379+ */
3380+#ifdef XZ_DEC_SINGLE
3381+# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE)
3382+#else
3383+# define DEC_IS_SINGLE(mode) (false)
3384+#endif
3385+
3386+#ifdef XZ_DEC_PREALLOC
3387+# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC)
3388+#else
3389+# define DEC_IS_PREALLOC(mode) (false)
3390+#endif
3391+
3392+#ifdef XZ_DEC_DYNALLOC
3393+# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC)
3394+#else
3395+# define DEC_IS_DYNALLOC(mode) (false)
3396+#endif
3397+
3398+#if !defined(XZ_DEC_SINGLE)
3399+# define DEC_IS_MULTI(mode) (true)
3400+#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC)
3401+# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE)
3402+#else
3403+# define DEC_IS_MULTI(mode) (false)
3404+#endif
3405+
3406+/*
3407+ * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ.
3408+ * XZ_DEC_BCJ is used to enable generic support for BCJ decoders.
3409+ */
3410+#ifndef XZ_DEC_BCJ
3411+# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \
3412+ || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \
3413+ || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \
3414+ || defined(XZ_DEC_SPARC)
3415+# define XZ_DEC_BCJ
3416+# endif
3417+#endif
3418+
3419+/*
3420+ * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
3421+ * before calling xz_dec_lzma2_run().
3422+ */
3423+XZ_EXTERN struct xz_dec_lzma2 *xz_dec_lzma2_create(enum xz_mode mode,
3424+ uint32_t dict_max);
3425+
3426+/*
3427+ * Decode the LZMA2 properties (one byte) and reset the decoder. Return
3428+ * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not
3429+ * big enough, and XZ_OPTIONS_ERROR if props indicates something that this
3430+ * decoder doesn't support.
3431+ */
3432+XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s,
3433+ uint8_t props);
3434+
3435+/* Decode raw LZMA2 stream from b->in to b->out. */
3436+XZ_EXTERN enum xz_ret xz_dec_lzma2_run(struct xz_dec_lzma2 *s,
3437+ struct xz_buf *b);
3438+
3439+/* Free the memory allocated for the LZMA2 decoder. */
3440+XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s);
3441+
3442+#ifdef XZ_DEC_BCJ
3443+/*
3444+ * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before
3445+ * calling xz_dec_bcj_run().
3446+ */
3447+XZ_EXTERN struct xz_dec_bcj *xz_dec_bcj_create(bool_t single_call);
3448+
3449+/*
3450+ * Decode the Filter ID of a BCJ filter. This implementation doesn't
3451+ * support custom start offsets, so no decoding of Filter Properties
3452+ * is needed. Returns XZ_OK if the given Filter ID is supported.
3453+ * Otherwise XZ_OPTIONS_ERROR is returned.
3454+ */
3455+XZ_EXTERN enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id);
3456+
3457+/*
3458+ * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is
3459+ * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run()
3460+ * must be called directly.
3461+ */
3462+XZ_EXTERN enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s,
3463+ struct xz_dec_lzma2 *lzma2,
3464+ struct xz_buf *b);
3465+
3466+/* Free the memory allocated for the BCJ filters. */
3467+#define xz_dec_bcj_end(s) free(s)
3468+#endif
3469+
3470+#endif
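
The xz_buf and xz_ret documentation above implies a particular multi-call calling
convention. Below is a hedged sketch of that loop, assuming the declarations from
private.h plus two hypothetical helpers, read_more() and consume(), which do not
exist in the patch:

    #include "private.h"

    size_t read_more(uint8_t *buf, size_t cap);  /* refill input; 0 at EOF */
    void consume(const uint8_t *buf, size_t n);  /* use decompressed bytes */

    static enum xz_ret run_multicall(struct xz_dec *s,
                                     uint8_t *in, size_t in_cap,
                                     uint8_t *out, size_t out_cap)
    {
        struct xz_buf b = {
            .in = in, .in_pos = 0, .in_size = 0,
            .out = out, .out_pos = 0, .out_size = out_cap,
        };
        enum xz_ret ret;

        do {
            if (b.in_pos == b.in_size) {
                b.in_size = read_more(in, in_cap);
                b.in_pos = 0;
            }

            ret = xz_dec_run(s, &b);

            consume(out, b.out_pos);
            b.out_pos = 0;
        } while (ret == XZ_OK); /* XZ_OK: needs more input or output space */

        return ret; /* XZ_STREAM_END on success */
    }

Note that a truncated input ends the loop too: after two consecutive calls that
make no progress, xz_dec_run() returns XZ_BUF_ERROR rather than looping forever.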
3471diff --git a/xen/common/xz/stream.h b/xen/common/xz/stream.h
3472new file mode 100644
3473--- /dev/null
3474+++ b/xen/common/xz/stream.h
3475@@ -0,0 +1,55 @@
3476+/*
3477+ * Definitions for handling the .xz file format
3478+ *
3479+ * Author: Lasse Collin <lasse.collin@tukaani.org>
3480+ *
3481+ * This file has been put into the public domain.
3482+ * You can do whatever you want with this file.
3483+ */
3484+
3485+#ifndef XZ_STREAM_H
3486+#define XZ_STREAM_H
3487+
3488+/*
3489+ * See the .xz file format specification at
3490+ * http://tukaani.org/xz/xz-file-format.txt
3491+ * to understand the container format.
3492+ */
3493+
3494+#define STREAM_HEADER_SIZE 12
3495+
3496+#define HEADER_MAGIC "\3757zXZ"
3497+#define HEADER_MAGIC_SIZE 6
3498+
3499+#define FOOTER_MAGIC "YZ"
3500+#define FOOTER_MAGIC_SIZE 2
3501+
3502+/*
3503+ * Variable-length integer can hold a 63-bit unsigned integer or a special
3504+ * value indicating that the value is unknown.
3505+ *
3506+ * Experimental: vli_type can be defined to uint32_t to save a few bytes
3507+ * in code size (no effect on speed). Doing so limits the uncompressed and
3508+ * compressed size of the file to less than 256 MiB and may also weaken
3509+ * error detection slightly.
3510+ */
3511+typedef uint64_t vli_type;
3512+
3513+#define VLI_MAX ((vli_type)-1 / 2)
3514+#define VLI_UNKNOWN ((vli_type)-1)
3515+
3516+/* Maximum encoded size of a VLI */
3517+#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
3518+
3519+/* Integrity Check types */
3520+enum xz_check {
3521+ XZ_CHECK_NONE = 0,
3522+ XZ_CHECK_CRC32 = 1,
3523+ XZ_CHECK_CRC64 = 4,
3524+ XZ_CHECK_SHA256 = 10
3525+};
3526+
3527+/* Maximum possible Check ID */
3528+#define XZ_CHECK_MAX 15
3529+
3530+#endif
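
HEADER_MAGIC is written as the C string "\3757zXZ", i.e. the bytes 0xFD '7' 'z'
'X' 'Z' plus the string's terminating NUL, which is why every magic comparison in
this patch uses six bytes. A small sketch, not part of the patch:

    #include <stdint.h>
    #include <string.h>

    static int is_xz_stream(const uint8_t *buf, size_t len)
    {
        /* 0xFD, '7', 'z', 'X', 'Z', 0x00 -- the six magic bytes */
        static const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };

        return len >= sizeof(magic) && !memcmp(buf, magic, sizeof(magic));
    }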
3531diff --git a/xen/include/xen/decompress.h b/xen/include/xen/decompress.h
3532--- a/xen/include/xen/decompress.h
3533+++ b/xen/include/xen/decompress.h
3534@@ -31,7 +31,7 @@
3535 * dependent).
3536 */
3537
3538-decompress_fn bunzip2, unlzma, unlzo;
3539+decompress_fn bunzip2, unxz, unlzma, unlzo;
3540
3541 int decompress(void *inbuf, unsigned int len, void *outbuf);
3542
3543--- a/tools/libxc/xc_dom_bzimageloader.c 2011-10-20 19:05:42.000000000 +0200
3544+++ b/tools/libxc/xc_dom_bzimageloader.c 2012-03-04 23:34:53.797635804 +0100
3545@@ -163,11 +163,10 @@
3546
3547 #include <lzma.h>
3548
3549-static int xc_try_lzma_decode(
3550- struct xc_dom_image *dom, void **blob, size_t *size)
3551+static int _xc_try_lzma_decode(
3552+ struct xc_dom_image *dom, void **blob, size_t *size,
3553+ lzma_stream *stream, lzma_ret ret, const char *what)
3554 {
3555- lzma_stream stream = LZMA_STREAM_INIT;
3556- lzma_ret ret;
3557 lzma_action action = LZMA_RUN;
3558 unsigned char *out_buf;
3559 unsigned char *tmp_buf;
3560@@ -175,10 +174,9 @@
3561 int outsize;
3562 const char *msg;
3563
3564- ret = lzma_alone_decoder(&stream, 128*1024*1024);
3565 if ( ret != LZMA_OK )
3566 {
3567- DOMPRINTF("LZMA: Failed to init stream decoder");
3568+ DOMPRINTF("%s: Failed to init decoder", what);
3569 return -1;
3570 }
3571
3572@@ -190,22 +188,22 @@
3573 out_buf = malloc(outsize);
3574 if ( out_buf == NULL )
3575 {
3576- DOMPRINTF("LZMA: Failed to alloc memory");
3577+ DOMPRINTF("%s: Failed to alloc memory", what);
3578 goto lzma_cleanup;
3579 }
3580
3581- stream.next_in = dom->kernel_blob;
3582- stream.avail_in = dom->kernel_size;
3583+ stream->next_in = dom->kernel_blob;
3584+ stream->avail_in = dom->kernel_size;
3585
3586- stream.next_out = out_buf;
3587- stream.avail_out = dom->kernel_size;
3588+ stream->next_out = out_buf;
3589+ stream->avail_out = dom->kernel_size;
3590
3591 for ( ; ; )
3592 {
3593- ret = lzma_code(&stream, action);
3594+ ret = lzma_code(stream, action);
3595 if ( ret == LZMA_STREAM_END )
3596 {
3597- DOMPRINTF("LZMA: Saw data stream end");
3598+ DOMPRINTF("%s: Saw data stream end", what);
3599 retval = 0;
3600 break;
3601 }
3602@@ -242,18 +240,18 @@
3603 msg = "Internal program error (bug)";
3604 break;
3605 }
3606- DOMPRINTF("%s: LZMA decompression error %s",
3607- __FUNCTION__, msg);
3608+ DOMPRINTF("%s: %s decompression error %s",
3609+ __FUNCTION__, what, msg);
3610 free(out_buf);
3611 goto lzma_cleanup;
3612 }
3613
3614- if ( stream.avail_out == 0 )
3615+ if ( stream->avail_out == 0 )
3616 {
3617 /* Protect against output buffer overflow */
3618 if ( outsize > INT_MAX / 2 )
3619 {
3620- DOMPRINTF("LZMA: output buffer overflow");
3621+ DOMPRINTF("%s: output buffer overflow", what);
3622 free(out_buf);
3623 goto lzma_cleanup;
3624 }
3625@@ -261,32 +259,61 @@
3626 tmp_buf = realloc(out_buf, outsize * 2);
3627 if ( tmp_buf == NULL )
3628 {
3629- DOMPRINTF("LZMA: Failed to realloc memory");
3630+ DOMPRINTF("%s: Failed to realloc memory", what);
3631 free(out_buf);
3632 goto lzma_cleanup;
3633 }
3634 out_buf = tmp_buf;
3635
3636- stream.next_out = out_buf + outsize;
3637- stream.avail_out = (outsize * 2) - outsize;
3638+ stream->next_out = out_buf + outsize;
3639+ stream->avail_out = (outsize * 2) - outsize;
3640 outsize *= 2;
3641 }
3642 }
3643
3644- DOMPRINTF("%s: LZMA decompress OK, 0x%zx -> 0x%zx",
3645- __FUNCTION__, *size, (size_t)stream.total_out);
3646+ DOMPRINTF("%s: %s decompress OK, 0x%zx -> 0x%zx",
3647+ __FUNCTION__, what, *size, (size_t)stream->total_out);
3648
3649 *blob = out_buf;
3650- *size = stream.total_out;
3651+ *size = stream->total_out;
3652
3653 lzma_cleanup:
3654- lzma_end(&stream);
3655+ lzma_end(stream);
3656
3657 return retval;
3658 }
3659
 3660+/* 128 MiB is the minimum size (half-way) documented to work for all inputs. */
3661+#define LZMA_BLOCK_SIZE (128*1024*1024)
3662+
3663+static int xc_try_xz_decode(
3664+ struct xc_dom_image *dom, void **blob, size_t *size)
3665+{
3666+ lzma_stream stream = LZMA_STREAM_INIT;
3667+ lzma_ret ret = lzma_stream_decoder(&stream, LZMA_BLOCK_SIZE, 0);
3668+
3669+ return _xc_try_lzma_decode(dom, blob, size, &stream, ret, "XZ");
3670+}
3671+
3672+static int xc_try_lzma_decode(
3673+ struct xc_dom_image *dom, void **blob, size_t *size)
3674+{
3675+ lzma_stream stream = LZMA_STREAM_INIT;
3676+ lzma_ret ret = lzma_alone_decoder(&stream, LZMA_BLOCK_SIZE);
3677+
3678+ return _xc_try_lzma_decode(dom, blob, size, &stream, ret, "LZMA");
3679+}
3680+
3681 #else /* !defined(HAVE_LZMA) */
3682
3683+static int xc_try_xz_decode(
3684+ struct xc_dom_image *dom, void **blob, size_t *size)
3685+{
3686+ DOMPRINTF("%s: XZ decompress support unavailable",
3687+ __FUNCTION__);
3688+ return -1;
3689+}
3690+
3691 static int xc_try_lzma_decode(
3692 struct xc_dom_image *dom, void **blob, size_t *size)
3693 {
3694@@ -607,6 +634,17 @@
3695 __FUNCTION__);
3696 return -EINVAL;
3697 }
3698+ }
 3699+ else if ( check_magic(dom, "\3757zXZ", 6) )
3700+ {
3701+ ret = xc_try_xz_decode(dom, &dom->kernel_blob, &dom->kernel_size);
3702+ if ( ret < 0 )
3703+ {
3704+ xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
3705+ "%s unable to XZ decompress kernel",
3706+ __FUNCTION__);
3707+ return -EINVAL;
3708+ }
3709 }
3710 else if ( check_magic(dom, "\135\000", 2) )
3711 {
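
The libxc refactoring above exists because the .xz and legacy .lzma paths differ
only in how the decoder is initialised; the lzma_code() loop and cleanup are
shared. A sketch of just that difference, using liblzma's documented
initialisers; it is illustrative, not a replacement for the patch:

    #include <lzma.h>

    #define XC_LZMA_MEMLIMIT (128 * 1024 * 1024) /* 128 MiB, as in the patch */

    static lzma_ret init_xz(lzma_stream *stream)
    {
        /* .xz container format, no decoder flags */
        return lzma_stream_decoder(stream, XC_LZMA_MEMLIMIT, 0);
    }

    static lzma_ret init_lzma_alone(lzma_stream *stream)
    {
        /* legacy "LZMA_Alone" (.lzma) format */
        return lzma_alone_decoder(stream, XC_LZMA_MEMLIMIT);
    }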