xz_embedded_sys/
lib.rs

1#![allow(non_camel_case_types)]
2
3//! xz-embedded-sys
4//!
5//! FFI Bindings to the xz-embedded library, which is a simple xz decompression library
6//!
//! The documentation in this crate is copied almost verbatim from the xz-embedded header file, so
//! it may contain some C-isms that do not apply to this Rust crate. Please read carefully.
10//!
11
12extern crate libc;
13use libc::{
14    size_t
15};
16
17
18/// A wrapper around xz_ret
19#[derive(Debug)]
20pub struct XZRawError {
21    pub code: xz_ret
22}
23
24impl std::fmt::Display for XZRawError {
25    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
26        write!(f, "XZRawError: {:?}", self.code)
27    }
28}
29
30impl std::convert::From<xz_ret> for XZRawError {
31    fn from(e: xz_ret) -> XZRawError {
32        XZRawError{code: e}
33    }
34}
35
36impl std::error::Error for XZRawError {
37    fn description(&self) -> &str { 
38        match self.code {
39            xz_ret::XZ_OK => "Everything is OK so far",
40            xz_ret::XZ_STREAM_END => "Operation finished successfully",
41            xz_ret::XZ_UNSUPPORTED_CHECK => "Integrity check type is not supported",
42            xz_ret::XZ_MEM_ERROR => "Allocating memory failed",
43            xz_ret::XZ_MEMLIMIT_ERROR => "A bigger LZMA2 dictionary is needed than allowed by dict_max",
44            xz_ret::XZ_FORMAT_ERROR => "File format was not recognized",
45            xz_ret::XZ_OPTIONS_ERROR => "This implementation doesn't support the requested compression options",
46            xz_ret::XZ_DATA_ERROR => "Compressed data is corrupt",
47            xz_ret::XZ_BUF_ERROR => "Cannot make any progress"
48        }
49    }
50}
51
/// Operation mode
///
/// It is possible to enable support only for a subset of the modes listed
/// below when the C library is compiled, by defining XZ_DEC_SINGLE,
/// XZ_DEC_PREALLOC, or XZ_DEC_DYNALLOC. The xz_dec kernel module is always
/// compiled with support for all operation modes, but the preboot code may
/// be built with fewer features to minimize code size.
///
/// The discriminants are written out explicitly; they equal the values the
/// variants receive implicitly from their declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub enum xz_mode {
    /// Single-call mode.
    ///
    /// This uses less RAM than multi-call modes, because the LZMA2 dictionary doesn't need to
    /// be allocated as part of the decoder state. All required data structures are allocated at
    /// initialization, so xz_dec_run() cannot return XZ_MEM_ERROR.
    XZ_SINGLE = 0,

    /// Multi-call mode with preallocated LZMA2 dictionary buffer.
    ///
    /// All data structures are allocated at initialization, so xz_dec_run() cannot return
    /// XZ_MEM_ERROR.
    XZ_PREALLOC = 1,

    /// Multi-call mode.
    ///
    /// The LZMA2 dictionary is allocated once the required size has been parsed from the stream
    /// headers. If the allocation fails, xz_dec_run() will return XZ_MEM_ERROR.
    XZ_DYNALLOC = 2,
}
81
/// Return codes
///
/// In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
/// to XZ code cannot consume any input and cannot produce any new output.
/// This happens when there is no new input available, or the output buffer
/// is full while at least one output byte is still pending. Assuming your
/// code is not buggy, you can get this error only when decoding a compressed
/// stream that is truncated or otherwise corrupt.
///
/// In single-call mode, XZ_BUF_ERROR is returned only when the output buffer
/// is too small or the compressed input is corrupt in a way that makes the
/// decoder produce more output than the caller expected. When it is
/// (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR
/// is used instead of XZ_BUF_ERROR.
///
/// The discriminants are written out explicitly; they equal the values the
/// variants receive implicitly from their declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub enum xz_ret {
    /// Everything is OK so far. More input or more
    /// output space is required to continue. This
    /// return code is possible only in multi-call mode
    /// (XZ_PREALLOC or XZ_DYNALLOC).
    XZ_OK = 0,

    /// Operation finished successfully.
    XZ_STREAM_END = 1,

    /// Integrity check type is not supported. Decoding
    /// is still possible in multi-call mode by simply
    /// calling xz_dec_run() again.
    /// Note that this return value is used only if
    /// XZ_DEC_ANY_CHECK was defined at build time,
    /// which is not used in the kernel. Unsupported
    /// check types return XZ_OPTIONS_ERROR if
    /// XZ_DEC_ANY_CHECK was not defined at build time.
    XZ_UNSUPPORTED_CHECK = 2,

    /// Allocating memory failed. This return code is
    /// possible only if the decoder was initialized
    /// with XZ_DYNALLOC. The amount of memory that was
    /// tried to be allocated was no more than the
    /// dict_max argument given to xz_dec_init().
    XZ_MEM_ERROR = 3,

    /// A bigger LZMA2 dictionary would be needed than
    /// allowed by the dict_max argument given to
    /// xz_dec_init(). This return value is possible
    /// only in multi-call mode (XZ_PREALLOC or
    /// XZ_DYNALLOC); the single-call mode (XZ_SINGLE)
    /// ignores the dict_max argument.
    XZ_MEMLIMIT_ERROR = 4,

    /// File format was not recognized (wrong magic
    /// bytes).
    XZ_FORMAT_ERROR = 5,

    /// This implementation doesn't support the requested
    /// compression options. In the decoder this means
    /// that the header CRC32 matches, but the header
    /// itself specifies something that we don't support.
    XZ_OPTIONS_ERROR = 6,

    /// Compressed data is corrupt.
    XZ_DATA_ERROR = 7,

    /// Cannot make any progress. Details are slightly
    /// different between multi-call and single-call
    /// mode; see the documentation of this enum itself
    /// for more information.
    XZ_BUF_ERROR = 8,
}
152
153///  Passing input and output buffers to XZ code
154///
155///
156#[repr(C)]
157pub struct xz_buf {
158    /// Beginning of the input buffer. This may be NULL if and only
159    /// if in_pos is equal to in_size.
160    pub _in: *const u8,
161    /// Current position in the input buffer. This must not exceed
162    /// in_size.
163    pub in_pos: size_t,
164    /// Size of the input buffer
165    ///
166    pub in_size: size_t,
167
168    /// Beginning of the output buffer. This may be NULL if and only
169    /// if out_pos is equal to out_size.
170    pub out: *mut u8,
171    /// Current position in the output buffer. This must not exceed
172    /// out_size.
173    pub out_pos: size_t,
174    /// Size of the output buffer
175    pub out_size: size_t
176}
177
/// Opaque type to hold the XZ decoder state
///
/// Values of this type are only ever handled through the raw pointer returned
/// by `xz_dec_init()`. It is declared as a zero-sized `#[repr(C)]` struct with
/// a private field rather than as an empty enum: an empty enum is uninhabited,
/// so the compiler may assume a value of it can never exist, which makes it a
/// poor stand-in for a foreign object that does exist. The private field keeps
/// the type impossible to construct from outside this module.
#[repr(C)]
pub struct xz_dec {
    _private: [u8; 0],
}
180
// Raw declarations of the xz-embedded decoder and CRC functions. Like every
// foreign function, each of them is `unsafe` to call.
extern "C" {
    /// Allocate and initialize a XZ decoder state
    ///
    /// * `mode`: Operation mode
    ///
    /// * `dict_max`: Maximum size of the LZMA2 dictionary (history buffer) for
    ///   multi-call decoding. This is ignored in single-call mode
    ///   (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes
    ///   or 2^n + 2^(n-1) bytes (the latter sizes are less common
    ///   in practice), so other values for dict_max don't make sense.
    ///   In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB,
    ///   512 KiB, and 1 MiB are probably the only reasonable values,
    ///   except for kernel and initramfs images where a bigger
    ///   dictionary can be fine and useful.
    ///
    /// Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at
    /// once. The caller must provide enough output space or the decoding will
    /// fail. The output space is used as the dictionary buffer, which is why
    /// there is no need to allocate the dictionary as part of the decoder's
    /// internal state.
    ///
    /// Because the output buffer is used as the workspace, streams encoded using
    /// a big dictionary are not a problem in single-call mode. It is enough that
    /// the output buffer is big enough to hold the actual uncompressed data; it
    /// can be smaller than the dictionary size stored in the stream headers.
    ///
    /// Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes
    /// of memory is preallocated for the LZMA2 dictionary. This way there is no
    /// risk that xz_dec_run() could run out of memory, since xz_dec_run() will
    /// never allocate any memory. Instead, if the preallocated dictionary is too
    /// small for decoding the given input stream, xz_dec_run() will return
    /// XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be
    /// decoded to avoid allocating excessive amount of memory for the dictionary.
    ///
    /// Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC):
    /// dict_max specifies the maximum allowed dictionary size that xz_dec_run()
    /// may allocate once it has parsed the dictionary size from the stream
    /// headers. This way excessive allocations can be avoided while still
    /// limiting the maximum memory usage to a sane value to prevent running the
    /// system out of memory when decompressing streams from untrusted sources.
    ///
    /// On success, xz_dec_init() returns a pointer to struct xz_dec, which is
    /// ready to be used with xz_dec_run(). If memory allocation fails,
    /// xz_dec_init() returns NULL.
    pub fn xz_dec_init(mode: xz_mode, dict_max: u32) -> *mut xz_dec;

    /// Run the XZ decoder
    ///
    /// * `s`: Decoder state allocated using xz_dec_init()
    ///
    /// * `b`: Input and output buffers
    ///
    /// The possible return values depend on build options and operation mode.
    /// See enum xz_ret for details.
    ///
    /// Note that if an error occurs in single-call mode (return value is not
    /// XZ_STREAM_END), b.in_pos and b.out_pos are not modified and the
    /// contents of the output buffer from b.out[b.out_pos] onward are
    /// undefined. This is true even after XZ_BUF_ERROR, because with some filter
    /// chains, there may be a second pass over the output buffer, and this pass
    /// cannot be properly done if the output buffer is truncated. Thus, you
    /// cannot give the single-call decoder a too small buffer and then expect to
    /// get that amount valid data from the beginning of the stream. You must use
    /// the multi-call decoder if you don't want to uncompress the whole stream.
    pub fn xz_dec_run(s: *mut xz_dec, b: *mut xz_buf) -> xz_ret;

    /// Reset an already allocated decoder state
    ///
    /// * `s`: Decoder state allocated using xz_dec_init()
    ///
    /// This function can be used to reset the multi-call decoder state without
    /// freeing and reallocating memory with xz_dec_end() and xz_dec_init().
    ///
    /// In single-call mode, xz_dec_reset() is always called in the beginning of
    /// xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in
    /// multi-call mode.
    pub fn xz_dec_reset(s: *mut xz_dec);

    /// Free the memory allocated for the decoder state
    ///
    /// * `s`: Decoder state allocated using xz_dec_init(). If s is NULL, this function does
    ///   nothing.
    pub fn xz_dec_end(s: *mut xz_dec);


    /// Initialize the CRC32 lookup table
    ///
    /// This must be called before any other xz_* function to initialize
    /// the CRC32 lookup table.
    pub fn xz_crc32_init();

    /// Update CRC32 value using the polynomial from IEEE-802.3.
    ///
    /// To start a new calculation, the third argument (`crc`) must be zero. To continue the
    /// calculation, the previously returned value is passed as the third argument.
    pub fn xz_crc32(buf: *const u8, size: size_t, crc: u32) -> u32;


    /// Initialize the CRC64 lookup table
    ///
    /// This must be called before any other xz_* function (except xz_crc32_init())
    /// to initialize the CRC64 lookup table.
    pub fn xz_crc64_init();

    /// Update CRC64 value using the polynomial from ECMA-182.
    ///
    /// To start a new calculation, the third argument (`crc`) must be zero. To continue the
    /// calculation, the previously returned value is passed as the third argument.
    pub fn xz_crc64(buf: *const u8, size: size_t, crc: u64) -> u64;

}
292
293
294
/// Decodes a complete 64-byte `.xz` stream holding the text "hello" with a
/// single `xz_dec_run()` call (multi-call decoder, XZ_DYNALLOC) and checks the
/// return code, the consumed/produced byte counts and the decoded bytes.
#[test]
fn test_full_hello_decompress() {
    let data: Vec<u8> = vec![
        0xfd,0x37,0x7a,0x58,0x5a,0x00,0x00,0x04,0xe6,0xd6,0xb4,0x46,0x02,0x00,0x21,0x01,
        0x16,0x00,0x00,0x00,0x74,0x2f,0xe5,0xa3,0x01,0x00,0x04,0x68,0x65,0x6c,0x6c,0x6f,
        0x00,0x00,0x00,0x00,0xb1,0x37,0xb9,0xdb,0xe5,0xda,0x1e,0x9b,0x00,0x01,0x1d,0x05,
        0xb8,0x2d,0x80,0xaf,0x1f,0xb6,0xf3,0x7d,0x01,0x00,0x00,0x00,0x00,0x04,0x59,0x5a,
    ];
    let mut out_buf: [u8; 32] = [0; 32];

    // SAFETY: the CRC tables are initialized before the decoder is used, the
    // decoder state is checked for NULL, and the pointers/sizes stored in `buf`
    // describe `data` and `out_buf`, which both outlive every decoder call.
    unsafe {
        xz_crc32_init();
        xz_crc64_init();

        let state = xz_dec_init(xz_mode::XZ_DYNALLOC, 1 << 26);
        // xz_dec_init() returns NULL when allocation fails; fail loudly
        // instead of handing a null state to xz_dec_run().
        assert!(!state.is_null(), "xz_dec_init() failed to allocate the decoder state");

        let mut buf = xz_buf {
            _in: data.as_ptr(),
            in_pos: 0,
            // Cast to `size_t` (not a hard-coded `u64`) so this compiles
            // whatever integer type `size_t` is on the target.
            in_size: data.len() as size_t,

            out: out_buf.as_mut_ptr(),
            out_pos: 0,
            out_size: out_buf.len() as size_t,
        };

        let ret = xz_dec_run(state, &mut buf);
        println!("ret={:?}", ret);
        println!("out_pos: {}", buf.out_pos);
        println!("out_size: {}", buf.out_size);
        println!("in_pos: {}", buf.in_pos);
        // Release the decoder before asserting so a failed assertion cannot leak it.
        xz_dec_end(state);

        assert_eq!(ret, xz_ret::XZ_STREAM_END);
        assert_eq!(buf.out_pos, 5);
        assert_eq!(buf.in_size, buf.in_pos);
        assert_eq!(&out_buf[..buf.out_pos as usize], "hello".as_bytes());
    }
}
339
340
341
/// Decodes the "hello" stream in two `xz_dec_run()` calls: the first call only
/// gets room for two output bytes and must return XZ_OK, the second call gets
/// room for the rest and must finish with XZ_STREAM_END.
#[test]
fn test_partial_hello_decompress() {
    let data: Vec<u8> = vec![
        0xfd,0x37,0x7a,0x58,0x5a,0x00,0x00,0x04,0xe6,0xd6,0xb4,0x46,0x02,0x00,0x21,0x01,
        0x16,0x00,0x00,0x00,0x74,0x2f,0xe5,0xa3,0x01,0x00,0x04,0x68,0x65,0x6c,0x6c,0x6f,
        0x00,0x00,0x00,0x00,0xb1,0x37,0xb9,0xdb,0xe5,0xda,0x1e,0x9b,0x00,0x01,0x1d,0x05,
        0xb8,0x2d,0x80,0xaf,0x1f,0xb6,0xf3,0x7d,0x01,0x00,0x00,0x00,0x00,0x04,0x59,0x5a,
    ];
    let mut out_buf: [u8; 32] = [0; 32];

    // SAFETY: the CRC tables are initialized before the decoder is used, the
    // decoder state is checked for NULL, and the pointers stored in `buf`
    // refer to `data` and `out_buf`, which both outlive every decoder call;
    // `out_size` never exceeds the length of `out_buf`.
    unsafe {
        xz_crc32_init();
        xz_crc64_init();

        let state = xz_dec_init(xz_mode::XZ_DYNALLOC, 1 << 26);
        // xz_dec_init() returns NULL when allocation fails; fail loudly
        // instead of handing a null state to xz_dec_run().
        assert!(!state.is_null(), "xz_dec_init() failed to allocate the decoder state");

        let mut buf = xz_buf {
            _in: data.as_ptr(),
            in_pos: 0,
            // Cast to `size_t` (not a hard-coded `u64`) so this compiles
            // whatever integer type `size_t` is on the target.
            in_size: data.len() as size_t,

            out: out_buf.as_mut_ptr(),
            out_pos: 0,
            // set out_size to be smaller than "hello", so that two calls to xz_dec_run are needed
            out_size: 2,
        };

        let ret = xz_dec_run(state, &mut buf);
        println!("ret={:?}", ret);
        println!("out_pos: {}", buf.out_pos);
        println!("out_size: {}", buf.out_size);
        println!("in_pos: {}", buf.in_pos);

        assert_eq!(ret, xz_ret::XZ_OK);
        assert_eq!(buf.out_pos, 2);
        assert_eq!(&out_buf[..buf.out_pos as usize], "he".as_bytes());

        // Make room for the remaining three bytes and resume decoding.
        buf.out_size = 5;
        let ret = xz_dec_run(state, &mut buf);
        println!("ret={:?}", ret);
        // The decoder state was previously never freed; release it before the
        // final assertions so that it is not leaked.
        xz_dec_end(state);

        assert_eq!(ret, xz_ret::XZ_STREAM_END);
        assert_eq!(buf.out_pos, 5);
        assert_eq!(&out_buf[..buf.out_pos as usize], "hello".as_bytes());
    }
}
391
392    }
393}