delharc/
decode.rs

1//! # Decoding algorithms.
2use core::fmt;
3use crate::error::{LhaResult, LhaError};
4use crate::stub_io::{Read, Take, discard_to_end};
5
6use crate::crc::Crc16;
7use crate::header::{CompressionMethod, LhaHeader};
8
9#[cfg(feature = "lz")]
10mod lzs;
11#[cfg(feature = "lz")]
12mod lz5;
13#[cfg(feature = "lh1")]
14mod lhv1;
15mod lhv2;
16
17#[cfg(feature = "lz")]
18pub use lzs::*;
19#[cfg(feature = "lz")]
20pub use lz5::*;
21#[cfg(feature = "lh1")]
22pub use lhv1::*;
23pub use lhv2::*;
24
25/// The trait implemented by decoders.
26pub trait Decoder<R> {
27    type Error: fmt::Debug;
28    /// Unwraps and returns the inner reader.
29    fn into_inner(self) -> R;
30    /// Fills the whole `buf` with decoded data.
31    ///
32    /// The caller should be aware of how large buffer can be provided to not exceed the size
33    /// of the decompressed file. Otherwise it will most likely result in an unexpected EOF error.
34    fn fill_buffer(&mut self, buf: &mut[u8]) -> Result<(), LhaError<Self::Error>>;
35}
36
37/// `LhaDecodeReader` provides a convenient way to parse and decode LHA/LZH files.
38///
39/// To read the current archived file's content use the [`std::io::Read`] trait methods on the instance
40/// of this type. After reading the whole file (until EOF), the calculated checksum should be verified
41/// using [`LhaDecodeReader::crc_check`].
42///
43/// To parse and decode the next archive file, invoke [`LhaDecodeReader::next_file`].
44///
45/// After parsing the LHA header, a decompressed content of a file can be simply read from the
46/// `LhaDecodeReader<R>`, which decompresses it using a proper decoder, designated in the header,
47/// while reading data from the underlying stream.
48///
49/// If the compression method is not supported by the decoder, but otherwise the header has been parsed
50/// successfully, invoke [`LhaDecodeReader::is_decoder_supported`] to ensure you can actually read the file.
51/// Otherwise, trying to read from an unsupported decoder will result in an error.
52///
53/// # `no_std`
54/// Without the `std` feature in the absence of `std::io` the crate's [`Read`] trait methods should
55/// be used instead to read the content of the decompressed files.
56#[derive(Debug)]
57pub struct LhaDecodeReader<R> {
58    header: LhaHeader,
59    crc: Crc16,
60    output_length: u64,
61    decoder: Option<DecoderAny<Take<R>>>
62}
63
/// A no-op decoder for storage-only methods: data is copied from the reader unchanged.
#[derive(Debug)]
pub struct PassthroughDecoder<R> {
    // The wrapped source reader.
    inner: R,
}
69
/// A stand-in decoder for compression methods that are not supported.
///
/// Every attempt to read from it produces an error.
#[derive(Debug)]
pub struct UnsupportedDecoder<R> {
    // The wrapped source reader; it is only ever handed back, never read from.
    inner: R,
}
76
77/// An error returned from methods of [LhaDecodeReader].
78///
79/// The error contains a stream source that can be accessed or unwrapped.
80///
81/// Alternatively, the error can be converted to the underlying [LhaError] using [From]
82/// trait, thus discarding the contained stream.
83pub struct LhaDecodeError<R: Read> {
84    read: R,
85    source: LhaError<R::Error>
86}
87
88#[non_exhaustive]
89#[derive(Debug)]
90pub enum DecoderAny<R> {
91    PassthroughDecoder(PassthroughDecoder<R>),
92    UnsupportedDecoder(UnsupportedDecoder<R>),
93    #[cfg(feature = "lz")]
94    LzsDecoder(LzsDecoder<R>),
95    #[cfg(feature = "lz")]
96    Lz5Decoder(Lz5Decoder<R>),
97    #[cfg(feature = "lh1")]
98    Lh1Decoder(Lh1Decoder<R>),
99    Lh4Decoder(Lh5Decoder<R>),
100    Lh5Decoder(Lh5Decoder<R>),
101    Lh6Decoder(Lh7Decoder<R>),
102    Lh7Decoder(Lh7Decoder<R>),
103    #[cfg(feature = "lhx")]
104    LhxDecoder(LhxDecoder<R>),
105}
106
// Expands to a `match` over every `DecoderAny` variant available in this build.
//
// The first group is the value being matched, the second group holds the pattern tokens
// used to bind each variant's payload, and the expression after `=>` is evaluated in
// every arm with that binding in scope.
macro_rules! decoder_any_dispatch {
    (($subject:expr)($($binding:tt)*) => $body:expr) => {
        match $subject {
            DecoderAny::PassthroughDecoder($($binding)*) => $body,
            DecoderAny::UnsupportedDecoder($($binding)*) => $body,
            #[cfg(feature = "lz")]
            DecoderAny::LzsDecoder($($binding)*) => $body,
            #[cfg(feature = "lz")]
            DecoderAny::Lz5Decoder($($binding)*) => $body,
            #[cfg(feature = "lh1")]
            DecoderAny::Lh1Decoder($($binding)*) => $body,
            // Both variants wrap the same decoder type, so they can share an arm.
            DecoderAny::Lh4Decoder($($binding)*)
            | DecoderAny::Lh5Decoder($($binding)*) => $body,
            // Both variants wrap the same decoder type, so they can share an arm.
            DecoderAny::Lh6Decoder($($binding)*)
            | DecoderAny::Lh7Decoder($($binding)*) => $body,
            #[cfg(feature = "lhx")]
            DecoderAny::LhxDecoder($($binding)*) => $body,
        }
    };
}
127
128/// A default implementation creates an instance of `LhaDecodeReader<R>` with no reader present and
129/// with a phony header.
130impl<R: Read> Default for LhaDecodeReader<R> {
131    fn default() -> Self {
132        LhaDecodeReader {
133            header: Default::default(),
134            crc: Crc16::default(),
135            output_length: 0,
136            decoder: None
137        }
138    } 
139}
140
141impl<R: Read> LhaDecodeReader<R> where R::Error: fmt::Debug {
142    /// Return a new instance of `LhaDecodeReader<R>` after reading and parsing the first header from source.
143    ///
144    /// Provide a stream reader as `rd`.
145    ///
146    /// # Errors
147    /// Return an error if the header could not be read or parsed.
148    pub fn new(mut rd: R) -> Result<LhaDecodeReader<R>, LhaDecodeError<R>> {
149        let header = match LhaHeader::read(rd.by_ref()).and_then(|h|
150                        h.ok_or_else(|| LhaError::HeaderParse("a header is missing"))
151                    )
152        {
153            Ok(h) => h,
154            Err(e) => return Err(wrap_err(rd, e))
155        };
156        let decoder = DecoderAny::new_from_header(&header, rd);
157        let crc = Crc16::default();
158        Ok(LhaDecodeReader {
159            header,
160            crc,
161            output_length: 0,
162            decoder: Some(decoder)
163        })
164    }
165    /// Attempt to read the first file header from a new source stream and initialize a decoder returning
166    /// `Ok(true)` on success. Return `Ok(false)` if there are no more headers in the stream.
167    ///
168    /// Provide a stream reader as `rd`.
169    ///
170    /// When `Ok` is returned, regardles of the retuned boolean value, the inner reader is being always
171    /// replaced with the given `rd`.
172    ///
173    /// When `Ok(false)` has been returned, trying to read from the decoder will result in an error.
174    ///
175    /// # Errors
176    /// Returns an error if the header could not be read or parsed. In this instance the inner stream
177    /// reader is not being replaced by a new one and the provided source stream can be retrieved from
178    /// the returned error.
179    pub fn begin_new(&mut self, mut rd: R) -> Result<bool, LhaDecodeError<R>> {
180        let res = match LhaHeader::read(rd.by_ref()) {
181            Ok(Some(header)) => {
182                let decoder = DecoderAny::new_from_header(&header, rd);
183                self.decoder = Some(decoder);
184                self.header = header;
185                true
186            }
187            Ok(None) => {
188                let decoder = UnsupportedDecoder::new(rd.take(0));
189                self.decoder = Some(DecoderAny::UnsupportedDecoder(decoder));
190                false
191            }
192            Err(e) => return Err(wrap_err(rd, e))
193        };
194        self.crc.reset();
195        self.output_length = 0;
196        Ok(res)
197    }
198    /// Assign externally parsed header and decoder to this instance of `LhaDecodeReader<R>`.
199    ///
200    /// It is up to the caller to make sure the decoder and the header are matching each other.
201    ///
202    /// The decoder should be initialized with the reader limited by the [`Take`] wrapper
203    /// with its limit set to the [`LhaHeader::compressed_size`] number of bytes.
204    ///
205    /// This method assumes the file will be read and decoded from its beginning.
206    pub fn begin_with_header_and_decoder(&mut self, header: LhaHeader, decoder: DecoderAny<Take<R>>) {
207        self.decoder = Some(decoder);
208        self.header = header;
209        self.crc.reset();
210        self.output_length = 0;
211    }
212    /// Attempt to parse the next file's header.
213    ///
214    /// The remaining content of the previous file is being skipped if the current file's content
215    /// has not been read entirely.
216    ///
217    /// On success returns `Ok(true)` if the next header has been read and parsed successfully.
218    /// If there are no more headers, returns `Ok(false)`.
219    ///
220    /// # Errors
221    /// Returns an error if the header could not be read or parsed.
222    /// In this instance the underlying stream source will be taken and returned with the error.
223    ///
224    /// # Panic
225    /// Panics if called when the underlying stream reader has been already taken.
226    ///
227    /// # `no_std`
228    /// To skip the remaining file's content this function uses 8 KB stack-allocated buffer
229    /// when using with `std` feature enabled. Without `std` the buffer size is 512 bytes.
230    /// See also [`LhaDecodeReader::next_file_with_sink`].
231    #[cfg(feature = "std")]
232    pub fn next_file(&mut self) -> Result<bool, LhaDecodeError<R>> {
233        self.next_file_with_sink::<{8*1024}>()
234    }
235    #[cfg(not(feature = "std"))]
236    pub fn next_file(&mut self) -> Result<bool, LhaDecodeError<R>> {
237        self.next_file_with_sink::<512>()
238    }
239    /// Attempt to parse the next file's header.
240    ///
241    /// Exactly like [`LhaDecodeReader::next_file`] but allows to specify the sink buffer
242    /// size as `BUF`.
243    ///
244    /// # Panics
245    /// Panics when `BUF` = `0`.
246    pub fn next_file_with_sink<const BUF: usize>(&mut self) -> Result<bool, LhaDecodeError<R>> {
247        let mut limited_rd = self.decoder.take().expect("decoder not empty").into_inner();
248        if limited_rd.limit() != 0 {
249            if let Err(e) = discard_to_end::<_, BUF>(&mut limited_rd).map_err(LhaError::Io) {
250                return Err(wrap_err(limited_rd.into_inner(), e))
251            }
252        }
253        self.begin_new(limited_rd.into_inner())
254    }
255    /// Return a reference to the last parsed file's [LhaHeader].
256    pub fn header(&self) -> &LhaHeader {
257        &self.header
258    }
259    /// Unwrap the underlying stream reader and return it.
260    ///
261    /// # Panics
262    /// Panics if the reader has been already taken.
263    pub fn into_inner(self) -> R {
264        self.decoder.expect("decoder not empty").into_inner().into_inner()
265    }
266    /// Take the inner stream reader value out of the decoder, leaving a none in its place.
267    ///
268    /// After this call, reading from this instance will result in a panic.
269    pub fn take_inner(&mut self) -> Option<R> {
270        self.header.original_size = 0;
271        self.output_length = 0;
272        self.crc.reset();
273        self.decoder.take().map(|decoder| decoder.into_inner().into_inner())
274    }
275    /// Return the number of remaining bytes of the currently decompressed file to be read.
276    pub fn len(&self) -> u64 {
277        self.header.original_size - self.output_length
278    }
279    /// Return whether the current file has been finished reading or if the file was empty.
280    pub fn is_empty(&self) -> bool {
281        self.header.original_size == self.output_length
282    }
283    /// Return whether an underlying stream reader is present in the decoder.
284    pub fn is_present(&self) -> bool {
285        self.decoder.is_some()
286    }
287    /// Return whether an underlying stream reader is absent from the decoder.
288    ///
289    /// An attempt to read file's content in this state will result in a panic.
290    pub fn is_absent(&self) -> bool {
291        self.decoder.is_none()
292    }
293    /// Return whether the computed CRC-16 matches the checksum in the header.
294    ///
295    /// This should be called after the whole file has been read.
296    pub fn crc_is_ok(&self) -> bool {
297        self.crc.sum16() == self.header.file_crc
298    }
299    /// Return CRC-16 checksum if the computed checksum matches the one in the header.
300    /// Otherwise return an [`LhaError::Checksum`] error.
301    ///
302    /// This should be called after the whole file has been read.
303    pub fn crc_check(&self) -> LhaResult<u16, R> {
304        if self.crc_is_ok() {
305            Ok(self.header.file_crc)
306        }
307        else {
308            Err(LhaError::Checksum("crc16 mismatch"))
309        }
310    }
311    /// Return whether the current file's compression method is supported.
312    ///
313    /// If this method returns `false`, trying to read from the decoder will result in an error.
314    /// In this instance it is still ok to skip to the next file.
315    ///
316    /// # Note
317    /// If the variant of compression is [`CompressionMethod::Lhd`] this method will return `false`.
318    /// In this instance check the result from header's [`LhaHeader::is_directory`] to determine
319    /// what steps should be taken next.
320    pub fn is_decoder_supported(&self) -> bool {
321        self.decoder.as_ref().map(|d| d.is_supported()).unwrap_or(false)
322    }
323}
324
#[cfg(feature = "std")]
impl<R: Read<Error=std::io::Error>> std::io::Read for LhaDecodeReader<R> {
    /// Decode up to `buf.len()` bytes of the current file into `buf`.
    ///
    /// The request is clamped to the number of bytes left in the current file, so `Ok(0)`
    /// is returned once the whole file has been read. The decoded bytes are folded into
    /// the running CRC-16.
    ///
    /// # Errors
    /// Propagates any error reported by the decoder.
    ///
    /// # Panics
    /// Panics if the underlying stream reader has already been taken.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let remaining = self.header.original_size - self.output_length;
        // Clamp in the `u64` domain. Casting `remaining` to `usize` first would truncate
        // on targets with a `usize` narrower than 64 bits, e.g. turning a remainder of
        // exactly 4 GiB into 0 and thus signalling a premature EOF.
        let len = if remaining < buf.len() as u64 {
            // Lossless: the value is smaller than `buf.len()`, which is a `usize`.
            remaining as usize
        } else {
            buf.len()
        };
        let target = &mut buf[..len];
        self.decoder.as_mut().expect("decoder not empty").fill_buffer(target)?;
        self.output_length += len as u64;
        self.crc.digest(target);
        Ok(len)
    }
}
336
337#[cfg(not(feature = "std"))]
338impl<R: Read> Read for LhaDecodeReader<R> where R::Error: fmt::Debug {
339    type Error = LhaError<R::Error>;
340
341    fn unexpected_eof() -> Self::Error {
342        LhaError::Io(R::unexpected_eof())
343    }
344
345    fn read_all(&mut self, buf: &mut[u8]) -> Result<usize, Self::Error> {
346        let len = buf.len().min((self.header.original_size - self.output_length) as usize);
347        let target = &mut buf[..len];
348        self.decoder.as_mut().unwrap().fill_buffer(target)?;
349        self.output_length += len as u64;
350        self.crc.digest(target);
351        Ok(len)
352    }
353}
354
355impl<R: Read> DecoderAny<R> {
356    /// Creates an instance of `DecoderAny<Take<R>>` from the given `LhaHeader` reference and a stream reader.
357    pub fn new_from_header(header: &LhaHeader, rd: R) -> DecoderAny<Take<R>> {
358        let limited_rd = rd.take(header.compressed_size);
359        match header.compression_method() {
360            Ok(compression) => DecoderAny::new_from_compression(compression, limited_rd),
361            Err(..) => DecoderAny::UnsupportedDecoder(UnsupportedDecoder::new(limited_rd))
362        }
363    }
364    /// Creates an instance of `DecoderAny<R>` from the given compression method and a stream reader.
365    pub fn new_from_compression(
366            compression: CompressionMethod,
367            rd: R
368        ) -> Self
369    {
370        match compression {
371            CompressionMethod::Pm0|
372            CompressionMethod::Lz4|
373            CompressionMethod::Lh0 => DecoderAny::PassthroughDecoder(PassthroughDecoder::new(rd)),
374            #[cfg(feature = "lz")]
375            CompressionMethod::Lzs => DecoderAny::LzsDecoder(LzsDecoder::new(rd)),
376            #[cfg(feature = "lz")]
377            CompressionMethod::Lz5 => DecoderAny::Lz5Decoder(Lz5Decoder::new(rd)),
378            #[cfg(feature = "lh1")]
379            CompressionMethod::Lh1 => DecoderAny::Lh1Decoder(Lh1Decoder::new(rd)),
380            CompressionMethod::Lh4 => DecoderAny::Lh4Decoder(Lh5Decoder::new(rd)),
381            CompressionMethod::Lh5 => DecoderAny::Lh5Decoder(Lh5Decoder::new(rd)),
382            CompressionMethod::Lh6 => DecoderAny::Lh6Decoder(Lh7Decoder::new(rd)),
383            CompressionMethod::Lh7 => DecoderAny::Lh7Decoder(Lh7Decoder::new(rd)),
384            #[cfg(feature = "lhx")]
385            CompressionMethod::Lhx => DecoderAny::LhxDecoder(LhxDecoder::new(rd)),
386            _ => DecoderAny::UnsupportedDecoder(UnsupportedDecoder::new(rd))
387        }
388    }
389    /// Returns `true` if the decoder is able to decode the file's content.
390    pub fn is_supported(&self) -> bool {
391        !matches!(self, DecoderAny::UnsupportedDecoder(..))
392    }
393}
394
395impl<R: Read> Decoder<R> for DecoderAny<R> where R::Error: fmt::Debug {
396    type Error = R::Error;
397
398    fn into_inner(self) -> R {
399        decoder_any_dispatch!((self)(decoder) => decoder.into_inner())
400    }
401
402    #[inline]
403    fn fill_buffer(&mut self, buf: &mut[u8]) -> Result<(), LhaError<Self::Error>> {
404        decoder_any_dispatch!((self)(decoder) => decoder.fill_buffer(buf))
405    }
406}
407
408impl<R: Read> PassthroughDecoder<R> {
409    pub fn new(inner: R) -> Self {
410        PassthroughDecoder { inner }
411    }
412}
413
414impl<R: Read> Decoder<R> for PassthroughDecoder<R> where R::Error: fmt::Debug {
415    type Error = R::Error;
416
417    fn into_inner(self) -> R {
418        self.inner
419    }
420
421    #[inline]
422    fn fill_buffer(&mut self, buf: &mut[u8]) -> Result<(), LhaError<Self::Error>> {
423        self.inner.read_exact(buf).map_err(LhaError::Io)
424    }
425}
426
427impl<R: Read> UnsupportedDecoder<R> {
428    pub fn new(inner: R) -> Self {
429        UnsupportedDecoder { inner }
430    }
431}
432
433impl<R: Read> Decoder<R> for UnsupportedDecoder<R> where R::Error: fmt::Debug {
434    type Error = R::Error;
435
436    fn into_inner(self) -> R {
437        self.inner
438    }
439
440    #[inline]
441    fn fill_buffer(&mut self, _buf: &mut[u8]) -> Result<(), LhaError<Self::Error>> {
442        Err(LhaError::Decompress("unsupported compression method"))
443    }
444}
445
446impl<R: Read> LhaDecodeError<R> {
447    /// Gets a reference to the contained reader.
448    pub fn get_ref(&self) -> &R {
449        &self.read
450    }
451    /// Gets a mutable reference to the contained reader.
452    pub fn get_mut(&mut self) -> &mut R {
453        &mut self.read
454    }
455    /// Unwraps this `LhaDecodeError<R>`, returning the contained reader.
456    pub fn into_inner(self) -> R {
457        self.read
458    }
459}
460
#[cfg(feature = "std")]
impl<R: Read> std::error::Error for LhaDecodeError<R>
where
    LhaError<R::Error>: std::error::Error + 'static,
{
    /// Exposes the wrapped [`LhaError`] as the cause of this error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.source;
        Some(cause)
    }
}
469
470impl<R: Read> fmt::Debug for LhaDecodeError<R>
471    where LhaError<R::Error>: fmt::Debug
472{
473    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
474        f.debug_struct("LhaDecodeError")
475         .field("source", &self.source)
476         .finish()
477    }
478}
479
480impl<R: Read> fmt::Display for LhaDecodeError<R>
481    where LhaError<R::Error>: fmt::Display
482{
483    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
484        write!(f, "LHA decode error: {}", self.source)
485    }
486}
487
488impl<R: Read> From<LhaDecodeError<R>> for LhaError<R::Error> {
489    fn from(e: LhaDecodeError<R>) -> Self {
490        e.source
491    }
492}
493
/// Converts the underlying [`LhaError`] into an I/O error, dropping the carried reader.
#[cfg(feature = "std")]
impl<R: Read> From<LhaDecodeError<R>> for std::io::Error
where
    std::io::Error: From<LhaError<R::Error>>,
{
    fn from(e: LhaDecodeError<R>) -> Self {
        std::io::Error::from(e.source)
    }
}
502
503fn wrap_err<R: Read>(read: R, source: LhaError<R::Error>) -> LhaDecodeError<R> {
504    LhaDecodeError { read, source }
505}
506
507
#[cfg(all(test, feature = "std"))]
mod tests {
    use super::*;
    use std::io;

    /// A stream too short to hold a header must yield an error that still owns the reader.
    #[test]
    fn decode_error_works() {
        let source = io::Cursor::new(vec![0u8; 3]);
        let mut failure = LhaDecodeReader::new(source).unwrap_err();
        assert_eq!(
            failure.to_string(),
            "LHA decode error: while parsing LHA header: a header is missing"
        );
        // The reader is reachable through both the shared and the mutable accessor.
        assert_eq!(failure.get_ref().get_ref(), &vec![0u8; 3]);
        assert_eq!(failure.get_mut().get_mut(), &mut vec![0u8; 3]);
        // Unwrapping the error returns the reader with its position preserved.
        let recovered = failure.into_inner();
        assert_eq!(recovered.position(), 1);
        assert_eq!(recovered.into_inner(), vec![0u8; 3]);
    }
}
525}