1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
use bytes::Bytes;
use std::collections::HashSet;

/// Errors reported when detecting the compression of, or decompressing, a payload.
///
/// The lifetime `'a` is the lifetime of the borrowed file name extension carried by
/// [`Error::Unsupported`].
#[derive(Debug, thiserror::Error)]
pub enum Error<'a> {
    /// The file name carries an extension that is not supported. The payload is that extension
    /// (the part of the file name after the last `.`).
    #[error("unsupported compression: {0}")]
    Unsupported(&'a str),
    /// An I/O error; converted automatically from [`std::io::Error`], e.g. when decompression
    /// fails.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

/// The compression formats that can be handled.
///
/// Which variants exist depends on the enabled cargo features.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Compression {
    /// No compression; the data is used as-is.
    None,
    /// bzip2 compression. Only present with the `bzip2` or `bzip2-rs` feature.
    #[cfg(any(feature = "bzip2", feature = "bzip2-rs"))]
    Bzip2,
    /// xz compression. Only present with the `liblzma` feature.
    #[cfg(feature = "liblzma")]
    Xz,
}

/// Options controlling how a payload is decompressed.
///
/// Build an instance with [`DecompressionOptions::new`] (or [`Default`]) and adjust it through
/// the builder-style methods.
#[non_exhaustive]
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct DecompressionOptions {
    /// The maximum decompressed payload size.
    ///
    /// If the size of the uncompressed payload exceeds this limit, an error is returned
    /// instead. Zero means unlimited.
    pub limit: usize,
}

impl DecompressionOptions {
    /// Create a new set of options, equal to the [`Default`] value.
    pub fn new() -> Self {
        Default::default()
    }

    /// Set the maximum uncompressed payload size and hand the options back for chaining.
    pub fn limit(self, limit: usize) -> Self {
        Self { limit, ..self }
    }
}

impl Compression {
    /// Perform decompression.
    ///
    /// Returns the original data for [`Compression::None`].
    pub fn decompress(&self, data: Bytes) -> Result<Bytes, std::io::Error> {
        Ok(self.decompress_opt(&data)?.unwrap_or(data))
    }

    /// Perform decompression.
    ///
    /// Returns the original data for [`Compression::None`].
    pub fn decompress_with(
        &self,
        data: Bytes,
        opts: &DecompressionOptions,
    ) -> Result<Bytes, std::io::Error> {
        Ok(self.decompress_opt_with(&data, opts)?.unwrap_or(data))
    }

    /// Perform decompression.
    ///
    /// Returns `None` for [`Compression::None`]
    pub fn decompress_opt(&self, data: &[u8]) -> Result<Option<Bytes>, std::io::Error> {
        self.decompress_opt_with(data, &Default::default())
    }

    /// Perform decompression.
    ///
    /// Returns `None` for [`Compression::None`]
    pub fn decompress_opt_with(
        &self,
        data: &[u8],
        opts: &DecompressionOptions,
    ) -> Result<Option<Bytes>, std::io::Error> {
        match self {
            #[cfg(any(feature = "bzip2", feature = "bzip2-rs"))]
            Compression::Bzip2 => super::decompress_bzip2_with(data, opts).map(Some),
            #[cfg(feature = "liblzma")]
            Compression::Xz => super::decompress_xz_with(data, opts).map(Some),
            Compression::None => Ok(None),
        }
    }
}

/// Configuration for detecting the compression of a payload.
///
/// Detection looks at the file name extension first (if a file name is set) and then, unless
/// disabled, at the leading magic bytes of the data.
#[derive(Clone, Debug, Default)]
pub struct Detector<'a> {
    /// File name, used for detection by file name extension. When `None`, detection by
    /// extension is skipped.
    pub file_name: Option<&'a str>,

    /// Disable detection by magic bytes
    pub disable_magic: bool,

    /// File name extensions to ignore.
    ///
    /// Entries are compared against the part of the file name after the last `.`. Only
    /// consulted when [`Detector::fail_unknown_file_extension`] is set: an unknown extension
    /// listed here does not cause an error.
    pub ignore_file_extensions: HashSet<&'a str>,
    /// If a file name is present, but the extension is unknown, report as an error
    pub fail_unknown_file_extension: bool,
}

impl<'a> Detector<'a> {
    /// Detect and decompress in a single step.
    pub fn decompress(&'a self, data: Bytes) -> Result<Bytes, Error<'a>> {
        self.decompress_with(data, &Default::default())
    }

    /// Detect and decompress in a single step.
    pub fn decompress_with(
        &'a self,
        data: Bytes,
        opts: &DecompressionOptions,
    ) -> Result<Bytes, Error<'a>> {
        let compression = self.detect(&data)?;
        Ok(compression.decompress_with(data, opts)?)
    }

    pub fn detect(&'a self, #[allow(unused)] data: &[u8]) -> Result<Compression, Error<'a>> {
        // detect by file name extension

        if let Some(file_name) = self.file_name {
            #[cfg(any(feature = "bzip2", feature = "bzip2-rs"))]
            if file_name.ends_with(".bz2") {
                return Ok(Compression::Bzip2);
            }
            #[cfg(feature = "liblzma")]
            if file_name.ends_with(".xz") {
                return Ok(Compression::Xz);
            }
            if self.fail_unknown_file_extension {
                if let Some((_, ext)) = file_name.rsplit_once('.') {
                    if !self.ignore_file_extensions.contains(ext) {
                        return Err(Error::Unsupported(ext));
                    }
                }
            }
        }

        // magic bytes

        if !self.disable_magic {
            #[cfg(any(feature = "bzip2", feature = "bzip2-rs"))]
            if data.starts_with(b"BZh") {
                return Ok(Compression::Bzip2);
            }
            #[cfg(feature = "liblzma")]
            if data.starts_with(&[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]) {
                return Ok(Compression::Xz);
            }
        }

        // done

        Ok(Compression::None)
    }
}