detect_compression/
lib.rs

1#![warn(missing_docs)]
2
//! `BufRead` and `Write` types that detect the compression algorithm from the file extension.
4//!
5//! Supported formats:
6//! * Gzip (`.gz`) by [`flate2`](https://crates.io/crates/flate2) crate
7//! * LZ4 (`.lz4`) by [`lz4`](https://crates.io/crates/lz4) crate
8
9use std::fs::File;
10use std::io::{BufRead, BufReader, BufWriter, Error, ErrorKind, Read, Result, Write};
11use std::path::Path;
12
13use flate2::read::GzDecoder;
14use flate2::write::GzEncoder;
15use flate2::Compression;
16use lz4::liblz4::ContentChecksum;
17use lz4::{Decoder as Lz4Decoder, Encoder as Lz4Encoder, EncoderBuilder as Lz4EncoderBuilder};
18
19/// The [`BufRead`](https://doc.rust-lang.org/std/io/trait.BufRead.html) type reads from compressed or uncompressed file.
20///
21/// This reader detects compression algorithms from file name extension.
22pub struct DetectReader {
23    inner: Box<dyn BufRead>,
24}
25
26impl DetectReader {
27    /// Open compressed or uncompressed file.
28    pub fn open<P: AsRef<Path>>(path: P) -> Result<DetectReader> {
29        DetectReader::open_with_wrapper::<P, Id>(path, Id)
30    }
31
32    /// Open compressed or uncompressed file using wrapper type.
33    ///
34    /// [`InnerReadWrapper`](trait.InnerReadWrapper.html) is the wrapepr type's trait handles compressed byte stream.
35    /// For example, the progress-counting wrapper enables you to calculate progress of loading.
36    pub fn open_with_wrapper<P: AsRef<Path>, B: ReadWrapperBuilder>(
37        path: P,
38        builder: B,
39    ) -> Result<DetectReader> {
40        let path = path.as_ref();
41
42        let f = File::open(path)?;
43        let wf = builder.new_wrapped_reader(f);
44
45        let inner: Box<dyn BufRead> = match path.extension() {
46            Some(e) if e == "gz" => {
47                let d = GzDecoder::new(wf);
48                let br = BufReader::new(d);
49                Box::new(br)
50            }
51            Some(e) if e == "lz4" => {
52                let d = Lz4Decoder::new(wf)?;
53                let br = BufReader::new(d);
54                Box::new(br)
55            }
56            _ => {
57                let br = BufReader::new(wf);
58                Box::new(br)
59            }
60        };
61
62        Ok(DetectReader { inner })
63    }
64}
65
66impl Read for DetectReader {
67    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
68        self.inner.read(buf)
69    }
70}
71
72impl BufRead for DetectReader {
73    fn fill_buf(&mut self) -> Result<&[u8]> {
74        self.inner.fill_buf()
75    }
76
77    fn consume(&mut self, amt: usize) {
78        self.inner.consume(amt)
79    }
80}
81
82/// The [`Write`](https://doc.rust-lang.org/std/io/trait.Write.html) type writes to compressed or uncompressed file.
83///
84/// This writer detects compression algorithms from file name extension.
85///
86/// You must [`finalize`](struct.DetectWriter.html#method.finalize) this writer.
87pub struct DetectWriter {
88    inner: Box<dyn Finalize>,
89    not_closed: bool,
90}
91
92impl DetectWriter {
93    /// Create compressed or uncompressed file.
94    pub fn create<P: AsRef<Path>>(path: P, level: Level) -> Result<DetectWriter> {
95        DetectWriter::create_with_wrapper::<P, Id>(path, level, Id)
96    }
97
98    /// Create compressed or uncompressed file using wrapper type.
99    ///
100    /// [`InnerWriteWrapper`](trait.InnerWriteWrapper.html) is the wrapepr type's trait handles compressed byte stream.
101    /// For example, the size-accumulating wrapper enables you to calculate size of compressed output.
102    pub fn create_with_wrapper<P: AsRef<Path>, B: WriteWrapperBuilder>(
103        path: P,
104        level: Level,
105        builder: B,
106    ) -> Result<DetectWriter> {
107        let path = path.as_ref();
108
109        let f = File::create(path)?;
110        let wf = builder.new_wrapped_writer(f);
111        let w = BufWriter::new(wf);
112
113        let inner: Box<dyn Finalize> = match path.extension() {
114            Some(e) if e == "gz" => {
115                let e = GzEncoder::new(w, level.into_flate2_compression());
116                Box::new(e)
117            }
118            Some(e) if e == "lz4" => {
119                let mut builder = Lz4EncoderBuilder::new();
120                builder
121                    .level(level.into_lz4_level()?)
122                    .checksum(ContentChecksum::ChecksumEnabled);
123
124                let e = builder.build(w)?;
125                Box::new(FinalizeLz4Encoder::new(e))
126            }
127            _ => Box::new(w),
128        };
129
130        Ok(DetectWriter {
131            inner,
132            not_closed: true,
133        })
134    }
135
136    /// Finalize this writer.
137    ///
138    /// Some encodings requires finalization.
139    ///
140    pub fn finalize(mut self) -> Result<()> {
141        if self.not_closed {
142            self.inner.finalize()?;
143            self.not_closed = false;
144        }
145        Ok(())
146    }
147}
148
149impl Write for DetectWriter {
150    fn write(&mut self, bytes: &[u8]) -> Result<usize> {
151        self.inner.write(bytes)
152    }
153
154    fn flush(&mut self) -> Result<()> {
155        self.inner.flush()
156    }
157}
158
159impl Drop for DetectWriter {
160    fn drop(&mut self) {
161        if self.not_closed {
162            panic!("DetectWriter must be finalized. But dropped before finalization.");
163        }
164    }
165}
166
167/// Compression level.
168#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
169pub enum Level {
170    /// Uncompressed
171    None,
172    /// Minimum compression (fastest and large)
173    Minimum,
174    /// Maximum compression (smallest and slow)
175    Maximum,
176}
177
178impl Level {
179    fn into_flate2_compression(self) -> Compression {
180        match self {
181            Level::None => Compression::none(),
182            Level::Minimum => Compression::fast(),
183            Level::Maximum => Compression::best(),
184        }
185    }
186
187    fn into_lz4_level(self) -> Result<u32> {
188        match self {
189            Level::None => Err(Error::new(
190                ErrorKind::InvalidInput,
191                "LZ4 don't support non-compression mode",
192            )),
193            Level::Minimum => Ok(1),
194            Level::Maximum => Ok(3),
195        }
196    }
197}
198
/// Builder for a [`Read`](https://doc.rust-lang.org/std/io/trait.Read.html) wrapper placed
/// around the opened `File`.
///
/// For more information, see [`DetectReader::open_with_wrapper()`](struct.DetectReader.html#method.open_with_wrapper).
pub trait ReadWrapperBuilder {
    /// The reader type that wraps the `File`.
    type Wrapper: Read + 'static;

    /// Consumes the builder and wraps `f`.
    fn new_wrapped_reader(self, f: File) -> Self::Wrapper;
}
208
/// Builder for a [`Write`](https://doc.rust-lang.org/std/io/trait.Write.html) wrapper placed
/// around the created `File`.
///
/// For more information, see [`DetectWriter::create_with_wrapper()`](struct.DetectWriter.html#method.create_with_wrapper).
pub trait WriteWrapperBuilder {
    /// The writer type that wraps the `File`.
    type Wrapper: Write + 'static;

    /// Consumes the builder and wraps `f`.
    fn new_wrapped_writer(self, f: File) -> Self::Wrapper;
}
218
219#[derive(Debug, Clone, Copy)]
220struct Id;
221
222impl ReadWrapperBuilder for Id {
223    type Wrapper = File;
224    fn new_wrapped_reader(self, f: File) -> Self::Wrapper {
225        f
226    }
227}
228
229impl WriteWrapperBuilder for Id {
230    type Wrapper = File;
231    fn new_wrapped_writer(self, f: File) -> Self::Wrapper {
232        f
233    }
234}
235
/// A `Write` sink with an explicit end-of-stream step.
trait Finalize: Write {
    /// Completes the stream.
    ///
    /// The provided implementation only flushes the writer; implementors that need to
    /// emit closing data override it.
    fn finalize(&mut self) -> Result<()> {
        Write::flush(self)
    }
}
241
242impl Finalize for File {}
243impl<W: Write> Finalize for GzEncoder<W> {}
244impl<W: Write> Finalize for BufWriter<W> {}
245
246struct FinalizeLz4Encoder<W: Write>(Option<Lz4Encoder<W>>);
247
248impl<W: Write> FinalizeLz4Encoder<W> {
249    fn new(inner: Lz4Encoder<W>) -> FinalizeLz4Encoder<W> {
250        FinalizeLz4Encoder(Some(inner))
251    }
252}
253
254impl<W: Write> Write for FinalizeLz4Encoder<W> {
255    fn write(&mut self, buf: &[u8]) -> Result<usize> {
256        self.0
257            .as_mut()
258            .expect("writer already finalized")
259            .write(buf)
260    }
261
262    fn flush(&mut self) -> Result<()> {
263        self.0.as_mut().expect("writer already finalized").flush()
264    }
265}
266
267impl<W: Write> Finalize for FinalizeLz4Encoder<W> {
268    fn finalize(&mut self) -> Result<()> {
269        self.flush()?;
270        let enc = self.0.take().expect("writer already finalized");
271        enc.finish().1
272    }
273}