zopen/
lib.rs

1/*
2 * MIT License
3 *
4 * Copyright (c) 2017-2025 Frank Fischer <frank-fischer@shadow-soft.de>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
//! Simple crate that automatically opens compressed files.
26//!
27//! The compressor used is determined by the file extension. If the
28//! corresponding compression library is not available (i.e. the corresponding
29//! feature is not activated), the crate tries to use an external compression
30//! tool (gzip, bzip2, xz or zstd).
31//!
//! The crate exports two functions [`read`] and [`write`]. Given a file path,
//! they return a `Box<dyn Read>` or a `Box<dyn Write>`, respectively, accessing the file.
34//! Depending on the file extension, the file is filtered through an appropriate
35//! compressor/decompressor.
36//!
37//! # Features
38//!
//! The following features enable compression/decompression through external
//! crates. If a feature is disabled, `zopen` uses the corresponding command line tool
//! instead.
42//!
43//! - `gzip`: enables gzip compression through the `flate2` crate. If disabled, uses
44//!           the external `gzip` command line tool.
45//! - `bzip2`: enables bzip2 compression through the `bzip2` crate. If disabled, uses
46//!           the external `bzip2` command line tool.
47//! - `xz`: enables xz compression through the `rust-lzma` crate. If disabled, uses
48//!           the external `xz` command line tool.
49//! - `zstd`: enables zstd compression through the `zstd` crate. If disabled, uses
50//!           the external `zstd` command line tool.
//! - `all`: enables all of the above.
52//!
53//! # Example
54//!
55//! Reading a compressed file:
56//! ```rust,no_run
57//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
58//! let mut f = zopen::read("test.file.gz")?; // open gzip compressed file.
59//! let mut data = String::new();
60//! f.read_to_string(&mut data)?;
61//! # Ok(())
62//! # }
63//! ```
64//!
65//! Writing to a compressed file:
66//! ```rust,no_run
67//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
68//! let mut f = zopen::write("test.file.zst")?; // create zstd compressed file.
69//! writeln!(f, "{}: {}", "Hello world", 42)?;
70//! # Ok(())
71//! # }
72//! ```
73use std::ffi::OsStr;
74use std::fs::File;
75use std::path::Path;
76
77#[allow(dead_code)]
78mod tool;
79
/// Gzip backend built on the `flate2` crate; compiled when the `gzip` feature is enabled.
#[cfg(feature = "gzip")]
mod gz {
    use std::fs::File;
    use std::io::Result;
    use std::path::Path;

    /// Gzip decoder reading straight from a `File`.
    type Read = flate2::read::GzDecoder<File>;
    /// Gzip encoder writing straight to a `File`.
    type Write = flate2::write::GzEncoder<File>;

    /// Opens the file at `path` and wraps it in a gzip decoder.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open` if the file cannot be opened.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let file = File::open(path)?;
        Ok(Read::new(file))
    }

    /// Creates (or truncates) the file at `path` and wraps it in a gzip
    /// encoder using `flate2::Compression::default()`.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::create` if the file cannot be created.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let file = File::create(path)?;
        Ok(Write::new(file, flate2::Compression::default()))
    }
}
97
98#[cfg(not(feature = "gzip"))]
99mod gz {
100    use std::ffi::OsStr;
101    use std::io::Result;
102    use std::path::Path;
103
104    type Read = crate::tool::ToolRead;
105    type Write = crate::tool::ToolWrite;
106
107    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
108        Read::new(
109            "/usr/bin/gzip",
110            [OsStr::new("-d"), OsStr::new("-c"), path.as_ref().as_os_str()],
111        )
112    }
113
114    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
115        Write::new_with_file("/usr/bin/gzip", [OsStr::new("-")], path.as_ref().as_os_str())
116    }
117}
118
/// Bzip2 backend built on the `bzip2` crate; compiled when the `bzip2` feature is enabled.
#[cfg(feature = "bzip2")]
mod bzip {
    use std::fs::File;
    use std::io::Result;
    use std::path::Path;

    /// Bzip2 decoder reading straight from a `File`.
    type Read = bzip2::read::BzDecoder<File>;
    /// Bzip2 encoder writing straight to a `File`.
    type Write = bzip2::write::BzEncoder<File>;

    /// Opens the file at `path` and wraps it in a bzip2 decoder.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open` if the file cannot be opened.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        File::open(path).map(Read::new)
    }

    /// Creates (or truncates) the file at `path` and wraps it in a bzip2
    /// encoder using `bzip2::Compression::default()`.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::create` if the file cannot be created.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        File::create(path).map(|file| Write::new(file, bzip2::Compression::default()))
    }
}
136
137#[cfg(not(feature = "bzip2"))]
138mod bzip {
139    use std::ffi::OsStr;
140    use std::io::Result;
141    use std::path::Path;
142    type Read = crate::tool::ToolRead;
143    type Write = crate::tool::ToolWrite;
144
145    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
146        Read::new(
147            "/usr/bin/bzip2",
148            [OsStr::new("-d"), OsStr::new("-c"), path.as_ref().as_os_str()],
149        )
150    }
151
152    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
153        Write::new_with_file("/usr/bin/bzip2", [OsStr::new("-")], path.as_ref().as_os_str())
154    }
155}
156
/// XZ/LZMA backend built on the `lzma` crate; compiled when the `xz` feature is enabled.
#[cfg(feature = "xz")]
mod xz {
    use std::fmt::Arguments;
    use std::fs::File;
    use std::io::{self, Error, ErrorKind, Result};
    use std::path::Path;
    use std::result;

    type Read = lzma::LzmaReader<File>;

    /// Compressing writer that finishes the XZ stream when it is dropped.
    ///
    /// The inner writer is kept in an `Option` so that `Drop` can take
    /// ownership of it in order to call `finish`. It is `Some` for the whole
    /// lifetime of the value and only becomes `None` inside `drop`.
    pub struct Write(Option<lzma::LzmaWriter<File>>);

    impl Write {
        /// Creates (or truncates) the file at `path` and starts a compressor
        /// with preset `6` on it.
        fn new(path: impl AsRef<Path>) -> result::Result<Self, lzma::error::LzmaError> {
            Ok(Write(Some(lzma::LzmaWriter::new_compressor(File::create(path)?, 6)?)))
        }

        /// Gives access to the wrapped writer.
        fn inner(&mut self) -> &mut lzma::LzmaWriter<File> {
            self.0
                .as_mut()
                .expect("XZ writer is only removed while dropping")
        }
    }

    impl io::Write for Write {
        fn write(&mut self, buf: &[u8]) -> Result<usize> {
            self.inner().write(buf)
        }

        fn flush(&mut self) -> Result<()> {
            self.inner().flush()
        }

        fn write_all(&mut self, buf: &[u8]) -> Result<()> {
            self.inner().write_all(buf)
        }

        fn write_fmt(&mut self, fmt: Arguments) -> Result<()> {
            self.inner().write_fmt(fmt)
        }
    }

    impl Drop for Write {
        /// Finishes the XZ stream.
        ///
        /// # Panics
        ///
        /// Panics if finishing the stream fails, unless the thread is already
        /// unwinding from another panic.
        fn drop(&mut self) {
            if let Some(writer) = self.0.take() {
                if let Err(err) = writer.finish() {
                    // Panicking while the thread is already unwinding would
                    // abort the whole process, so the error is only reported
                    // when no other panic is in flight.
                    if !std::thread::panicking() {
                        panic!("Finish XZ stream: {:?}", err);
                    }
                }
            }
        }
    }

    /// Converts an `LzmaError` into an `io::Error`: wrapped I/O errors are
    /// passed through unchanged, everything else becomes `ErrorKind::Other`.
    fn into_io_error(err: lzma::error::LzmaError) -> Error {
        match err {
            lzma::error::LzmaError::Io(e) => e,
            other => Error::new(ErrorKind::Other, other),
        }
    }

    /// Opens the file at `path` and wraps it in a decompressing reader.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open`, or the converted error reported by
    /// `LzmaReader::new_decompressor`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        Read::new_decompressor(File::open(path)?).map_err(into_io_error)
    }

    /// Creates the file at `path` and wraps it in a compressing [`Write`].
    ///
    /// # Errors
    ///
    /// Returns the I/O error from creating the file, or the converted error
    /// reported while setting up the compressor.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        Write::new(path).map_err(into_io_error)
    }
}
215
216#[cfg(not(feature = "xz"))]
217mod xz {
218    use std::ffi::OsStr;
219    use std::io::Result;
220    use std::path::Path;
221    type Read = crate::tool::ToolRead;
222    type Write = crate::tool::ToolWrite;
223
224    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
225        Read::new(
226            "/usr/bin/xz",
227            [OsStr::new("-d"), OsStr::new("-c"), path.as_ref().as_os_str()],
228        )
229    }
230
231    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
232        Write::new_with_file("/usr/bin/xz", [OsStr::new("-")], path.as_ref().as_os_str())
233    }
234}
235
/// Zstandard backend built on the `zstd` crate; compiled when the `zstd` feature is enabled.
#[cfg(feature = "zstd")]
mod zstd {
    use std::fs::File;
    use std::io::{BufReader, Result};
    use std::path::Path;

    /// Streaming decoder over a buffered `File`.
    type Read = zstd::stream::Decoder<'static, BufReader<File>>;
    /// Streaming encoder that finishes its frame automatically.
    type Write = zstd::stream::AutoFinishEncoder<'static, File>;

    /// Opens the file at `path` and wraps it in a zstd stream decoder.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open` or of `Decoder::new`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let file = File::open(path)?;
        zstd::stream::Decoder::new(file)
    }

    /// Creates (or truncates) the file at `path` and wraps it in an
    /// auto-finishing zstd stream encoder created with level `0`.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::create` or of `Encoder::new`.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let file = File::create(path)?;
        // NOTE(review): level 0 presumably selects zstd's default level -- confirm in the zstd docs.
        let encoder = zstd::stream::Encoder::new(file, 0)?;
        Ok(encoder.auto_finish())
    }
}
253
254#[cfg(not(feature = "zstd"))]
255mod zstd {
256    use std::ffi::OsStr;
257    use std::io::Result;
258    use std::path::Path;
259    type Read = crate::tool::ToolRead;
260    type Write = crate::tool::ToolWrite;
261
262    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
263        Read::new(
264            "/usr/bin/zstd",
265            [OsStr::new("-d"), OsStr::new("-c"), path.as_ref().as_os_str()],
266        )
267    }
268
269    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
270        Write::new_with_file("/usr/bin/zstd", [OsStr::new("-")], path.as_ref().as_os_str())
271    }
272}
273/// Open a possibly compressed file for reading.
274///
275/// The file is specified by the given `path`. The file is
276/// decompressed by an external compression tool determined by the
277/// file extension:
278///
279///  * .gz uses `gzip`
280///  * .bzip2 uses `bzip2`
281///  * .xz and .lzma uses `xz`
282///  * .zst uses `zstd`
283///  * everything else open the file directly without compression.
284pub fn read(path: impl AsRef<Path>) -> std::io::Result<Box<dyn std::io::Read>> {
285    let ext = path.as_ref().extension().unwrap_or_else(|| OsStr::new(""));
286    Ok(if ext == "gz" {
287        Box::new(gz::read(path)?)
288    } else if ext == "bz2" {
289        Box::new(bzip::read(path)?)
290    } else if ext == "xz" || ext == "lzma" {
291        Box::new(xz::read(path)?)
292    } else if ext == "zst" {
293        Box::new(zstd::read(path)?)
294    } else {
295        Box::new(File::open(path)?)
296    })
297}
298
299/// Open a possibly compressed file for writing.
300///
301/// The file is specified by the given `path`. The file is
302/// decompressed by an external compression tool determined by the
303/// file extension:
304///
305///  * .gz uses `gzip`
306///  * .bzip2 uses `bzip2`
307///  * .xz and .lzma use `xz`
308///  * .zst use `zstd`
309///  * everything else open the file directly without compression.
310pub fn write(path: impl AsRef<Path>) -> std::io::Result<Box<dyn std::io::Write>> {
311    let ext = path.as_ref().extension().unwrap_or_else(|| OsStr::new(""));
312    Ok(if ext == "gz" {
313        Box::new(gz::write(path)?)
314    } else if ext == "bz2" {
315        Box::new(bzip::write(path)?)
316    } else if ext == "xz" || ext == "lzma" {
317        Box::new(xz::write(path)?)
318    } else if ext == "zst" {
319        Box::new(zstd::write(path)?)
320    } else {
321        Box::new(File::create(path)?)
322    })
323}
324
/// Round-trips a short text through [`write`] and [`read`] once per
/// supported extension (and once without any extension).
#[test]
fn test_write_and_read() {
    let expected = "Hello World!\n";
    let extensions = ["", ".gz", ".bz2", ".lzma", ".xz", ".zst"];

    for ext in extensions.iter() {
        let file_path = std::env::temp_dir().join(format!("__zopen-rs-test__{}", ext));
        let file_name = file_path.to_str().unwrap();

        let mut sink = write(file_name).unwrap();
        write!(sink, "{}", expected).unwrap();
        sink.flush().unwrap();
        // The writer has to be gone before the file is read back.
        drop(sink);

        assert!(file_path.exists());

        let mut source = read(file_name).unwrap();
        let mut contents = String::new();
        let bytes_read = source.read_to_string(&mut contents).unwrap();
        assert_eq!(bytes_read, expected.len());
        assert_eq!(contents, expected);
        drop(source);

        std::fs::remove_file(&file_path).unwrap();
    }
}