zopen 1.0.1

Automatically open compressed files.
Documentation
/*
 * MIT License
 *
 * Copyright (c) 2017-2025 Frank Fischer <frank-fischer@shadow-soft.de>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

//! Simple crate that automatically opens compressed files.
//!
//! The compressor used is determined by the file extension. If the
//! corresponding compression library is not available (i.e. the corresponding
//! feature is not activated), the crate tries to use an external compression
//! tool (gzip, bzip2, xz or zstd).
//!
//! The crate exports two functions [`read`] and [`write`]. Given a file path,
//! they return a `Box<dyn Read>` or a `Box<dyn Write>`, respectively, accessing the file.
//! Depending on the file extension, the file is filtered through an appropriate
//! compressor/decompressor.
//!
//! # Features
//!
//! The following features enable compression/decompression through external
//! crates. If a feature is disabled, `zopen` uses the corresponding command line tool
//! instead.
//!
//! - `gzip`: enables gzip compression through the `flate2` crate. If disabled, uses
//!           the external `gzip` command line tool.
//! - `bzip2`: enables bzip2 compression through the `bzip2` crate. If disabled, uses
//!           the external `bzip2` command line tool.
//! - `xz`: enables xz compression through the `rust-lzma` crate. If disabled, uses
//!           the external `xz` command line tool.
//! - `zstd`: enables zstd compression through the `zstd` crate. If disabled, uses
//!           the external `zstd` command line tool.
//! - `all`: enables all of the above.
//!
//! # Example
//!
//! Reading a compressed file:
//! ```rust,no_run
//! use std::io::Read;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut f = zopen::read("test.file.gz")?; // open gzip compressed file.
//! let mut data = String::new();
//! f.read_to_string(&mut data)?;
//! # Ok(())
//! # }
//! ```
//!
//! Writing to a compressed file:
//! ```rust,no_run
//! use std::io::Write;
//!
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! let mut f = zopen::write("test.file.zst")?; // create zstd compressed file.
//! writeln!(f, "{}: {}", "Hello world", 42)?;
//! # Ok(())
//! # }
//! ```
use std::ffi::OsStr;
use std::fs::File;
use std::path::Path;

#[allow(dead_code)]
mod tool;

// gzip support backed by the `flate2` crate (enabled by the `gzip` feature).
#[cfg(feature = "gzip")]
mod gz {
    use std::fs::File;
    use std::io::Result;
    use std::path::Path;

    // A gzip file may consist of several concatenated members. The
    // multi-member decoder reads all of them, whereas `GzDecoder` would stop
    // after the first member and silently drop the rest of the data.
    type Read = flate2::read::MultiGzDecoder<File>;
    type Write = flate2::write::GzEncoder<File>;

    /// Opens the file at `path` and wraps it in a gzip decoder.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open` if the file cannot be opened.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let file = File::open(path)?;
        Ok(Read::new(file))
    }

    /// Creates (or truncates) the file at `path` and wraps it in a gzip
    /// encoder using flate2's default compression level.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::create` if the file cannot be created.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let file = File::create(path)?;
        Ok(Write::new(file, flate2::Compression::default()))
    }
}

// Fallback gzip support used when the `gzip` feature is disabled: the work is
// delegated to the external `gzip` command line tool.
#[cfg(not(feature = "gzip"))]
mod gz {
    use std::ffi::OsStr;
    use std::io::Result;
    use std::path::Path;

    type Read = crate::tool::ToolRead;
    type Write = crate::tool::ToolWrite;

    /// Absolute location of the external tool.
    const TOOL: &str = "/usr/bin/gzip";

    /// Builds a reader from the tool invoked with `-d -c <path>`.
    ///
    /// NOTE(review): presumably the reader yields the tool's standard output;
    /// confirm against `ToolRead::new`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let source = path.as_ref().as_os_str();
        let args = [OsStr::new("-d"), OsStr::new("-c"), source];
        Read::new(TOOL, args)
    }

    /// Builds a writer from the tool invoked with `-`, handing `path` to
    /// `ToolWrite::new_with_file` as the target file.
    ///
    /// NOTE(review): presumably the tool's output ends up in that file;
    /// confirm against `ToolWrite::new_with_file`.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let target = path.as_ref().as_os_str();
        let args = [OsStr::new("-")];
        Write::new_with_file(TOOL, args, target)
    }
}

// bzip2 support backed by the `bzip2` crate (enabled by the `bzip2` feature).
#[cfg(feature = "bzip2")]
mod bzip {
    use std::fs::File;
    use std::io::Result;
    use std::path::Path;

    // NOTE(review): `BzDecoder` may stop after the first stream of a
    // multi-stream file; check whether multi-stream input must be supported.
    type Read = bzip2::read::BzDecoder<File>;
    type Write = bzip2::write::BzEncoder<File>;

    /// Opens the file at `path` and wraps it in a bzip2 decoder.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::open` if the file cannot be opened.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        File::open(path).map(Read::new)
    }

    /// Creates (or truncates) the file at `path` and wraps it in a bzip2
    /// encoder using the crate's default compression level.
    ///
    /// # Errors
    ///
    /// Returns the error of `File::create` if the file cannot be created.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        File::create(path).map(|file| Write::new(file, bzip2::Compression::default()))
    }
}

// Fallback bzip2 support used when the `bzip2` feature is disabled: the work
// is delegated to the external `bzip2` command line tool.
#[cfg(not(feature = "bzip2"))]
mod bzip {
    use std::ffi::OsStr;
    use std::io::Result;
    use std::path::Path;

    type Read = crate::tool::ToolRead;
    type Write = crate::tool::ToolWrite;

    /// Absolute location of the external tool.
    const TOOL: &str = "/usr/bin/bzip2";

    /// Builds a reader from the tool invoked with `-d -c <path>`.
    ///
    /// NOTE(review): presumably the reader yields the tool's standard output;
    /// confirm against `ToolRead::new`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let source = path.as_ref().as_os_str();
        let args = [OsStr::new("-d"), OsStr::new("-c"), source];
        Read::new(TOOL, args)
    }

    /// Builds a writer from the tool invoked with `-`, handing `path` to
    /// `ToolWrite::new_with_file` as the target file.
    ///
    /// NOTE(review): presumably the tool's output ends up in that file;
    /// confirm against `ToolWrite::new_with_file`.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let target = path.as_ref().as_os_str();
        let args = [OsStr::new("-")];
        Write::new_with_file(TOOL, args, target)
    }
}

// xz/lzma support backed by the `lzma` (rust-lzma) crate (enabled by the `xz`
// feature).
#[cfg(feature = "xz")]
mod xz {
    use std::fmt::Arguments;
    use std::fs::File;
    use std::io::{self, Error, ErrorKind, Result};
    use std::path::Path;
    use std::result;
    use std::thread;

    type Read = lzma::LzmaReader<File>;

    /// Writer that finishes the XZ stream when it is dropped.
    ///
    /// The inner writer lives in an `Option` because `finish` consumes it,
    /// while `Drop::drop` only receives `&mut self`; it is `Some` for the
    /// whole lifetime of the value and only taken out in `drop`.
    pub struct Write(Option<lzma::LzmaWriter<File>>);

    impl Write {
        /// Creates (or truncates) the file at `path` and wraps it in an LZMA
        /// compressor with preset `6`.
        fn new(path: impl AsRef<Path>) -> result::Result<Self, lzma::error::LzmaError> {
            Ok(Write(Some(lzma::LzmaWriter::new_compressor(File::create(path)?, 6)?)))
        }

        /// Gives access to the wrapped writer.
        fn inner(&mut self) -> &mut lzma::LzmaWriter<File> {
            self.0
                .as_mut()
                .expect("XZ writer is only taken out while dropping")
        }
    }

    impl io::Write for Write {
        fn write(&mut self, buf: &[u8]) -> Result<usize> {
            self.inner().write(buf)
        }

        fn flush(&mut self) -> Result<()> {
            self.inner().flush()
        }

        fn write_all(&mut self, buf: &[u8]) -> Result<()> {
            self.inner().write_all(buf)
        }

        fn write_fmt(&mut self, fmt: Arguments) -> Result<()> {
            self.inner().write_fmt(fmt)
        }
    }

    impl Drop for Write {
        /// Finishes the XZ stream.
        ///
        /// # Panics
        ///
        /// Panics if finishing the stream fails, unless the thread is already
        /// unwinding: a second panic during unwinding would abort the whole
        /// process, so in that case the error is dropped instead.
        fn drop(&mut self) {
            if let Some(writer) = self.0.take() {
                if let Err(err) = writer.finish() {
                    if !thread::panicking() {
                        panic!("Finish XZ stream: {:?}", err);
                    }
                }
            }
        }
    }

    /// Converts an LZMA error into an I/O error: genuine I/O errors are
    /// passed through unchanged, everything else is wrapped as
    /// `ErrorKind::Other`.
    fn into_io_error(err: lzma::error::LzmaError) -> Error {
        match err {
            lzma::error::LzmaError::Io(e) => e,
            other => Error::new(ErrorKind::Other, other),
        }
    }

    /// Opens the file at `path` and wraps it in an LZMA decompressor.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or the decompressor
    /// cannot be set up.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        Read::new_decompressor(File::open(path)?).map_err(into_io_error)
    }

    /// Creates the file at `path` and wraps it in an LZMA compressor.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created or the compressor
    /// cannot be set up.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        Write::new(path).map_err(into_io_error)
    }
}

// Fallback xz support used when the `xz` feature is disabled: the work is
// delegated to the external `xz` command line tool.
#[cfg(not(feature = "xz"))]
mod xz {
    use std::ffi::OsStr;
    use std::io::Result;
    use std::path::Path;

    type Read = crate::tool::ToolRead;
    type Write = crate::tool::ToolWrite;

    /// Absolute location of the external tool.
    const TOOL: &str = "/usr/bin/xz";

    /// Builds a reader from the tool invoked with `-d -c <path>`.
    ///
    /// NOTE(review): presumably the reader yields the tool's standard output;
    /// confirm against `ToolRead::new`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let source = path.as_ref().as_os_str();
        let args = [OsStr::new("-d"), OsStr::new("-c"), source];
        Read::new(TOOL, args)
    }

    /// Builds a writer from the tool invoked with `-`, handing `path` to
    /// `ToolWrite::new_with_file` as the target file.
    ///
    /// NOTE(review): presumably the tool's output ends up in that file;
    /// confirm against `ToolWrite::new_with_file`.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let target = path.as_ref().as_os_str();
        let args = [OsStr::new("-")];
        Write::new_with_file(TOOL, args, target)
    }
}

// zstd support backed by the `zstd` crate (enabled by the `zstd` feature).
#[cfg(feature = "zstd")]
mod zstd {
    use std::fs::File;
    use std::io::{BufReader, Result};
    use std::path::Path;

    type Read = zstd::stream::Decoder<'static, BufReader<File>>;
    type Write = zstd::stream::AutoFinishEncoder<'static, File>;

    /// Opens the file at `path` and wraps it in a zstd decoder.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or the decoder cannot
    /// be created.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let source = File::open(path)?;
        zstd::stream::Decoder::new(source)
    }

    /// Creates (or truncates) the file at `path` and wraps it in a zstd
    /// encoder (level argument `0`) that finishes the stream automatically.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created or the encoder cannot
    /// be created.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let sink = File::create(path)?;
        let encoder = zstd::stream::Encoder::new(sink, 0)?;
        Ok(encoder.auto_finish())
    }
}

// Fallback zstd support used when the `zstd` feature is disabled: the work is
// delegated to the external `zstd` command line tool.
#[cfg(not(feature = "zstd"))]
mod zstd {
    use std::ffi::OsStr;
    use std::io::Result;
    use std::path::Path;

    type Read = crate::tool::ToolRead;
    type Write = crate::tool::ToolWrite;

    /// Absolute location of the external tool.
    const TOOL: &str = "/usr/bin/zstd";

    /// Builds a reader from the tool invoked with `-d -c <path>`.
    ///
    /// NOTE(review): presumably the reader yields the tool's standard output;
    /// confirm against `ToolRead::new`.
    pub fn read(path: impl AsRef<Path>) -> Result<Read> {
        let source = path.as_ref().as_os_str();
        let args = [OsStr::new("-d"), OsStr::new("-c"), source];
        Read::new(TOOL, args)
    }

    /// Builds a writer from the tool invoked with `-`, handing `path` to
    /// `ToolWrite::new_with_file` as the target file.
    ///
    /// NOTE(review): presumably the tool's output ends up in that file;
    /// confirm against `ToolWrite::new_with_file`.
    pub fn write(path: impl AsRef<Path>) -> Result<Write> {
        let target = path.as_ref().as_os_str();
        let args = [OsStr::new("-")];
        Write::new_with_file(TOOL, args, target)
    }
}
/// Open a possibly compressed file for reading.
///
/// The decompressor is selected by the extension of `path` (compared
/// case-sensitively). Depending on the enabled crate features the data is
/// decoded either by the corresponding compression crate or by the external
/// command line tool:
///
///  * `.gz` uses `gzip`
///  * `.bz2` uses `bzip2`
///  * `.xz` and `.lzma` use `xz`
///  * `.zst` uses `zstd`
///  * everything else opens the file directly without decompression.
///
/// # Errors
///
/// Returns the I/O error raised while opening the file or while setting up
/// the decompressor.
pub fn read(path: impl AsRef<Path>) -> std::io::Result<Box<dyn std::io::Read>> {
    // A missing or non-UTF-8 extension matches none of the known suffixes and
    // therefore falls through to the plain-file case.
    let suffix = path.as_ref().extension().and_then(OsStr::to_str);
    let reader: Box<dyn std::io::Read> = match suffix {
        Some("gz") => Box::new(gz::read(path)?),
        Some("bz2") => Box::new(bzip::read(path)?),
        Some("xz") | Some("lzma") => Box::new(xz::read(path)?),
        Some("zst") => Box::new(zstd::read(path)?),
        _ => Box::new(File::open(path)?),
    };
    Ok(reader)
}

/// Open a possibly compressed file for writing.
///
/// The compressor is selected by the extension of `path` (compared
/// case-sensitively). Depending on the enabled crate features the data is
/// compressed either by the corresponding compression crate or by the
/// external command line tool:
///
///  * `.gz` uses `gzip`
///  * `.bz2` uses `bzip2`
///  * `.xz` and `.lzma` use `xz`
///  * `.zst` uses `zstd`
///  * everything else creates the file directly without compression.
///
/// # Errors
///
/// Returns the I/O error raised while creating the file or while setting up
/// the compressor.
pub fn write(path: impl AsRef<Path>) -> std::io::Result<Box<dyn std::io::Write>> {
    // A missing or non-UTF-8 extension matches none of the known suffixes and
    // therefore falls through to the plain-file case.
    let suffix = path.as_ref().extension().and_then(OsStr::to_str);
    let writer: Box<dyn std::io::Write> = match suffix {
        Some("gz") => Box::new(gz::write(path)?),
        Some("bz2") => Box::new(bzip::write(path)?),
        Some("xz") | Some("lzma") => Box::new(xz::write(path)?),
        Some("zst") => Box::new(zstd::write(path)?),
        _ => Box::new(File::create(path)?),
    };
    Ok(writer)
}

/// Round-trip test: for every supported extension, write a short text with
/// [`write`], read it back with [`read`] and compare the result.
#[test]
fn test_write_and_read() {
    // `write!`, `flush` and `read_to_string` are trait methods; the traits
    // are not imported at file level, so bring them into scope here.
    use std::io::{Read, Write};

    let test_str = "Hello World!\n";

    for &ext in &["", ".gz", ".bz2", ".lzma", ".xz", ".zst"] {
        let mut path = std::env::temp_dir();
        path.push(format!("__zopen-rs-test__{}", ext));

        // The writer is dropped at the end of this scope so that the
        // compressed stream is finished before the file is read back.
        {
            let mut f = write(&path).unwrap();
            write!(f, "{}", test_str).unwrap();
            f.flush().unwrap();
        }

        assert!(path.exists());

        {
            let mut f = read(&path).unwrap();
            let mut data = String::new();
            assert_eq!(f.read_to_string(&mut data).unwrap(), test_str.len());
            assert_eq!(data, test_str);
        }

        std::fs::remove_file(&path).unwrap();
    }
}