autocompress 0.6.0

Automatically select suitable decoder from magic bytes or encoder from file extension.
Documentation
#![cfg_attr(doc_cfg, feature(doc_cfg))]
//! # autocompress
//!
//! A library for reading and writing compressed files with async support and automatic format detection.
//!
//! ## Feature flags
//! * `gzip` : Gzip format support
//! * `bgzip` : [bgzip](https://github.com/informationsea/bgzip-rs) format support
//! * `bzip2` : Bzip2 format support
//! * `xz` : XZ format support
//! * `zstd` : Zstd format support
//! * `rayon` : Off-load compression and decompression process to another thread using [rayon](https://crates.io/crates/rayon)
//! * `tokio` : Async reader and writer support with [tokio](https://crates.io/crates/tokio)
//! * `tokio_fs`: Enable `autodetect_async_open` function
//!
//! ## Migration from previous versions
//!
//! This version drops support for some formats, such as snappy, lz4 and brotli.
//! Function names have also changed. Please replace the following functions to migrate from previous versions.
//!
//! * `create` -> [`autodetect_create`]
//! * `open` -> [`autodetect_open`]
//! * `create_or_stdout` -> [`autodetect_create_or_stdout`]
//! * `open_or_stdin` -> [`autodetect_open_or_stdin`]
//! * `suggest_format` -> [`FileFormat::from_buf_reader`]
//! * `suggest_format_from_path` -> [`FileFormat::from_path`]
//!
//! ## Example
//!
//! ### Read from a file
//! ```
//! # use std::io::prelude::*;
//! use autocompress::autodetect_open;
//!
//! # fn main() -> anyhow::Result<()> {
//! let mut reader = autodetect_open("testfiles/pg2701.txt.xz")?;
//! let mut buf = Vec::new();
//! reader.read_to_end(&mut buf)?;
//! # Ok(())
//! # }
//! ```
//!
//! ### Write to a file
//!
//! ```
//! # use std::io::prelude::*;
//! use autocompress::{autodetect_create, CompressionLevel};
//!
//! # fn main() -> anyhow::Result<()> {
//! let mut writer = autodetect_create("target/doc-index.xz", CompressionLevel::Default)?;
//! writer.write_all(&b"Hello, world\n"[..])?;
//! # Ok(())
//! # }
//! ```
//!
//! ### Parallel Compression
//!
//! ```
//! # use std::io::prelude::*;
//! use autocompress::{autodetect_parallel_create, CompressionLevel};
//!
//! # fn main() -> anyhow::Result<()> {
//! let mut writer = autodetect_parallel_create("target/doc-index2.xz", CompressionLevel::Default)?;
//! writer.write_all(&b"Hello, world\n"[..])?;
//! # Ok(())
//! # }
//! ```
// Private module; its public items are re-exported at the crate root by the
// `pub use autodetect::*` at the end of this block.
mod autodetect;
// Format-specific modules. Each one is compiled only when the feature named in
// its `#[cfg(feature = ...)]` attribute is enabled; the `doc_cfg` attribute
// records the feature shown in generated documentation.
#[cfg(feature = "bgzip")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "bgzip")))]
pub mod bgzip;
#[cfg(feature = "bzip2")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "bzip2")))]
pub mod bzip2;
// Private module; `Error` and `Result` are re-exported at the end of this block.
mod error;
// NOTE(review): `gzip` and `zlib` are compiled under the `flate2` feature but are
// labelled with the `gzip` feature in the docs. Presumably enabling `gzip` also
// enables `flate2`; confirm against Cargo.toml.
#[cfg(feature = "flate2")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "gzip")))]
pub mod gzip;
#[cfg(feature = "xz")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "xz")))]
/// XZ format support
pub mod xz;
#[cfg(feature = "flate2")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "gzip")))]
/// Zlib format support
pub mod zlib;
#[cfg(feature = "zstd")]
#[cfg_attr(doc_cfg, doc(cfg(feature = "zstd")))]
pub mod zstd;

/// Reader and Writer implementations for [`Processor`]
pub mod io;

// Crate-root re-exports: everything public in `autodetect`, plus the crate's
// `Error` and `Result` types.
pub use autodetect::*;
pub use error::{Error, Result};

/// Crate-private helpers for pulling fixed-width integers out of any
/// [`std::io::Read`] source.
///
/// Every method fills a small stack buffer with `read_exact`, so an error
/// returned by `read_exact` is handed back to the caller unchanged.
pub(crate) trait ReadExt: std::io::Read {
    /// Reads exactly one byte and returns it.
    fn read_u8(&mut self) -> std::io::Result<u8> {
        let mut byte = [0u8; 1];
        self.read_exact(&mut byte).map(|()| byte[0])
    }

    /// Reads exactly two bytes and decodes them as a little-endian `u16`.
    fn read_u16_le(&mut self) -> std::io::Result<u16> {
        let mut raw = [0u8; 2];
        self.read_exact(&mut raw).map(|()| u16::from_le_bytes(raw))
    }

    /// Reads exactly four bytes and decodes them as a little-endian `u32`.
    fn read_u32_le(&mut self) -> std::io::Result<u32> {
        let mut raw = [0u8; 4];
        self.read_exact(&mut raw).map(|()| u32::from_le_bytes(raw))
    }
}

// Blanket implementation: every reader gets the helpers above for free.
impl<R: std::io::Read> ReadExt for R {}

/// Crate-private helpers for emitting fixed-width integers to any
/// [`std::io::Write`] sink.
///
/// Every method forwards to `write_all`, so its error is returned unchanged.
pub(crate) trait WriteExt: std::io::Write {
    /// Writes a single byte.
    fn write_u8(&mut self, value: u8) -> std::io::Result<()> {
        let byte = [value];
        self.write_all(&byte)
    }

    /// Writes `value` as two bytes in little-endian order.
    fn write_u16_le(&mut self, value: u16) -> std::io::Result<()> {
        let encoded = value.to_le_bytes();
        self.write_all(&encoded)
    }

    /// Writes `value` as four bytes in little-endian order.
    fn write_u32_le(&mut self, value: u32) -> std::io::Result<()> {
        let encoded = value.to_le_bytes();
        self.write_all(&encoded)
    }
}

// Blanket implementation: every writer gets the helpers above for free.
impl<W: std::io::Write> WriteExt for W {}

/// Processed status
///
/// This is the success value returned by [`Processor::process`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Status {
    /// No error
    Ok,
    /// End of stream
    StreamEnd,
}

/// Values which indicate the form of flushing to be used when processing data.
///
/// A value of this type is passed as the `flush` argument of [`Processor::process`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Flush {
    /// No flush. Continue processing input data.
    None,
    /// Flush and finish the stream.
    Finish,
}

/// Process an in-memory stream of data.
///
/// This trait is inspired by [flate2::Compress](https://docs.rs/flate2/1.0.20/flate2/struct.Compress.html).
/// An implementation consumes input bytes and generates output bytes. The number of processed bytes and
/// generated bytes can be obtained with the [`total_in`](Processor::total_in) and [`total_out`](Processor::total_out) methods.
/// Example implementations are [`PlainProcessor`], [`GzipCompress`](gzip::GzipCompress) and [`GzipDecompress`](gzip::GzipDecompress).
pub trait Processor: Unpin {
    /// Total number of bytes processed
    fn total_in(&self) -> u64;

    /// Total number of bytes generated
    fn total_out(&self) -> u64;

    /// Process some input data and generate output data.
    ///
    /// Bytes are read from `input` and written to `output`. The return value
    /// carries no byte counts; callers can compare [`total_in`](Processor::total_in)
    /// and [`total_out`](Processor::total_out) before and after the call.
    ///
    /// If `flush` is [`Flush::Finish`], the processor will try to finish the stream.
    fn process(&mut self, input: &[u8], output: &mut [u8], flush: Flush) -> Result<Status>;

    /// Reset processor state
    ///
    /// `total_in` and `total_out` will be reset to zero.
    fn reset(&mut self);
}

/// Lets a boxed trait object be used wherever a [`Processor`] is expected.
///
/// Every method forwards to the boxed processor.
impl Processor for Box<dyn Processor> {
    fn total_in(&self) -> u64 {
        // Dereference through the box so the call reaches the inner object
        // rather than this impl again.
        (**self).total_in()
    }

    fn total_out(&self) -> u64 {
        (**self).total_out()
    }

    fn process(&mut self, input: &[u8], output: &mut [u8], flush: Flush) -> Result<Status> {
        (**self).process(input, output, flush)
    }

    fn reset(&mut self) {
        (**self).reset()
    }
}

/// Lets a boxed, sendable trait object be used wherever a [`Processor`] is expected.
///
/// Every method forwards to the boxed processor.
impl Processor for Box<dyn Processor + Unpin + Send> {
    fn total_in(&self) -> u64 {
        // Dereference through the box so the call reaches the inner object
        // rather than this impl again.
        (**self).total_in()
    }

    fn total_out(&self) -> u64 {
        (**self).total_out()
    }

    fn process(&mut self, input: &[u8], output: &mut [u8], flush: Flush) -> Result<Status> {
        (**self).process(input, output, flush)
    }

    fn reset(&mut self) {
        (**self).reset()
    }
}

/// Pass-through processor.
///
/// This processor neither compresses nor decompresses: data is transferred as is.
/// It only keeps running counts of the bytes taken in and given out.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PlainProcessor {
    // Number of input bytes accepted so far.
    total_in: u64,
    // Number of output bytes produced so far.
    total_out: u64,
}

impl PlainProcessor {
    /// Creates a pass-through processor with both byte counters at zero.
    pub fn new() -> Self {
        PlainProcessor {
            total_in: 0,
            total_out: 0,
        }
    }
}

impl Processor for PlainProcessor {
    /// Number of input bytes copied so far.
    fn total_in(&self) -> u64 {
        self.total_in
    }

    /// Number of output bytes written so far (always equal to `total_in`).
    fn total_out(&self) -> u64 {
        self.total_out
    }

    /// Copies as many bytes as fit from `input` into the front of `output`.
    ///
    /// At most `min(input.len(), output.len())` bytes are copied, and both
    /// counters advance by that amount. When `output` is shorter than `input`
    /// the remaining input is left unconsumed; the caller can detect this from
    /// the change in `total_in` and call again with the rest.
    ///
    /// Returns [`Status::StreamEnd`] only when `flush` is [`Flush::Finish`] and
    /// the whole of `input` was copied. Otherwise returns [`Status::Ok`], so a
    /// caller that stops at `StreamEnd` never drops trailing input.
    /// This implementation never returns an error.
    fn process(&mut self, input: &[u8], output: &mut [u8], flush: Flush) -> Result<Status> {
        // `len` is bounded by both slices, so the range indexing below cannot panic.
        let len = std::cmp::min(input.len(), output.len());
        output[..len].copy_from_slice(&input[..len]);

        let copied: u64 = len
            .try_into()
            .expect("a slice length always fits in u64");
        self.total_in += copied;
        self.total_out += copied;

        // The stream is finished only if nothing is left to copy.
        let all_consumed = len == input.len();
        match flush {
            Flush::Finish if all_consumed => Ok(Status::StreamEnd),
            Flush::None | Flush::Finish => Ok(Status::Ok),
        }
    }

    /// Resets both byte counters to zero.
    fn reset(&mut self) {
        self.total_in = 0;
        self.total_out = 0;
    }
}

/// Compression level for compress processors
///
/// Variants are declared from fastest to highest compression, so the derived
/// ordering satisfies `Fastest < Fast < Default < High < Highest`.
/// [`CompressionLevel::Default`] is the value returned by `Default::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum CompressionLevel {
    /// Fastest compression level
    Fastest,
    /// Fast compression level
    Fast,
    /// Default compression level
    #[default]
    Default,
    /// High compression level
    High,
    /// Highest compression level
    Highest,
}

impl CompressionLevel {
    /// Fastest compression level
    pub fn fast() -> Self {
        Self::Fastest
    }

    /// Highest compression level
    pub fn best() -> Self {
        Self::Highest
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "gzip")))]
    #[cfg(feature = "flate2")]
    pub fn flate2(self) -> flate2::Compression {
        match self {
            Self::Fastest => flate2::Compression::fast(),
            Self::Fast => flate2::Compression::new(3),
            Self::Default => flate2::Compression::default(),
            Self::High => flate2::Compression::new(7),
            Self::Highest => flate2::Compression::best(),
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "bgzip")))]
    #[cfg(feature = "bgzip")]
    pub fn bgzip(self) -> ::bgzip::Compression {
        match self {
            Self::Fastest => ::bgzip::Compression::fast(),
            Self::Fast => ::bgzip::Compression::new(3).expect("Unexpected compression level"),
            Self::Default => ::bgzip::Compression::default(),
            Self::High => ::bgzip::Compression::new(7).expect("Unexpected compression level"),
            Self::Highest => ::bgzip::Compression::best(),
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "bzip2")))]
    #[cfg(feature = "bzip2")]
    pub fn bzip2(self) -> ::bzip2::Compression {
        match self {
            Self::Fastest => ::bzip2::Compression::fast(),
            Self::Fast => ::bzip2::Compression::new(3),
            Self::Default => ::bzip2::Compression::default(),
            Self::High => ::bzip2::Compression::new(7),
            Self::Highest => ::bzip2::Compression::best(),
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "xz")))]
    #[cfg(feature = "xz")]
    pub fn xz(self) -> u32 {
        match self {
            Self::Fastest => 1,
            Self::Fast => 3,
            Self::Default => 6,
            Self::High => 7,
            Self::Highest => 9,
        }
    }

    #[cfg_attr(doc_cfg, doc(cfg(feature = "zstd")))]
    #[cfg(feature = "zstd")]
    pub fn zstd(self) -> i32 {
        match self {
            Self::Fastest => 1,
            Self::Fast => 2,
            Self::Default => ::zstd::DEFAULT_COMPRESSION_LEVEL,
            Self::High => 7,
            Self::Highest => 9,
        }
    }
}

// Unit tests live in a separate `tests` module that is compiled only for test builds.
#[cfg(test)]
mod tests;