compress_io 0.6.0

Convenience library for reading and writing compressed files/streams
Documentation
//! Convenience library for reading and writing compressed files / streams
//!
//! `compress_io` does not provide the compression/decompression itself but uses external utilities
//! such as [gzip], [bzip2] or [zstd] as read or write filters.  The aim of `compress_io` is to make
//! it simple for an application to support multiple compression formats with a minimal effort
//! from the developer and also from the user (i.e., an application can accept uncompressed
//! or compressed input in a range of different formats and neither the developer nor the user
//! has to specify which formats have been used).
//!
//! ## Overview
//!
//! The main way to work with `compress_io` is via [`CompressIo`] (or [`AsyncCompressIo`] in the
//! case of `async` code).  A reader (implementing [`Read`]), buffered reader (implementing
//! [`BufRead`]), writer or buffered writer (both implementing [`Write`]) can be generated from
//! [`CompressIo`] (or [`AsyncCompressIo`]).  By default readers and writers use `stdin` and
//! `stdout`, but a file path can also be specified with [`path`].  By default `compress_io` will
//! detect the compression format of compressed input files automatically based on the initial
//! contents of the file/stream and select an appropriate utility if available in the user's
//! `$PATH`, and the format of output files based on the file extension.  These automatic methods
//! can be overridden by [`ctype`].  `compress_io` will make use of parallel versions of
//! compression utilities if available.  By default the compression utilities will be run
//! with the default threading options, but this behaviour can be changed using [`cthreads`].
//!
//! ## Examples
//!
//! ```no_run
//! use std::io::{self, BufRead, Write};
//! use compress_io::compress::CompressIo;
//!
//! fn main() -> io::Result<()> {
//!   // Read from a (presumably) gzipped file foo.gz and write out to file `foo.xz` which will be
//!   // compressed using [xz] (assuming both [gzip] and [xz] are in the user's `$PATH`).
//!   // In this example both read and write streams are buffered
//!   let mut reader = CompressIo::new().path("foo.gz").bufreader()?;
//!   let mut writer = CompressIo::new().path("foo.xz").bufwriter()?;
//!   for s in reader.lines().map(|l| l.expect("Read error")) {
//!     writeln!(writer, "{}", s)?
//!   }
//!   Ok(())
//! }
//! ```
//!
//! Decompression utilities can be specified by the user, or can be selected automatically
//! based on an examination of the first few bytes of the input.
//!
//! ```no_run
//! # use std::io;
//! use compress_io::{
//!   compress::CompressIo,
//!   compress_type::CompressType,
//! };
//!
//! # fn main() -> io::Result<()> {
//! // Open a reader from `stdin`, using the first bytes from the file to determine whether the
//! // file is compressed or not
//! let mut rd1 = CompressIo::new().reader()?;
//! // Open a buffered reader from `foo.bz2` using [bzip2] to decompress
//! let mut rd2 = CompressIo::new().path("foo.bz2").ctype(CompressType::Bzip2).bufreader()?;
//! # Ok(())
//! # }
//! ```
//!
//! Compression utilities can also either be explicitly selected, or they can
//! be set automatically based on the file name (so a file called `test.zst` would be
//! compressed using the [zstd] utility).  If the compression format is selected explicitly then
//! the extension will be added to the filename unless the extension is already present, or the
//! [`fix_path`] option has been selected.
//!
//! ```no_run
//! # use std::io;
//! use compress_io::{
//!   compress::CompressIo,
//!   compress_type::CompressType,
//! };
//!
//! # fn main() -> io::Result<()> {
//! // Open a compressed writer to `stdout`, using [zstd] to compress the stream
//! let mut wrt1 = CompressIo::new().ctype(CompressType::Zstd).writer()?;
//! // Open a compressed buffered writer to the file `foo.lzma` using lzma to compress
//! let mut wrt2 = CompressIo::new().path("foo").ctype(CompressType::Lzma).bufwriter()?;
//! # Ok(())
//! # }
//! ```
//!
//! Several of the possible compression formats can be
//! generated by multiple utilities, and this allows alternate utilities to be used if the
//! standard utility is not available.
//!
//! For example, the standard utility for *xz* compression
//! is the [xz] tool, however [zstd] can also perform *xz* compression and will be substituted by
//! the library if [xz] is not available.  Note that if *bgzip* compression is
//! requested then only the [bgzip] utility will be used; even though *bgzip* compression is
//! compatible with the *gzip* format and can be decoded by any compressor that handles
//! *gzip*, extra information is added during compression by [bgzip] that other utilities
//! do not generate.
//!
//! For compression, certain of the utilities are multi-threaded.   If multiple utilities are
//! available to perform a given compression type, preference will be given to multi-threaded
//! versions. For example, if *gzip* compression is requested and the [pigz] utility is available
//! in the current `$PATH` then this will be used in favour of [gzip].  For compression the user can
//! specify a preference for threading (where available) using [`cthreads`].
//!
//! ```no_run
//! # use std::io;
//! use compress_io::{
//!   compress::CompressIo,
//!   compress_type::{CompressType, CompressThreads},
//! };
//!
//! # fn main() -> io::Result<()> {
//! // Open a compressed buffered writer to `stdout`, using [zstd] to compress the stream
//! // using 4 threads
//! let mut wrt = CompressIo::new().ctype(CompressType::Zstd)
//!   .cthreads(CompressThreads::Set(4)).bufwriter()?;
//! # Ok(())
//! # }
//! ```
//!
//! ## Usage
//!
//! For usage with synchronous code only, add `compress_io` as a dependency in your `Cargo.toml` to
//! use from crates.io:
//!
//! ```toml
//! [dependencies]
//! compress_io = "0.6"
//! ```
//!
//! For use with asynchronous code then the `async` feature should be enabled:
//!
//! ```toml
//! [dependencies]
//! compress_io = { version = "0.6", features = ["async"] }
//! ```
//!
//! [`CompressIo`]: crate::compress::CompressIo
//! [`AsyncCompressIo`]: crate::async::compress::AsyncCompressIo
//! [`path`]: crate::compress::CompressIo::path
//! [`ctype`]: crate::compress::CompressIo::ctype
//! [`cthreads`]: crate::compress::CompressIo::cthreads
//! [`fix_path`]: crate::compress::CompressIo::fix_path
//!
//! [`Read`]: std::io::Read
//! [`BufRead`]: std::io::BufRead
//! [`Write`]: std::io::Write
//!
//! [gzip]: http://www.gzip.org/
//! [bgzip]: https://www.htslib.org/doc/bgzip.html
//! [pigz]: https://www.zlib.net/pigz/
//! [bzip2]: https://sourceware.org/bzip2/
//! [zstd]: https://facebook.github.io/zstd/
//! [xz]: https://tukaani.org/xz/
//! [lzma]: https://tukaani.org/lzma/

// `#[macro_use]` imports the macros exported by `lazy_static` for use throughout the crate.
// It has to appear before the module declarations below so that they can see those macros.
#[macro_use]
extern crate lazy_static;

// Public API modules.
// `compress` provides `CompressIo`, the builder used for readers and writers.
pub mod compress;
// `compress_type` provides `CompressType` and `CompressThreads`.
pub mod compress_type;
pub mod filter_spec;
pub mod tools;
// `path_utils` provides `cond_add_suffix` and `guess_ctype_from_file` (both used by the tests below).
pub mod path_utils;

// Only compiled when the `async` cargo feature is enabled.  `async` is a reserved keyword, so
// the module name has to be written as the raw identifier `r#async`.
#[cfg(feature = "async")]
pub mod r#async;

/// Round-trip tests: write a short string through a compressing writer, then check that the
/// resulting file is detected as the expected format and decompresses back to the same string.
///
/// The crate delegates (de)compression to external utilities, so these tests presumably need
/// the relevant tools to be installed and reachable via `$PATH`.
#[cfg(test)]
mod tests {
	use crate::{
		compress::CompressIo,
		compress_type::*
	};
	use std::{
		path::{PathBuf, Path},
		io::{Read, Write},
	};
	use tempfile::TempDir;

	/// Text written to, and expected back from, every test file.
	const PAYLOAD: &str = "Testing testing 123";

	/// Scratch directory for a single test.
	///
	/// `_tempdir` is never read; it is stored so that the `TempDir` guard stays alive for as
	/// long as this value does.
	struct TmpDir {
		path: PathBuf,
		_tempdir: TempDir,
	}

	impl TmpDir {
		/// Creates a fresh temporary directory, panicking if that fails.
		fn new() -> Self {
			let guard = tempfile::tempdir().unwrap();
			let root = guard.path().to_path_buf();
			Self { path: root, _tempdir: guard }
		}

		/// Returns the path of `name` inside the temporary directory.
		fn mkpath(&self, name: &str) -> PathBuf {
			self.path.join(name)
		}

		/// Round trip where the output format is chosen from the file suffix.
		///
		/// * `ctypes[0]` - format that `CompressType::from_suffix` must report for `name`
		/// * `ctypes[1]` - format that must be detected from the written file
		/// * `bufwriter` - write through `bufwriter()` when true, `writer()` otherwise
		fn test_rw(&self, name: &str, ctypes: [CompressType; 2], bufwriter: bool) {
			let [from_suffix, from_content] = ctypes;
			let target = self.mkpath(name);
			assert_eq!(CompressType::from_suffix(&target), from_suffix);

			// The writer is consumed (and therefore dropped) by `write_payload`, so the file
			// is complete before it is read back below.
			if bufwriter {
				let sink = CompressIo::new().path(&target).bufwriter().expect("Could not make file");
				write_payload(sink);
			} else {
				let sink = CompressIo::new().path(&target).writer().expect("Could not make file");
				write_payload(sink);
			}

			test_rd(&target, from_content, PAYLOAD);
		}

		/// Round trip where the output format is requested explicitly via `ctype()`.
		///
		/// * `ctypes[0]` - format requested when writing
		/// * `ctypes[1]` - format that must be detected from the written file
		fn test1_rw(&self, name: &str, ctypes: [CompressType; 2]) {
			let [requested, from_content] = ctypes;
			let base = self.mkpath(name);

			let sink = CompressIo::new().path(&base).ctype(requested).writer().expect("Could not make file");
			write_payload(sink);

			// `name` is given without a suffix, so rebuild the on-disk path the same way the
			// writer is expected to have done.
			let written = crate::path_utils::cond_add_suffix(base, requested.suffix());
			test_rd(&written, from_content, PAYLOAD);
		}
	}

	/// Writes `PAYLOAD` to `sink`, then drops `sink` on return.
	fn write_payload<W: Write>(mut sink: W) {
		write!(sink, "{}", PAYLOAD).expect("Error writing to file");
	}

	/// Checks that `name` is detected as `ctype` and that its decompressed contents equal
	/// `test_string`.
	fn test_rd(name: &Path, ctype: CompressType, test_string: &str) {
		let detected = crate::path_utils::guess_ctype_from_file(name, None).expect("Error reading file");
		assert_eq!(detected, ctype);

		let mut source = CompressIo::new().path(name).bufreader().expect("Couldn't open bufreader");
		let mut contents = String::new();
		source.read_to_string(&mut contents).expect("Couldn't read from file");

		assert_eq!(contents, test_string)
	}

	// Format selected from the file suffix.

	#[test]
	fn test_gzip() {
		TmpDir::new().test_rw("test.gz", [CompressType::Gzip; 2], false);
	}

	#[test]
	fn test_gzip_buf() {
		TmpDir::new().test_rw("test.gz", [CompressType::Gzip; 2], true);
	}

	#[test]
	fn test_bzip2() {
		TmpDir::new().test_rw("test.bz2", [CompressType::Bzip2; 2], false);
	}

	#[test]
	fn test_xz() {
		TmpDir::new().test_rw("test.xz", [CompressType::Xz; 2], false);
	}

	#[test]
	fn test_lz4() {
		TmpDir::new().test_rw("test.lz4", [CompressType::Lz4; 2], false);
	}

	#[test]
	fn test_lzma() {
		TmpDir::new().test_rw("test.lzma", [CompressType::Lzma; 2], false);
	}

	#[test]
	fn test_zstd() {
		TmpDir::new().test_rw("test.zst", [CompressType::Zstd; 2], false);
	}

	// Format selected explicitly; the suffix is added to the bare file name.

	#[test]
	fn test1_gzip() {
		TmpDir::new().test1_rw("test1", [CompressType::Gzip; 2]);
	}

	// NOTE(review): this body is identical to `test1_gzip`.  Nothing here forces or verifies
	// that pigz (rather than gzip) performs the compression - confirm whether that is intended.
	#[test]
	fn test1_pigz() {
		TmpDir::new().test1_rw("test1", [CompressType::Gzip; 2]);
	}

	#[test]
	fn test1_bgzip() {
		TmpDir::new().test1_rw("test1", [CompressType::Bgzip; 2]);
	}

	#[test]
	fn test1_bzip2() {
		TmpDir::new().test1_rw("test1", [CompressType::Bzip2; 2]);
	}

	#[test]
	fn test1_xz() {
		TmpDir::new().test1_rw("test1", [CompressType::Xz; 2]);
	}

	#[test]
	fn test1_lz4() {
		TmpDir::new().test1_rw("test1", [CompressType::Lz4; 2]);
	}

	#[test]
	fn test1_lzma() {
		TmpDir::new().test1_rw("test1", [CompressType::Lzma; 2]);
	}

	#[test]
	fn test1_zstd() {
		TmpDir::new().test1_rw("test1", [CompressType::Zstd; 2]);
	}
}