deflate 0.7.20

A DEFLATE, zlib and gzip encoder written in rust.
Documentation
//! An implementation an encoder using [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorightm in pure rust.
//!
//! This library provides functions to compress data using the DEFLATE algorithm,
//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
//! The current implementation is still a bit lacking speed-wise compared to C-libraries
//! like zlib and miniz.
//!
//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! by e.g html headers, the `.png` inage format, the unix `gzip` program and commonly in `.zip`
//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
//! some extra metadata and a checksum to validate the integrity of the raw data.
//!
//! The deflate algorithm does not perform as well as newer algorhitms used in file formats such as
//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
//! the `DEFLATE` format (with or without wrappers) is not required.
//!
//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default,
//! but can be enabled with the `gzip` feature.
//!
//! As this library is still in development, the compression output may change slightly
//! between versions.
//!
//!
//! # Examples:
//! ## Simple compression function:
//! ``` rust
//! use deflate::deflate_bytes;
//!
//! let data = b"Some data";
//! let compressed = deflate_bytes(data);
//! # let _ = compressed;
//! ```
//!
//! ## Using a writer:
//! ``` rust
//! use std::io::Write;
//!
//! use deflate::Compression;
//! use deflate::write::ZlibEncoder;
//!
//! let data = b"This is some test data";
//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
//! encoder.write_all(data).expect("Write error!");
//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
//! # let _ = compressed_data;
//! ```

#![cfg_attr(all(feature = "benchmarks", test), feature(test))]

#[cfg(all(test, feature = "benchmarks"))]
extern crate test as test_std;

#[cfg(test)]
extern crate flate2;
// #[cfg(test)]
// extern crate inflate;

extern crate adler32;
extern crate byteorder;
#[cfg(feature = "gzip")]
extern crate gzip_header;

mod compression_options;
mod huffman_table;
mod lz77;
mod lzvalue;
mod chained_hash_table;
mod length_encode;
mod output_writer;
mod stored_block;
mod huffman_lengths;
mod zlib;
mod checksum;
mod bit_reverse;
mod bitstream;
mod encoder_state;
mod matching;
mod input_buffer;
mod deflate_state;
mod compress;
mod rle;
mod writer;
#[cfg(test)]
mod test_utils;

use std::io::Write;
use std::io;

use byteorder::BigEndian;
#[cfg(feature = "gzip")]
use gzip_header::GzBuilder;
#[cfg(feature = "gzip")]
use gzip_header::Crc;
#[cfg(feature = "gzip")]
use byteorder::LittleEndian;

use checksum::RollingChecksum;
use deflate_state::DeflateState;

pub use compression_options::{CompressionOptions, SpecialOptions, Compression};
use compress::Flush;
pub use lz77::MatchingType;

use writer::compress_until_done;

/// Encoders implementing a `Write` interface.
pub mod write {
    pub use writer::{DeflateEncoder, ZlibEncoder};
    #[cfg(feature = "gzip")]
    pub use writer::gzip::GzEncoder;
}


fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
    input: &[u8],
    writer: &mut W,
    mut checksum: RC,
    compression_options: CompressionOptions,
) -> io::Result<()> {
    checksum.update_from_slice(input);
    // We use a box here to avoid putting the buffers on the stack
    // It's done here rather than in the structs themselves for now to
    // keep the data close in memory.
    let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
    compress_until_done(input, &mut deflate_state, Flush::Finish)
}

/// Compress the given slice of bytes with DEFLATE compression.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::{deflate_bytes_conf, Compression};
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
    let mut writer = Vec::with_capacity(input.len() / 3);
    compress_data_dynamic(
        input,
        &mut writer,
        checksum::NoChecksum::new(),
        options.into(),
    ).expect("Write error!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression using the default compression
/// level.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes(data);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
    deflate_bytes_conf(input, Compression::Default)
}

/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// Zlib dictionaries are not yet suppored.
///
/// # Examples
///
/// ```
/// use deflate::{deflate_bytes_zlib_conf, Compression};
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
    use byteorder::WriteBytesExt;
    let mut writer = Vec::with_capacity(input.len() / 3);
    // Write header
    zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
        .expect("Write error when writing zlib header!");

    let mut checksum = checksum::Adler32Checksum::new();
    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
        .expect("Write error when writing compressed data!");

    let hash = checksum.current_hash();

    writer
        .write_u32::<BigEndian>(hash)
        .expect("Write error when writing checksum!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
/// using the default compression level.
///
/// Returns a Vec<u8> of the compressed data.
///
/// Zlib dictionaries are not yet suppored.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_zlib;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_zlib(data);
/// # let _ = compressed_data;
/// ```
pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
    deflate_bytes_zlib_conf(input, Compression::Default)
}

/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer
/// using the given gzip header and compression options.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O: Into<CompressionOptions>>(
    input: &[u8],
    options: O,
    gzip_header: GzBuilder,
) -> Vec<u8> {
    use byteorder::WriteBytesExt;
    let mut writer = Vec::with_capacity(input.len() / 3);

    // Write header
    writer
        .write_all(&gzip_header.into_header())
        .expect("Write error when writing header!");
    let mut checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
        .expect("Write error when writing compressed data!");

    let mut crc = Crc::new();
    crc.update(input);

    writer
        .write_u32::<LittleEndian>(crc.sum())
        .expect("Write error when writing checksum!");
    writer
        .write_u32::<LittleEndian>(crc.amt_as_u32())
        .expect("Write error when writing amt!");
    writer
}

/// Compress the given slice of bytes with DEFLATE compression, including a gzip header and trailer,
/// using the default compression level, and a gzip header with default values.
///
/// Returns a `Vec<u8>` of the compressed data.
///
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    deflate_bytes_gzip_conf(input, Compression::Default, GzBuilder::new())
}

#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;

    use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;

    type CO = CompressionOptions;

    /// Write data to the writer in chunks of chunk_size.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(&chunk).unwrap();
        }
    }

    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                println!(
                    "input: {:?}, output: {:?}",
                    &input[n - 3..n + 3],
                    &result[n - 3..n + 3]
                );
                break;
            }
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);
        // {
        //     use std::fs::File;
        //     use std::io::Write;
        //     let mut f = File::create("out.zlib").unwrap();
        //     f.write_all(&compressed).unwrap();
        // }

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(&test_data == &result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.header().comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    #[ignore]
    #[test]
    /// Test the writer by inputing data in one chunk at the time.
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }


}