mod fast;
mod fastest;
pub(crate) mod match_length;
mod optimal;
pub(crate) mod prefix_search;
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
use crate::data::compression::fast::encode;
use crate::data::compression::optimal::{HASH_CHAINING_LEVELS, encode_slice_hc};
#[cfg(test)]
use crate::data::compression::prefix_search::hash_chain::HashChainPrefixSearcher;
use crate::data::compression::prefix_search::multi_level_hash_chain::MultiLevelPrefixSearcher;
use crate::data::control::{
LONG_LENGTH_MAX,
LONG_LENGTH_MIN,
LONG_OFFSET_MAX,
MEDIUM_LENGTH_MAX,
MEDIUM_LENGTH_MIN,
MEDIUM_OFFSET_MAX,
SHORT_LENGTH_MAX,
SHORT_OFFSET_MAX,
};
use crate::format::Format;
use crate::header::Header;
use crate::header::mode::Mode as HeaderMode;
use crate::{RefPackError, RefPackResult};
fn bytes_for_match(length: usize, offset: usize) -> Option<(Option<usize>, usize)> {
if offset > LONG_OFFSET_MAX as usize {
return None;
}
if length >= LONG_LENGTH_MIN as usize {
if length > MEDIUM_LENGTH_MAX as usize || offset > MEDIUM_OFFSET_MAX as usize {
Some((Some(4), LONG_LENGTH_MAX as usize))
} else if length > SHORT_LENGTH_MAX as usize || offset > SHORT_OFFSET_MAX as usize {
Some((Some(3), MEDIUM_LENGTH_MAX as usize))
} else {
Some((Some(2), SHORT_LENGTH_MAX as usize))
}
} else if offset <= SHORT_OFFSET_MAX as usize {
Some((Some(2), SHORT_LENGTH_MAX as usize))
} else if offset <= MEDIUM_OFFSET_MAX as usize {
if length >= MEDIUM_LENGTH_MIN as usize {
Some((Some(3), MEDIUM_LENGTH_MAX as usize))
} else {
Some((None, MEDIUM_LENGTH_MIN as usize - 1))
}
} else {
Some((None, LONG_LENGTH_MIN as usize - 1))
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Default)]
#[non_exhaustive]
#[cfg_attr(test, derive(test_strategy::Arbitrary))]
pub enum CompressionOptions {
Fastest,
#[default]
Fast,
Optimal,
#[cfg(test)]
OptimalReference,
}
pub fn compress<F: Format>(
length: usize,
reader: &mut (impl Read + Seek),
writer: &mut (impl Write + Seek),
compression_options: CompressionOptions,
) -> RefPackResult<()> {
let mut buf = vec![0; length];
reader.read_exact(buf.as_mut_slice())?;
let out = easy_compress::<F>(&buf, compression_options)?;
writer.write_all(&out)?;
Ok(())
}
#[inline]
pub fn easy_compress<F: Format>(
input: &[u8],
compression_options: CompressionOptions,
) -> Result<Vec<u8>, RefPackError> {
let mut writer: Cursor<Vec<u8>> = Cursor::new(vec![]);
let length = input.len();
if input.is_empty() {
return Err(RefPackError::EmptyInput);
}
let controls = match compression_options {
CompressionOptions::Fastest => fastest::encode(input),
CompressionOptions::Fast => encode(input),
CompressionOptions::Optimal => {
encode_slice_hc::<MultiLevelPrefixSearcher<{ HASH_CHAINING_LEVELS }>>(input)
}
#[cfg(test)]
CompressionOptions::OptimalReference => encode_slice_hc::<HashChainPrefixSearcher>(input),
};
let header_length = F::HeaderMode::length(length);
let header_position = writer.stream_position()?;
writer.seek(SeekFrom::Current(header_length as i64))?;
for control in controls {
control.write(&mut writer)?;
}
let data_end_pos = writer.stream_position()?;
let compression_length = data_end_pos - header_position;
let header = Header {
compressed_length: Some(compression_length as u32),
decompressed_length: length as u32,
};
writer.seek(SeekFrom::Start(header_position))?;
header.write::<F::HeaderMode>(&mut writer)?;
Ok(writer.into_inner())
}
#[cfg(test)]
mod test {
use proptest::prelude::*;
use test_strategy::proptest;
use super::*;
use crate::format::{Maxis, Reference};
#[proptest]
#[ignore]
fn large_input_compression(
#[strategy(proptest::collection::vec(any::< u8 > (), 100_000..=500_000))] input: Vec<u8>,
#[strategy(any::<CompressionOptions>())] options: CompressionOptions,
) {
let _unused = easy_compress::<Reference>(&input, options).unwrap();
}
#[proptest]
fn reader_compression(
#[strategy(proptest::collection::vec(any::< u8 > (), 1..=256))] input: Vec<u8>,
#[strategy(any::<CompressionOptions>())] options: CompressionOptions,
) {
let length = input.len();
let mut in_cursor = Cursor::new(input);
let mut out_cursor = Cursor::new(Vec::new());
compress::<Reference>(length, &mut in_cursor, &mut out_cursor, options).unwrap();
}
#[test]
fn empty_input_yields_error() {
let input = vec![];
let result = easy_compress::<Reference>(&input, CompressionOptions::Fast);
assert!(result.is_err());
assert!(matches!(result.unwrap_err(), RefPackError::EmptyInput));
}
#[proptest]
#[ignore]
fn optimal_matches_reference(
#[strategy(proptest::collection::vec(0..=3u8, 1..=1_000_000))] input: Vec<u8>,
) {
println!("in: {}", input.len());
let compressed_reference =
easy_compress::<Reference>(&input, CompressionOptions::OptimalReference)?;
let compressed_optimal = easy_compress::<Reference>(&input, CompressionOptions::Optimal)?;
println!(
"out ref: {} opt: {}",
compressed_reference.len(),
compressed_optimal.len()
);
prop_assert_eq!(
&compressed_reference,
&compressed_optimal,
"Optimal compression should match the reference implementation."
);
}
#[proptest]
fn compressed_size_includes_header(
#[strategy(proptest::collection::vec(0..=3u8, 1..=1_000))] input: Vec<u8>,
) {
let compressed = easy_compress::<Maxis>(&input, CompressionOptions::Fastest)?;
let compressed_total_bytes = compressed.len();
let compressed_size_header =
u32::from_le_bytes(compressed[..4].try_into().unwrap()) as usize;
prop_assert_eq!(
compressed_total_bytes,
compressed_size_header,
"The compression header compressed size field should include the size of the header \
itself."
);
}
}