#![cfg(feature = "lzma")]
use compcol::lzma::{Decoder, Encoder, EncoderConfig, Lzma};
use compcol::{Algorithm, Decoder as _, Encoder as _, Error, Status};
/// Decodes a hexadecimal string into raw bytes, ignoring all whitespace.
///
/// # Panics
///
/// Panics if, once whitespace is removed, the input has an odd number of
/// characters or contains a pair that is not valid base-16.
fn hex(s: &str) -> Vec<u8> {
    let digits: String = s.chars().filter(|c| !c.is_whitespace()).collect();
    // Fail loudly with a readable message instead of an opaque slice-index
    // panic when a fixture was truncated or mistyped.
    assert!(
        digits.len() % 2 == 0,
        "hex fixture must contain an even number of digits, got {}",
        digits.len()
    );
    digits
        .as_bytes()
        .chunks_exact(2)
        .map(|pair| {
            let pair = std::str::from_utf8(pair).expect("hex fixture must be ASCII");
            u8::from_str_radix(pair, 16).expect("invalid hex digit in fixture")
        })
        .collect()
}
/// Decodes `compressed` by offering the whole input to the decoder as a
/// single chunk, with a 65536-byte output scratch buffer.
fn decode_one_shot(compressed: &[u8]) -> Result<Vec<u8>, Error> {
    // `max(1)` keeps the input chunk size non-zero even for an empty stream.
    let whole_input = compressed.len().max(1);
    let scratch_len = 65536;
    decode_chunked(compressed, whole_input, scratch_len)
}
/// Decodes `compressed` with a freshly constructed [`Decoder`], feeding
/// `in_chunk` input bytes at a time into an `out_chunk`-byte scratch buffer.
fn decode_chunked(compressed: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    decode_chunked_with(&mut Decoder::new(), compressed, in_chunk, out_chunk)
}
/// Drives `dec` over `compressed`, returning everything it produced.
///
/// Input is offered in slices of at most `in_chunk` bytes and output is
/// collected through a scratch buffer of `out_chunk` bytes. Both sizes are
/// clamped to at least 1, so a zero chunk size cannot stall the loop.
///
/// The function runs three phases:
/// 1. feed every input chunk through `decode`;
/// 2. call `decode` with empty input until it stops writing;
/// 3. call `finish` until it reports `Status::StreamEnd`.
///
/// It returns early as soon as `decode` reports `Status::StreamEnd`.
///
/// # Errors
///
/// Propagates any error returned by `decode` or `finish`.
///
/// # Panics
///
/// Panics if `finish` makes no progress without reporting `StreamEnd`.
fn decode_chunked_with(
    dec: &mut Decoder,
    compressed: &[u8],
    in_chunk: usize,
    out_chunk: usize,
) -> Result<Vec<u8>, Error> {
    let mut out = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];

    // Phase 1: feed the input chunk by chunk. Clamping `in_chunk` mirrors the
    // `out_chunk` clamp above; a zero chunk size would otherwise never advance.
    for chunk in compressed.chunks(in_chunk.max(1)) {
        let mut consumed = 0;
        while consumed < chunk.len() {
            let (p, status) = dec.decode(&chunk[consumed..], &mut buf)?;
            out.extend_from_slice(&buf[..p.written]);
            consumed += p.consumed;
            match status {
                Status::StreamEnd => return Ok(out),
                // Move on to the next input chunk.
                Status::InputEmpty => break,
                // The scratch buffer was flushed above; retry with the rest.
                Status::OutputFull => continue,
            }
        }
    }

    // Phase 2: all input has been offered; drain output that is still pending.
    loop {
        let (p, status) = dec.decode(&[], &mut buf)?;
        out.extend_from_slice(&buf[..p.written]);
        if matches!(status, Status::StreamEnd) {
            return Ok(out);
        }
        if p.written == 0 {
            break;
        }
    }

    // Phase 3: signal end of input and collect whatever `finish` emits.
    loop {
        let (p, status) = dec.finish(&mut buf)?;
        out.extend_from_slice(&buf[..p.written]);
        match status {
            Status::StreamEnd => break,
            Status::OutputFull | Status::InputEmpty => {
                // No progress and no end of stream would spin forever.
                if p.written == 0 {
                    panic!("decoder finish stalled");
                }
            }
        }
    }
    Ok(out)
}
// Hex-encoded LZMA fixtures consumed through `hex()` by the decode tests.
// All three start with the same 13 bytes: `5d`, `00008000`, then eight `ff`
// bytes. Elsewhere in this file byte 0 is treated as the props byte and bytes
// 5..13 as a little-endian u64 uncompressed-size field.
// NOTE(review): eight 0xFF bytes presumably mean "size unknown" — confirm
// against the format specification.

/// Stream that `decode_empty` expects to decode to zero bytes.
const FIX_EMPTY: &str = "5d00008000ffffffffffffffff0083fffbffffc0000000";
/// Stream that the tests expect to decode to `b"hello world"`.
const FIX_HELLO: &str = "5d00008000ffffffffffffffff00341949ee8de917893a336005f7cf64fffb782000";
/// Stream that the tests expect to decode to 4096 bytes of `b'A'`.
const FIX_REP4K: &str =
"5d00008000ffffffffffffffff0020effbbffea3b15ee5f83fb2aa2655f868704170150ee40930ffffb52c0000";
/// The empty fixture must decode to no bytes at all.
#[test]
fn decode_empty() {
    let stream = hex(FIX_EMPTY);
    let decoded = decode_one_shot(&stream).unwrap();
    assert!(
        decoded.is_empty(),
        "empty fixture decoded to {} bytes",
        decoded.len()
    );
}
/// One-shot decoding of the "hello world" fixture.
#[test]
fn decode_hello_world() {
    let stream = hex(FIX_HELLO);
    let decoded = decode_one_shot(&stream).unwrap();
    assert_eq!(decoded, b"hello world");
}
/// The "hello world" fixture must decode identically for several input chunk
/// sizes combined with a 7-byte output buffer.
#[test]
fn decode_hello_world_chunked() {
    const IN_CHUNKS: [usize; 6] = [1, 2, 3, 5, 8, 16];
    let stream = hex(FIX_HELLO);
    for &in_chunk in IN_CHUNKS.iter() {
        let decoded = decode_chunked(&stream, in_chunk, 7).unwrap();
        assert_eq!(decoded, b"hello world", "in_chunk={in_chunk}");
    }
}
/// The repeating-byte fixture must expand to exactly 4096 `b'A'` bytes.
#[test]
fn decode_4kib_repeating_bytes() {
    let stream = hex(FIX_REP4K);
    let decoded = decode_one_shot(&stream).unwrap();
    assert_eq!(decoded.len(), 4096);
    assert!(!decoded.iter().any(|&byte| byte != b'A'));
}
/// Same fixture as the one-shot 4 KiB test, decoded with 7-byte input chunks
/// and a 13-byte output buffer.
#[test]
fn decode_4kib_chunked_tiny_output() {
    let (in_chunk, out_chunk) = (7, 13);
    let decoded = decode_chunked(&hex(FIX_REP4K), in_chunk, out_chunk).unwrap();
    assert_eq!(decoded.len(), 4096);
    assert!(!decoded.iter().any(|&byte| byte != b'A'));
}
/// Decodes a 16 KiB fixture and compares it against the repeated lorem-ipsum
/// sentence cut off at 16384 bytes.
#[test]
fn decode_lorem_16kib() {
    // Expected plaintext: the sentence repeated, then truncated to 16 KiB.
    let lorem_chunk = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ";
    let mut expected: Vec<u8> = lorem_chunk.repeat(200).into_bytes();
    expected.truncate(16384);

    let fix = concat!(
        "5d00008000ffffffffffffffff00261bca46675af277b87d86d841db0535cd",
        "83a57c12a505db90bd2f14d3717296a88a7d8456718d6a2298ab9e3dc355ef",
        "cca5c3dd5b8ebf03812140d6269102454f92a178bb8a00af902a26920223e5",
        "5cb32de3e85c2cfb3221c66f6a37b16620cdb7527d66a42108d1441495affc",
        "58cfe5db354c05b89327ad7fe5fcbd0afbe2eda9e4d660d61c60112bf411e2",
        "9134c192bd8d4ac7c3c84aef9b3dda35640dd2db8ac9fd8cacc0",
    );
    let decoded = decode_one_shot(&hex(fix)).unwrap();
    assert_eq!(decoded.len(), 16384);
    assert_eq!(decoded, expected);
}
/// Overwrites header bytes 5..13 with the little-endian value 11 and checks
/// that the stream still decodes to "hello world".
#[test]
fn decode_known_uncompressed_size_header() {
    let size_field = 11u64.to_le_bytes();
    let mut stream = hex(FIX_HELLO);
    stream[5..13].copy_from_slice(&size_field);
    let decoded = decode_one_shot(&stream).unwrap();
    assert_eq!(decoded, b"hello world");
}
/// A header whose first byte is 0xFF must be rejected with `Error::BadHeader`.
#[test]
fn bad_header_props_rejected() {
    let mut stream = hex(FIX_HELLO);
    stream[0] = 0xFF;
    let mut scratch = [0u8; 64];
    let mut decoder = Decoder::new();
    let outcome = decoder.decode(&stream, &mut scratch);
    assert_eq!(outcome.unwrap_err(), Error::BadHeader);
}
/// Setting byte 13 (the first byte after the 13-byte header) to 0x01 must
/// make decoding fail with `Error::Corrupt`.
#[test]
fn corrupt_first_init_byte_rejected() {
    let mut stream = hex(FIX_HELLO);
    stream[13] = 0x01;
    let mut scratch = [0u8; 64];
    let mut decoder = Decoder::new();
    let outcome = decoder.decode(&stream, &mut scratch);
    assert_eq!(outcome.unwrap_err(), Error::Corrupt);
}
/// Dropping the last four bytes of the stream must surface as
/// `Error::UnexpectedEnd` once `finish` is called.
#[test]
fn unexpected_eof_on_finish() {
    let stream = hex(FIX_HELLO);
    let (truncated, _dropped_tail) = stream.split_at(stream.len() - 4);
    let mut decoder = Decoder::new();
    let mut scratch = vec![0u8; 64];
    // The progress and status of this call are deliberately ignored.
    let _ = decoder.decode(truncated, &mut scratch).unwrap();
    let outcome = decoder.finish(&mut scratch);
    assert_eq!(outcome.unwrap_err(), Error::UnexpectedEnd);
}
/// The algorithm must advertise itself under the name "lzma".
#[test]
fn name_is_lzma() {
    let advertised = <Lzma as Algorithm>::NAME;
    assert_eq!(advertised, "lzma");
}
/// The default encoder configuration uses level 6.
#[test]
fn default_config_is_level_6() {
    let config = EncoderConfig::default();
    assert_eq!(config.level, 6);
}
/// Compresses `payload` with a default-constructed [`Encoder`].
fn encode_one_shot(payload: &[u8]) -> Vec<u8> {
    encode_with(&mut Encoder::new(), payload)
}
/// Compresses `payload` with an [`Encoder`] configured for `level`.
fn encode_at_level(payload: &[u8], level: u8) -> Vec<u8> {
    let config = EncoderConfig { level };
    encode_with(&mut Encoder::with_config(config), payload)
}
/// Feeds `payload` into `enc`, then collects the output emitted by `finish`.
///
/// While feeding, every `encode` call is asserted to write nothing; the
/// returned bytes come solely from the `finish` loop.
///
/// # Panics
///
/// Panics if `encode` or `finish` returns an error, if `encode` writes
/// output, or if either phase stops making progress.
fn encode_with(enc: &mut Encoder, payload: &[u8]) -> Vec<u8> {
    // Feeding phase: push the payload through a small scratch buffer.
    let mut scratch = [0u8; 64];
    let mut fed = 0;
    while fed < payload.len() {
        let (progress, status) = enc.encode(&payload[fed..], &mut scratch).unwrap();
        fed += progress.consumed;
        assert_eq!(progress.written, 0);
        if matches!(status, Status::InputEmpty | Status::StreamEnd) {
            break;
        }
        // Only `Status::OutputFull` reaches this point.
        assert!(progress.consumed != 0, "encoder stalled mid-input");
    }

    // Flushing phase: gather everything `finish` produces.
    let mut encoded = Vec::new();
    let mut block = vec![0u8; 4096];
    loop {
        let (progress, status) = enc.finish(&mut block).unwrap();
        encoded.extend_from_slice(&block[..progress.written]);
        if matches!(status, Status::StreamEnd) {
            break;
        }
        assert!(progress.written != 0, "encoder finish stalled");
    }
    encoded
}
/// Encodes `payload`, decodes the result and asserts that the original bytes
/// come back unchanged.
fn round_trip(payload: &[u8]) {
    let recovered =
        decode_one_shot(&encode_one_shot(payload)).expect("decoding our own output failed");
    assert_eq!(
        recovered,
        payload,
        "round-trip mismatch (input len {})",
        payload.len()
    );
}
/// Encoding an empty payload must still yield at least a 13-byte header with
/// props byte 0x5d and bytes 5..13 all 0xFF, and must decode back to nothing.
#[test]
fn encode_empty_round_trip() {
    let compressed = encode_one_shot(b"");
    let total = compressed.len();
    assert!(
        total >= 13,
        "encoder must always emit a header, got {} bytes",
        total
    );
    let props = compressed[0];
    assert_eq!(props, 0x5d, "props byte = (pb=2)*5*9 + (lp=0)*9 + (lc=3)");
    compressed[5..13]
        .iter()
        .for_each(|&size_byte| assert_eq!(size_byte, 0xFF));
    let recovered = decode_one_shot(&compressed).unwrap();
    assert!(recovered.is_empty());
}
/// One-byte payloads covering several distinct byte values must round-trip.
#[test]
fn encode_single_byte_round_trip() {
    const SAMPLES: [u8; 6] = [0u8, 1, 0x7F, 0xFE, 0xFF, b'A'];
    for &b in SAMPLES.iter() {
        let payload = [b];
        let compressed = encode_one_shot(&payload);
        let recovered = decode_one_shot(&compressed).unwrap();
        assert_eq!(recovered, vec![b], "byte 0x{:02x}", b);
    }
}
/// Round-trips the short "hello world" payload.
#[test]
fn encode_hello_world_round_trip() {
    let payload: &[u8] = b"hello world";
    round_trip(payload);
}
/// Round-trips a short text that contains repeated phrases.
#[test]
fn encode_small_text_round_trip() {
    let payload: &[u8] = b"hello world! hello world! hello world!";
    round_trip(payload);
}
/// 4 KiB of one repeated byte must compress to under 100 bytes and
/// round-trip exactly.
#[test]
fn encode_4kib_repeating_byte_round_trip() {
    let payload = vec![b'A'; 4096];
    let compressed = encode_one_shot(&payload);
    let compressed_len = compressed.len();
    assert!(
        compressed_len < 100,
        "expected strong compression on repeating byte, got {} bytes",
        compressed_len
    );
    assert_eq!(decode_one_shot(&compressed).unwrap(), payload);
}
/// Round-trips a payload containing every byte value exactly once.
#[test]
fn encode_byte_value_coverage() {
    let every_byte = (0..=u8::MAX).collect::<Vec<u8>>();
    round_trip(&every_byte);
}
/// Feeds the encoder one input byte per call, drains `finish` through a
/// one-byte buffer, and decodes with one-byte input and output chunks.
#[test]
fn encode_streaming_one_byte_chunks_round_trip() {
    let payload = b"The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.";
    let mut enc = Encoder::new();
    let mut scratch = [0u8; 4];
    // `chunks(1)` yields one single-byte slice per payload byte.
    for single in payload.chunks(1) {
        let (progress, _status) = enc.encode(single, &mut scratch).unwrap();
        assert_eq!(progress.consumed, 1);
        assert_eq!(progress.written, 0);
    }

    let mut compressed = Vec::new();
    let mut one_byte = [0u8; 1];
    loop {
        let (progress, status) = enc.finish(&mut one_byte).unwrap();
        compressed.extend_from_slice(&one_byte[..progress.written]);
        if matches!(status, Status::StreamEnd) {
            break;
        }
        assert!(
            progress.written != 0,
            "encoder finish stalled in single-byte streaming mode"
        );
    }

    let recovered = decode_chunked(&compressed, 1, 1).unwrap();
    assert_eq!(recovered, payload);
}
/// Builds a deterministic corpus of at least 64 KiB.
///
/// Each round appends 64 pseudo-random letters from `abcdef`, generated by a
/// wrapping multiply-add recurrence on a `u32` seeded with `0xC0FFEE`, and
/// then one of three fixed phrases, taken in rotation.
fn mixed_corpus() -> Vec<u8> {
    const ALPHABET: &[u8] = b"abcdef";
    const PHRASES: [&[u8]; 3] = [
        b"the_quick_brown_fox_jumps_over_the_lazy_dog_xxxxxxxxxxxxxxxxxxxxxxxx",
        b"lorem_ipsum_dolor_sit_amet_consectetur_adipiscing_elit_yyyyyyyyyyyyyy",
        b"compcol_streaming_codec_test_corpus_for_level_differentiation_zzzzz",
    ];
    let mut lcg: u32 = 0xC0FFEE_u32;
    let mut corpus = Vec::with_capacity(80 * 1024);
    for phrase in PHRASES.iter().cycle() {
        if corpus.len() >= 64 * 1024 {
            break;
        }
        corpus.extend((0..64).map(|_| {
            lcg = lcg.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
            ALPHABET[(lcg as usize) % ALPHABET.len()]
        }));
        corpus.extend_from_slice(phrase);
    }
    corpus
}
/// Round-trips the full mixed corpus through the default-level encoder.
#[test]
fn round_trip_mixed_corpus_default_level() {
    let corpus = mixed_corpus();
    let minimum = 64 * 1024;
    assert!(corpus.len() >= minimum);
    round_trip(&corpus);
}
/// Returns the lorem-ipsum sentence repeated whole until the result is at
/// least `min_len` bytes long (empty when `min_len` is 0).
fn lorem_corpus(min_len: usize) -> Vec<u8> {
    let sentence = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. ";
    let mut corpus = Vec::with_capacity(min_len + sentence.len());
    loop {
        if corpus.len() >= min_len {
            break corpus;
        }
        corpus.extend(sentence.bytes());
    }
}
/// Empty, short and repetitive inputs must round-trip at level 1.
#[test]
fn round_trip_level_1() {
    let repetitive = b"abcabcabcabcabc".repeat(100);
    let inputs: [&[u8]; 3] = [b"", b"hello world", &repetitive];
    for input in inputs.iter().copied() {
        let recovered = decode_one_shot(&encode_at_level(input, 1)).unwrap();
        assert_eq!(recovered, input);
    }
}
/// Empty, short and repetitive inputs must round-trip at level 9.
#[test]
fn round_trip_level_9() {
    let repetitive = b"abcabcabcabcabc".repeat(100);
    let inputs: [&[u8]; 3] = [b"", b"hello world", &repetitive];
    for input in inputs.iter().copied() {
        let recovered = decode_one_shot(&encode_at_level(input, 9)).unwrap();
        assert_eq!(recovered, input);
    }
}
/// On the lorem corpus, level 9 output must not be larger than level 1
/// output, and both must decode back to the input.
#[test]
fn level_9_no_worse_than_level_1_on_compressible_corpus() {
    let input = lorem_corpus(16 * 1024);
    let (lo, hi) = (encode_at_level(&input, 1), encode_at_level(&input, 9));
    let (lo_len, hi_len) = (lo.len(), hi.len());
    assert!(
        hi_len <= lo_len,
        "level 9 ({} bytes) was bigger than level 1 ({} bytes)",
        hi_len,
        lo_len,
    );
    for encoded in [&lo, &hi].iter() {
        assert_eq!(decode_one_shot(encoded).unwrap(), input);
    }
}
/// A level of 250 must still produce a stream that decodes to the input.
#[test]
fn out_of_range_level_is_clamped() {
    let input = b"the rain in spain falls mainly on the plain";
    let out_of_range_level = 250;
    let compressed = encode_at_level(input, out_of_range_level);
    let recovered = decode_one_shot(&compressed).unwrap();
    assert_eq!(recovered, input);
}
/// After `reset`, a level-9 encoder must be reusable and must emit the same
/// bytes as a freshly built level-9 encoder for the same input.
#[test]
fn reset_preserves_level_and_allows_reuse() {
    let input_a: &[u8] = b"alpha alpha alpha alpha alpha";
    let input_b: &[u8] = b"bravo bravo bravo bravo bravo";

    let mut reused = Encoder::with_config(EncoderConfig { level: 9 });
    let encoded_a = encode_with(&mut reused, input_a);
    reused.reset();
    let encoded_b = encode_with(&mut reused, input_b);

    let recovered_a = decode_one_shot(&encoded_a).unwrap();
    assert_eq!(recovered_a, input_a);
    let recovered_b = decode_one_shot(&encoded_b).unwrap();
    assert_eq!(recovered_b, input_b);

    let mut fresh = Encoder::with_config(EncoderConfig { level: 9 });
    let fresh_b = encode_with(&mut fresh, input_b);
    assert_eq!(encoded_b, fresh_b, "reset must preserve compression level");
}
/// A single decoder must handle a second stream after `reset`.
#[test]
fn decoder_reset_allows_reuse() {
    let encoded_a = encode_one_shot(b"hello");
    let encoded_b = encode_one_shot(b"world");
    let mut decoder = Decoder::new();

    let first = decode_chunked_with(&mut decoder, &encoded_a, 64, 64).unwrap();
    assert_eq!(first, b"hello");

    decoder.reset();

    let second = decode_chunked_with(&mut decoder, &encoded_b, 64, 64).unwrap();
    assert_eq!(second, b"world");
}
/// Round-trips a payload using the encoder and decoder obtained through the
/// `Algorithm` trait.
#[test]
fn algorithm_encoder_decoder_round_trip() {
    let input = b"compcol Algorithm trait roundtrip!";
    let mut scratch = vec![0u8; 256];
    let mut encoded = Vec::new();

    // Feed the input until it is used up or the encoder reports `InputEmpty`.
    let mut enc = <Lzma as Algorithm>::encoder();
    let mut fed = 0;
    while fed < input.len() {
        let (progress, status) = enc.encode(&input[fed..], &mut scratch).unwrap();
        encoded.extend_from_slice(&scratch[..progress.written]);
        fed += progress.consumed;
        if let Status::InputEmpty = status {
            break;
        }
    }

    // Flush until the encoder reports the end of the stream.
    loop {
        let (progress, status) = enc.finish(&mut scratch).unwrap();
        encoded.extend_from_slice(&scratch[..progress.written]);
        if let Status::StreamEnd = status {
            break;
        }
    }

    let mut dec = <Lzma as Algorithm>::decoder();
    let whole_input = encoded.len();
    let decoded = decode_chunked_with(&mut dec, &encoded, whole_input, 256).unwrap();
    assert_eq!(decoded, input);
}
/// Encoders built via `Algorithm::encoder_with` must honour the level: level
/// 9 output may not exceed level 1 output, and both must decode correctly.
#[test]
fn algorithm_encoder_with_uses_config() {
    let input = lorem_corpus(16 * 1024);
    let mut enc_lo = <Lzma as Algorithm>::encoder_with(EncoderConfig { level: 1 });
    let mut enc_hi = <Lzma as Algorithm>::encoder_with(EncoderConfig { level: 9 });
    let lo = encode_with(&mut enc_lo, &input);
    let hi = encode_with(&mut enc_hi, &input);
    let (lo_len, hi_len) = (lo.len(), hi.len());
    assert!(
        hi_len <= lo_len,
        "encoder_with(level=9) was bigger than encoder_with(level=1): hi={} lo={}",
        hi_len,
        lo_len,
    );
    for encoded in [&lo, &hi].iter() {
        assert_eq!(decode_one_shot(encoded).unwrap(), input);
    }
}
/// Tests for the name-based factory; compiled only with the `factory` feature.
#[cfg(feature = "factory")]
mod factory {
    use compcol::Status;
    use compcol::factory;

    /// Both an encoder and a decoder must be obtainable under "lzma".
    #[test]
    fn lookup_known() {
        let has_encoder = factory::encoder_by_name("lzma").is_some();
        assert!(has_encoder);
        let has_decoder = factory::decoder_by_name("lzma").is_some();
        assert!(has_decoder);
    }

    /// The list of registered names must include "lzma".
    #[test]
    fn names_contains_lzma() {
        let names = factory::names();
        assert!(names.contains(&"lzma"));
    }

    /// Round-trips a payload through the codecs returned by the factory.
    #[test]
    fn boxed_round_trip() {
        let mut enc = factory::encoder_by_name("lzma").unwrap();
        let mut dec = factory::decoder_by_name("lzma").unwrap();
        let input = b"hello hello hello world world world!";
        let mut scratch = vec![0u8; 256];

        // Encode: feed the input, then flush with `finish`.
        let mut encoded = Vec::new();
        let mut fed = 0;
        while fed < input.len() {
            let (progress, status) = enc.encode(&input[fed..], &mut scratch).unwrap();
            encoded.extend_from_slice(&scratch[..progress.written]);
            fed += progress.consumed;
            if let Status::InputEmpty = status {
                break;
            }
        }
        loop {
            let (progress, status) = enc.finish(&mut scratch).unwrap();
            encoded.extend_from_slice(&scratch[..progress.written]);
            if let Status::StreamEnd = status {
                break;
            }
        }

        // Decode: feed the stream, drain with empty input, then `finish`.
        let mut decoded = Vec::new();
        let mut taken = 0;
        while taken < encoded.len() {
            let (progress, status) = dec.decode(&encoded[taken..], &mut scratch).unwrap();
            decoded.extend_from_slice(&scratch[..progress.written]);
            taken += progress.consumed;
            if matches!(status, Status::StreamEnd | Status::InputEmpty) {
                break;
            }
        }
        loop {
            let (progress, status) = dec.decode(&[], &mut scratch).unwrap();
            decoded.extend_from_slice(&scratch[..progress.written]);
            if matches!(status, Status::StreamEnd) || progress.written == 0 {
                break;
            }
        }
        loop {
            let (progress, status) = dec.finish(&mut scratch).unwrap();
            decoded.extend_from_slice(&scratch[..progress.written]);
            if let Status::StreamEnd = status {
                break;
            }
            assert!(progress.written != 0, "decoder finish stalled");
        }

        assert_eq!(&decoded[..], input);
    }
}
/// Decodes an embedded fixture with the `decompress_to_vec` helper and
/// checks the recovered text.
#[test]
fn xz_format_lzma_round_trips_via_vec_helper() {
    const FIXTURE: &[u8] = &[
        0x5d, 0x00, 0x00, 0x80, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x34,
        0x19, 0x49, 0xee, 0x8d, 0xe9, 0x14, 0x8a, 0x6a, 0xa5, 0xd6, 0xb6, 0x11, 0x0a, 0xd7, 0x39,
        0x16, 0x6a, 0x19, 0x15, 0x45, 0xff, 0xfe, 0x66, 0xec, 0x00,
    ];
    let outcome = compcol::vec::decompress_to_vec::<Lzma>(FIXTURE);
    let decoded = outcome.unwrap();
    assert_eq!(decoded, b"hello lzma alone\n");
}
/// A `LimitedDecoder` whose budget equals the exact uncompressed size must
/// decode the whole stream and reach `StreamEnd` from `finish`.
#[test]
fn limited_decoder_at_exact_budget_terminates_cleanly() {
    use compcol::Algorithm;
    use compcol::limit::LimitedDecoder;

    let original = vec![b'A'; 65536];
    let compressed = compcol::vec::compress_to_vec::<Lzma>(&original).unwrap();
    let budget = original.len() as u64;
    let mut dec = LimitedDecoder::new(Lzma::decoder(), budget);

    let mut scratch = vec![0u8; 4096];
    let mut decoded = Vec::new();
    let mut fed = 0;
    while fed < compressed.len() {
        let (progress, status) = dec.decode(&compressed[fed..], &mut scratch).unwrap();
        decoded.extend_from_slice(&scratch[..progress.written]);
        fed += progress.consumed;
        // Stop on end of stream, on exhausted input, or when nothing moved.
        let stream_ended = matches!(status, Status::StreamEnd);
        let input_exhausted = matches!(status, Status::InputEmpty) && fed == compressed.len();
        let stalled = progress.consumed == 0 && progress.written == 0;
        if stream_ended || input_exhausted || stalled {
            break;
        }
    }
    loop {
        let (progress, status) = dec.finish(&mut scratch).unwrap();
        decoded.extend_from_slice(&scratch[..progress.written]);
        if let Status::StreamEnd = status {
            break;
        }
    }
    assert_eq!(decoded, original);
}