#![cfg(feature = "xpress")]
use compcol::xpress::{Decoder, Encoder, Xpress};
use compcol::{Algorithm, Decoder as _, Encoder as _, Error, Status};
/// Drives `enc` over `input` in bounded pieces and returns everything it emitted.
///
/// At most `in_chunk` bytes of input are offered per slice, and every call
/// writes into a scratch buffer of `out_chunk` bytes. Both sizes are clamped
/// to at least 1, so a zero chunk size degrades to byte-at-a-time operation
/// instead of hanging. After all input has been offered, `finish` is called
/// until it reports `Status::StreamEnd`.
///
/// # Panics
///
/// Panics if `encode` or `finish` returns an error, or if `finish` reports
/// that it is not done while writing no bytes (a stalled encoder).
fn encode_chunked(enc: &mut Encoder, input: &[u8], in_chunk: usize, out_chunk: usize) -> Vec<u8> {
    let mut encoded = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // `chunks` panics on a zero size, so clamp it the same way as `out_chunk`.
    // It also avoids the manual `i + in_chunk` arithmetic, which could
    // overflow for very large chunk sizes.
    for chunk in input.chunks(in_chunk.max(1)) {
        let mut consumed = 0;
        while consumed < chunk.len() {
            let (p, status) = enc.encode(&chunk[consumed..], &mut buf).unwrap();
            encoded.extend_from_slice(&buf[..p.written]);
            consumed += p.consumed;
            match status {
                // The encoder is done with this slice; move on to the next one.
                Status::InputEmpty | Status::StreamEnd => break,
                // Scratch buffer was flushed above; offer the remainder again.
                Status::OutputFull => continue,
            }
        }
    }
    loop {
        let (p, status) = enc.finish(&mut buf).unwrap();
        encoded.extend_from_slice(&buf[..p.written]);
        match status {
            Status::StreamEnd => break,
            Status::OutputFull | Status::InputEmpty => {
                // Not finished yet: the call must at least have made progress.
                assert!(p.written != 0, "xpress encoder finish stalled");
            }
        }
    }
    encoded
}
/// Decodes `encoded` with a freshly constructed [`Decoder`].
///
/// Thin convenience wrapper around `decode_chunked_with`; `in_chunk` and
/// `out_chunk` are forwarded unchanged.
fn decode_chunked(encoded: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    decode_chunked_with(&mut Decoder::new(), encoded, in_chunk, out_chunk)
}
/// Drives `dec` over `encoded` in bounded pieces and returns the decoded bytes.
///
/// At most `in_chunk` bytes of input are offered per slice, and every call
/// writes into a scratch buffer of `out_chunk` bytes. Both sizes are clamped
/// to at least 1, so a zero chunk size degrades to byte-at-a-time operation
/// instead of hanging. Once all input has been offered, the decoder is polled
/// with empty input until it stops producing output, and then `finish` is
/// called until it reports `Status::StreamEnd`.
///
/// # Errors
///
/// Returns the first error reported by `decode` or `finish`.
///
/// # Panics
///
/// Panics if `finish` reports that it is not done while writing no bytes
/// (a stalled decoder).
fn decode_chunked_with(
    dec: &mut Decoder,
    encoded: &[u8],
    in_chunk: usize,
    out_chunk: usize,
) -> Result<Vec<u8>, Error> {
    let mut decoded = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // `chunks` panics on a zero size, so clamp it the same way as `out_chunk`.
    // It also avoids the manual `i + in_chunk` arithmetic, which could
    // overflow for very large chunk sizes.
    for chunk in encoded.chunks(in_chunk.max(1)) {
        let mut consumed = 0;
        while consumed < chunk.len() {
            let (p, status) = dec.decode(&chunk[consumed..], &mut buf)?;
            decoded.extend_from_slice(&buf[..p.written]);
            consumed += p.consumed;
            match status {
                // The decoder is done with this slice; move on to the next one.
                Status::StreamEnd | Status::InputEmpty => break,
                // Scratch buffer was flushed above; offer the remainder again.
                Status::OutputFull => continue,
            }
        }
    }
    // Drain output the decoder may still be holding after the last input byte.
    loop {
        let (p, _status) = dec.decode(&[], &mut buf)?;
        decoded.extend_from_slice(&buf[..p.written]);
        if p.written == 0 {
            break;
        }
    }
    loop {
        let (p, status) = dec.finish(&mut buf)?;
        decoded.extend_from_slice(&buf[..p.written]);
        match status {
            Status::StreamEnd => break,
            Status::OutputFull | Status::InputEmpty => {
                // Not finished yet: the call must at least have made progress.
                assert!(p.written != 0, "xpress decoder finish stalled");
            }
        }
    }
    Ok(decoded)
}
/// Encodes `input` with a new [`Encoder`], offering the whole input as a
/// single slice and using a 4096-byte output scratch buffer.
fn encode_all(input: &[u8]) -> Vec<u8> {
    // An empty input still needs a non-zero chunk size.
    let whole_input = input.len().max(1);
    let mut encoder = Encoder::new();
    encode_chunked(&mut encoder, input, whole_input, 4096)
}
/// Encodes `input`, decodes the result with 4096-byte chunks, and asserts
/// that the decoded bytes equal the original input.
fn round_trip(input: &[u8]) {
    let decoded = decode_chunked(&encode_all(input), 4096, 4096).unwrap();
    assert_eq!(decoded, input, "round-trip mismatch len {}", input.len());
}
/// The `Algorithm` implementation for `Xpress` advertises the name `"xpress"`.
#[test]
fn name_is_xpress() {
    let name = <Xpress as Algorithm>::NAME;
    assert_eq!(name, "xpress");
}
/// Empty input encodes to exactly eight bytes equal to `0u64` in
/// little-endian order, and decodes back to nothing.
#[test]
fn round_trip_empty() {
    let encoded = encode_all(b"");
    let zero_header = 0u64.to_le_bytes();
    assert_eq!(encoded.len(), 8);
    assert_eq!(&encoded[..8], &zero_header);

    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert!(decoded.is_empty());
}
/// Round-trips a one-byte input.
#[test]
fn round_trip_single_byte() {
    let input: &[u8] = b"X";
    round_trip(input);
}
/// Round-trips a short ASCII phrase.
#[test]
fn round_trip_hello_world() {
    let input: &[u8] = b"hello world";
    round_trip(input);
}
/// Round-trips a three-byte pattern repeated three times.
#[test]
fn round_trip_short_match() {
    let input: &[u8] = b"abcabcabc";
    round_trip(input);
}
/// Round-trips a run of eight identical bytes.
#[test]
fn round_trip_distance_one() {
    let input: &[u8] = b"aaaaaaaa";
    round_trip(input);
}
/// Round-trips the ten-byte phrase `abcdefghij` repeated three times.
// NOTE(review): "tier2" presumably names a match-length encoding tier of the
// codec; confirm against the encoder.
#[test]
fn round_trip_tier2_length() {
    let input = b"abcdefghij".repeat(3);
    round_trip(&input);
}
/// Round-trips an English pangram (with trailing space) repeated eight times.
// NOTE(review): "tier3" presumably names a match-length encoding tier of the
// codec; confirm against the encoder.
#[test]
fn round_trip_tier3_length() {
    let input = b"the quick brown fox jumps over the lazy dog ".repeat(8);
    round_trip(&input);
}
/// Round-trips 4096 copies of the byte `a`.
// NOTE(review): "tier4" presumably names a match-length encoding tier of the
// codec; confirm against the encoder.
#[test]
fn round_trip_tier4_length() {
    let input = [b'a'; 4096];
    round_trip(&input);
}
/// Round-trips 64 KiB of the single byte `Z`.
#[test]
fn round_trip_long_repeating_64kib() {
    let input: Vec<u8> = core::iter::repeat_n(b'Z', 64 * 1024).collect();
    round_trip(&input);
}
/// Round-trips several short single-byte runs, a literal phrase, and a
/// 200-byte run, concatenated in that order.
#[test]
fn round_trip_mixed_short_runs() {
    let mut input = vec![b'a'; 10];
    input.extend_from_slice(&[b'b'; 3]);
    input.extend_from_slice(&[b'c'; 7]);
    input.extend_from_slice(b"and a quick fox jumps over");
    // Append the trailing run of 200 `z` bytes.
    input.resize(input.len() + 200, b'z');
    round_trip(&input);
}
/// Round-trips a corpus that alternates 32 pseudo-random bytes with one of
/// three fixed phrases (used in rotation) until it reaches at least 8 KiB.
#[test]
fn round_trip_mixed_corpus() {
    // Deterministic linear congruential generator; the output byte is taken
    // from bits 16..24 of the state.
    let mut state: u32 = 0xC0FFEE_u32;
    let mut next_byte = move || {
        state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
        (state >> 16) as u8
    };
    let phrases: [&[u8]; 3] = [
        b"the_quick_brown_fox_jumps_over_the_lazy_dog ",
        b"compcol streaming codec test corpus aaaa ",
        b"xpress round trip mixed ",
    ];

    let mut input = Vec::with_capacity(16 * 1024);
    for phrase in phrases.iter().cycle() {
        if input.len() >= 8 * 1024 {
            break;
        }
        input.extend((0..32).map(|_| next_byte()));
        input.extend_from_slice(phrase);
    }
    round_trip(&input);
}
/// Round-trips 64 KiB of deterministic pseudo-random bytes.
#[test]
fn round_trip_pseudo_random_64kib() {
    // Linear congruential generator; each output byte is bits 16..24 of the
    // freshly advanced state.
    let mut state: u32 = 0xC0FFEE_u32;
    let input: Vec<u8> = (0..64 * 1024)
        .map(|_| {
            state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
            (state >> 16) as u8
        })
        .collect();
    round_trip(&input);
}
/// Round-trips every byte value from 0 through 255, once each, in ascending order.
#[test]
fn round_trip_full_byte_alphabet() {
    let input: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    round_trip(&input);
}
/// Round-trips with one-byte input slices and a one-byte output buffer on
/// both the encoding and the decoding side.
#[test]
fn round_trip_streaming_one_byte() {
    let input: &[u8] = b"streaming bytes one at a time";
    let mut encoder = Encoder::new();
    let encoded = encode_chunked(&mut encoder, input, 1, 1);
    let decoded = decode_chunked(&encoded, 1, 1).unwrap();
    assert_eq!(decoded, input);
}
/// Decoding a repetitive input through a one-byte output buffer still
/// reproduces the original bytes.
#[test]
fn decoder_output_buffer_size_one() {
    let input: &[u8] = b"the_the_the_the_the_the_the_the_the_the_the_the_the_";
    let decoded = decode_chunked(&encode_all(input), 4096, 1).unwrap();
    assert_eq!(decoded, input);
}
/// Decoding a repetitive input one encoded byte at a time still reproduces
/// the original bytes.
#[test]
fn decoder_input_buffer_size_one() {
    let input: &[u8] = b"the_the_the_the_the_the_the_the_the_the_the_the_the_";
    let decoded = decode_chunked(&encode_all(input), 1, 4096).unwrap();
    assert_eq!(decoded, input);
}
/// One encoder, reset between uses, produces two independently decodable streams.
#[test]
fn encoder_reset_allows_reuse() {
    let input_a: &[u8] = b"alpha alpha alpha alpha alpha";
    let input_b: &[u8] = b"bravo bravo bravo bravo bravo";

    let mut encoder = Encoder::new();
    let encoded_a = encode_chunked(&mut encoder, input_a, 4096, 4096);
    encoder.reset();
    let encoded_b = encode_chunked(&mut encoder, input_b, 4096, 4096);

    let decoded_a = decode_chunked(&encoded_a, 4096, 4096).unwrap();
    assert_eq!(decoded_a, input_a);
    let decoded_b = decode_chunked(&encoded_b, 4096, 4096).unwrap();
    assert_eq!(decoded_b, input_b);
}
/// One decoder, reset between uses, decodes two separate streams correctly.
#[test]
fn decoder_reset_allows_reuse() {
    let mut encoder = Encoder::new();
    let encoded_a = encode_chunked(&mut encoder, b"hello", 4096, 4096);
    encoder.reset();
    let encoded_b = encode_chunked(&mut encoder, b"world", 4096, 4096);

    let mut decoder = Decoder::new();
    let first = decode_chunked_with(&mut decoder, &encoded_a, 4096, 4096).unwrap();
    assert_eq!(first, b"hello");

    decoder.reset();
    let second = decode_chunked_with(&mut decoder, &encoded_b, 4096, 4096).unwrap();
    assert_eq!(second, b"world");
}
/// A stream of only four bytes is accepted by `decode` but makes `finish`
/// fail with `Error::UnexpectedEnd`.
#[test]
fn truncated_header_rejected() {
    let short_input = [0u8; 4];
    let mut scratch = [0u8; 32];
    let mut decoder = Decoder::new();

    // Feeding the bytes must not fail by itself; the result is not inspected.
    let _ = decoder.decode(&short_input, &mut scratch).unwrap();

    let err = decoder.finish(&mut scratch).unwrap_err();
    assert_eq!(err, Error::UnexpectedEnd);
}
/// A stream consisting only of the little-endian `u64` value 100, with
/// nothing after it, is rejected with `Error::UnexpectedEnd`.
#[test]
fn truncated_payload_rejected() {
    let encoded = 100u64.to_le_bytes();
    let err = decode_chunked(&encoded, 4096, 4096).unwrap_err();
    assert_eq!(err, Error::UnexpectedEnd);
}
/// A hand-built stream is rejected with `Error::InvalidDistance`.
// NOTE(review): the layout is presumably an 8-byte length header, a 32-bit
// flag word whose top bit marks the first symbol as a match, and a 16-bit
// match symbol referring back before the start of output; confirm against
// the decoder.
#[test]
fn distance_out_of_history_rejected() {
    let header = 4u64.to_le_bytes();
    let word = 0x8000_0000u32.to_le_bytes();
    let half = 0u16.to_le_bytes();
    let encoded = [&header[..], &word[..], &half[..]].concat();

    let err = decode_chunked(&encoded, 4096, 4096).unwrap_err();
    assert_eq!(err, Error::InvalidDistance);
}
/// A hand-built stream is rejected with `Error::Corrupt`.
// NOTE(review): judging by the test name, the bytes after the literal `A`
// presumably form a match whose extended length encoding is invalid; confirm
// against the decoder.
#[test]
fn invalid_long_length_rejected() {
    let flag: u32 = 0x4000_0000 | 0x3FFF_FFFF;
    let sym: u16 = 7;

    let header = 10_000u64.to_le_bytes();
    let flag_bytes = flag.to_le_bytes();
    let sym_bytes = sym.to_le_bytes();
    let tail = 10u16.to_le_bytes();
    let encoded = [
        &header[..],
        &flag_bytes[..],
        &b"A"[..],
        &sym_bytes[..],
        &[0x0F, 0xFF][..],
        &tail[..],
    ]
    .concat();

    let err = decode_chunked(&encoded, 4096, 4096).unwrap_err();
    assert_eq!(err, Error::Corrupt);
}
/// Round-trips a short input using the encoder and decoder obtained through
/// the `Algorithm` trait, sharing one 256-byte scratch buffer.
#[test]
fn algorithm_encoder_decoder_round_trip() {
    let mut enc = <Xpress as Algorithm>::encoder();
    let mut dec = <Xpress as Algorithm>::decoder();
    let input = b"compcol xpress Algorithm trait roundtrip!";
    let mut scratch = [0u8; 256];

    // Encode: feed the remaining input until it is used up or the encoder
    // reports that it wants more.
    let mut encoded = Vec::new();
    let mut fed = 0;
    while fed < input.len() {
        let (progress, status) = enc.encode(&input[fed..], &mut scratch).unwrap();
        encoded.extend_from_slice(&scratch[..progress.written]);
        fed += progress.consumed;
        if let Status::InputEmpty = status {
            break;
        }
    }

    // Finish: keep flushing until the encoder signals the end of the stream.
    loop {
        let (progress, status) = enc.finish(&mut scratch).unwrap();
        encoded.extend_from_slice(&scratch[..progress.written]);
        if let Status::StreamEnd = status {
            break;
        }
        assert!(progress.written != 0, "finish stalled");
    }

    // Decode: feed the remaining encoded bytes until the decoder stops
    // reporting a full output buffer.
    let mut decoded = Vec::new();
    let mut taken = 0;
    loop {
        let (progress, status) = dec.decode(&encoded[taken..], &mut scratch).unwrap();
        decoded.extend_from_slice(&scratch[..progress.written]);
        taken += progress.consumed;
        match status {
            Status::StreamEnd | Status::InputEmpty => break,
            Status::OutputFull => {}
        }
    }

    let (_, status) = dec.finish(&mut scratch).unwrap();
    assert!(matches!(status, Status::StreamEnd));
    assert_eq!(decoded, input);
}
/// Decodes a fixed byte sequence and expects the lowercase alphabet.
///
/// The fixture is the little-endian `u64` value 26, the four bytes
/// `3F 00 00 00`, and then the letters `a` through `z`.
#[test]
fn decode_ms_xca_alphabet_fixture() {
    let mut encoded = 26u64.to_le_bytes().to_vec();
    encoded.extend_from_slice(&[0x3F, 0x00, 0x00, 0x00]);
    encoded.extend(b'a'..=b'z');

    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert_eq!(decoded, b"abcdefghijklmnopqrstuvwxyz");
}
/// Decodes a fixed byte sequence and expects `abcabc`.
///
/// The fixture is the little-endian `u64` value 6, the four bytes
/// `FF FF FF 1F`, the letters `abc`, and the two bytes `10 00`.
#[test]
fn decode_short_match_fixture() {
    let mut encoded = 6u64.to_le_bytes().to_vec();
    encoded.extend_from_slice(&[0xff, 0xff, 0xff, 0x1f]);
    encoded.extend_from_slice(b"abc");
    encoded.extend_from_slice(&[0x10, 0x00]);

    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert_eq!(decoded, b"abcabc");
}
/// Decodes a fixed byte sequence and expects 128 copies of `a`.
///
/// The fixture is the little-endian `u64` value 128, the four bytes
/// `FF FF FF 7F`, the letter `a`, and the four bytes `07 00 0F 66`.
#[test]
fn decode_long_run_fixture() {
    let mut encoded = 128u64.to_le_bytes().to_vec();
    encoded.extend_from_slice(&[0xff, 0xff, 0xff, 0x7f]);
    encoded.push(b'a');
    encoded.extend_from_slice(&[0x07, 0x00, 0x0f, 0x66]);

    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert_eq!(decoded, vec![b'a'; 128]);
}
/// Decodes a fixed byte sequence and expects 65,536 copies of `a`.
///
/// The fixture is the little-endian `u64` value 65,536, the four bytes
/// `FF FF FF 7F`, the letter `a`, and the six bytes `07 00 0F FF FC FF`.
#[test]
fn decode_tier4_length_fixture() {
    let mut encoded = 65_536u64.to_le_bytes().to_vec();
    encoded.extend_from_slice(&[0xff, 0xff, 0xff, 0x7f]);
    encoded.push(b'a');
    encoded.extend_from_slice(&[0x07, 0x00, 0x0f, 0xff, 0xfc, 0xff]);

    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert_eq!(decoded, vec![b'a'; 65536]);
}
/// Cross-checks the encoder against the Python `dissect.util` LZXPRESS
/// decompressor.
///
/// The test is skipped (with a message on stderr) when `python3` or the
/// `dissect.util.compression.lzxpress` module cannot be loaded. Otherwise the
/// encoded stream, minus its first eight bytes, is piped to the Python
/// decompressor, whose output must equal the original input.
#[cfg(feature = "std")]
#[test]
fn cross_validate_with_python_dissect() {
    use std::io::Write;
    use std::process::{Command, Stdio};

    // Probe for the interpreter and the module before doing any real work.
    let dissect_available = Command::new("python3")
        .args(["-c", "import dissect.util.compression.lzxpress"])
        .output()
        .map(|probe| probe.status.success())
        .unwrap_or(false);
    if !dissect_available {
        eprintln!("python3 / dissect.util not available; skipping cross-validation");
        return;
    }

    let input = b"the quick brown fox jumps over the lazy dog 1234567890";
    let encoded = encode_all(input);
    // Only the bytes after the first eight are handed to the Python side.
    let payload = &encoded[8..];

    let script = "import sys; from dissect.util.compression import lzxpress; sys.stdout.buffer.write(lzxpress.decompress(sys.stdin.buffer.read()))";
    let mut child = Command::new("python3")
        .args(["-c", script])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn python3");

    let child_stdin = child.stdin.as_mut().unwrap();
    child_stdin
        .write_all(payload)
        .expect("write to python stdin");

    let out = child.wait_with_output().expect("wait python");
    assert!(
        out.status.success(),
        "python decoder rejected our stream: {}",
        String::from_utf8_lossy(&out.stderr)
    );
    assert_eq!(out.stdout, input);
}
/// Tests for looking the codec up by name through `compcol::factory`.
#[cfg(feature = "factory")]
mod factory {
    use compcol::Status;
    use compcol::factory;

    /// Both an encoder and a decoder are registered under `"xpress"`.
    #[test]
    fn lookup_known() {
        let encoder = factory::encoder_by_name("xpress");
        assert!(encoder.is_some());
        let decoder = factory::decoder_by_name("xpress");
        assert!(decoder.is_some());
    }

    /// The list of registered names includes `"xpress"`.
    #[test]
    fn names_contains_xpress() {
        let names = factory::names();
        assert!(names.contains(&"xpress"));
    }

    /// The extension reported for `"xpress"` is `"xpress"`.
    #[test]
    fn extension_is_xpress() {
        let extension = factory::extension("xpress");
        assert_eq!(extension, Some("xpress"));
    }

    /// Round-trips a short input through the encoder and decoder returned by
    /// the factory, sharing one 256-byte scratch buffer.
    #[test]
    fn boxed_round_trip() {
        let mut enc = factory::encoder_by_name("xpress").unwrap();
        let mut dec = factory::decoder_by_name("xpress").unwrap();
        let input = b"factory boxed round-trip via xpress";
        let mut scratch = [0u8; 256];

        // Encode: feed the remaining input until it is used up or the
        // encoder reports that it wants more.
        let mut encoded = Vec::new();
        let mut fed = 0;
        while fed < input.len() {
            let (progress, status) = enc.encode(&input[fed..], &mut scratch).unwrap();
            encoded.extend_from_slice(&scratch[..progress.written]);
            fed += progress.consumed;
            if let Status::InputEmpty = status {
                break;
            }
        }

        // Finish: keep flushing until the encoder signals the end of the stream.
        loop {
            let (progress, status) = enc.finish(&mut scratch).unwrap();
            encoded.extend_from_slice(&scratch[..progress.written]);
            if let Status::StreamEnd = status {
                break;
            }
            assert!(progress.written != 0, "finish stalled");
        }

        // Decode: feed the remaining encoded bytes until the decoder stops
        // reporting a full output buffer.
        let mut decoded = Vec::new();
        let mut taken = 0;
        loop {
            let (progress, status) = dec.decode(&encoded[taken..], &mut scratch).unwrap();
            decoded.extend_from_slice(&scratch[..progress.written]);
            taken += progress.consumed;
            match status {
                Status::StreamEnd | Status::InputEmpty => break,
                Status::OutputFull => {}
            }
        }

        let (_, status) = dec.finish(&mut scratch).unwrap();
        assert!(matches!(status, Status::StreamEnd));
        assert_eq!(&decoded[..], input);
    }
}