#![cfg(feature = "xz")]
use std::io::Write;
use std::process::{Command, Stdio};
use compcol::xz::{Decoder, Encoder};
use compcol::{Decoder as _, Encoder as _, Error};
/// Compresses `input` into a complete xz stream in a single pass.
///
/// The whole slice is fed to a fresh [`Encoder`] through a 4 KiB scratch
/// buffer, after which `finish` is drained until it reports `done`.
///
/// # Panics
///
/// Panics if the encoder returns an error, or if either phase stops making
/// progress (nothing consumed and nothing written) before it is complete.
fn encode_all(input: &[u8]) -> Vec<u8> {
    let mut enc = Encoder::new();
    let mut out = Vec::new();
    let mut buf = vec![0u8; 4096];
    let mut remaining = input;
    while !remaining.is_empty() {
        let p = enc.encode(remaining, &mut buf).unwrap();
        out.extend_from_slice(&buf[..p.written]);
        // A call that neither consumes nor writes can never make progress.
        // Fail loudly instead of finishing a stream that is missing input.
        assert!(
            p.consumed != 0 || p.written != 0,
            "xz encoder stalled with {} input bytes left",
            remaining.len()
        );
        remaining = &remaining[p.consumed..];
    }
    loop {
        let p = enc.finish(&mut buf).unwrap();
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        if p.written == 0 {
            panic!("xz encoder finish stalled");
        }
    }
    out
}
/// Compresses `input` while feeding the encoder at most `in_chunk` bytes per
/// call and giving it an output buffer of `out_chunk` bytes.
///
/// Both chunk sizes are clamped to at least 1. Each input chunk is offered
/// repeatedly until a call neither consumes nor writes anything, then the
/// stream is finished.
///
/// # Panics
///
/// Panics if the encoder returns an error or if `finish` stops producing
/// output before reporting `done`.
fn encode_chunked(input: &[u8], in_chunk: usize, out_chunk: usize) -> Vec<u8> {
    let mut enc = Encoder::new();
    let mut out = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // A zero input chunk size would never advance through `input` (and
    // `chunks` rejects it), so clamp it to 1 just like `out_chunk`.
    for chunk in input.chunks(in_chunk.max(1)) {
        let mut consumed = 0;
        loop {
            let p = enc.encode(&chunk[consumed..], &mut buf).unwrap();
            out.extend_from_slice(&buf[..p.written]);
            consumed += p.consumed;
            // No input taken and no output produced: this chunk is done.
            if p.consumed == 0 && p.written == 0 {
                break;
            }
        }
    }
    loop {
        let p = enc.finish(&mut buf).unwrap();
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        if p.written == 0 {
            panic!("xz encoder finish stalled");
        }
    }
    out
}
/// Decompresses `encoded` while feeding the decoder at most `in_chunk` bytes
/// per call and giving it an output buffer of `out_chunk` bytes.
///
/// Both chunk sizes are clamped to at least 1. Each input chunk is offered
/// repeatedly until a call neither consumes nor writes anything, then
/// `finish` is drained until it reports `done`.
///
/// # Errors
///
/// Returns the first error reported by `decode` or `finish`.
///
/// # Panics
///
/// Panics if `finish` stops producing output before reporting `done`.
fn decode_chunked(encoded: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    let mut dec = Decoder::new();
    let mut out = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // A zero input chunk size would never advance through `encoded` (and
    // `chunks` rejects it), so clamp it to 1 just like `out_chunk`.
    for chunk in encoded.chunks(in_chunk.max(1)) {
        let mut consumed = 0;
        loop {
            let p = dec.decode(&chunk[consumed..], &mut buf)?;
            out.extend_from_slice(&buf[..p.written]);
            consumed += p.consumed;
            // No input taken and no output produced: this chunk is done.
            if p.consumed == 0 && p.written == 0 {
                break;
            }
        }
    }
    loop {
        let p = dec.finish(&mut buf)?;
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        if p.written == 0 {
            panic!("xz decoder finish stalled");
        }
    }
    Ok(out)
}
/// Encodes `input`, checks the xz header and footer magic bytes, then decodes
/// the stream again and asserts that the result equals `input`.
///
/// # Panics
///
/// Panics if the framing bytes are missing, if decoding fails, or if the
/// decoded bytes differ from `input`.
fn round_trip(input: &[u8]) {
    const HEADER_MAGIC: [u8; 6] = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00];
    const FOOTER_MAGIC: &[u8] = b"YZ";

    let encoded = encode_all(input);
    // `starts_with` / `ends_with` cope with an output shorter than the magic,
    // so a truncated stream produces a readable failure instead of an
    // out-of-range slice panic.
    assert!(
        encoded.starts_with(&HEADER_MAGIC),
        "missing xz header magic; stream ({} bytes) starts with {:02X?}",
        encoded.len(),
        &encoded[..encoded.len().min(HEADER_MAGIC.len())]
    );
    assert!(
        encoded.ends_with(FOOTER_MAGIC),
        "missing xz footer magic; stream ({} bytes) ends with {:02X?}",
        encoded.len(),
        &encoded[encoded.len().saturating_sub(FOOTER_MAGIC.len())..]
    );
    let decoded = decode_chunked(&encoded, 4096, 4096).unwrap();
    assert_eq!(decoded, input, "round-trip mismatch (len {})", input.len());
}
/// An empty payload must survive the encode/decode round trip.
#[test]
fn round_trip_empty() {
    let nothing: &[u8] = &[];
    round_trip(nothing);
}
/// A payload of only a few bytes round-trips unchanged.
#[test]
fn round_trip_short() {
    let payload: &[u8] = b"hello xz";
    round_trip(payload);
}
/// Round-trips a short phrase repeated 100 times.
#[test]
fn round_trip_repeated() {
    let phrase: &[u8] = b"the quick brown fox ";
    let payload = phrase.repeat(100);
    round_trip(&payload);
}
/// Round-trips 8192 zero bytes.
#[test]
fn round_trip_zeros_long() {
    let zeros = [0u8; 8192];
    round_trip(&zeros);
}
/// Round-trips 50 000 bytes taken from the top byte of a multiplicative hash
/// of the index.
#[test]
fn round_trip_pseudo_random() {
    let mut data = Vec::with_capacity(50_000);
    for i in 0..50_000u32 {
        let top_byte = i.wrapping_mul(0x9E37_79B1) >> 24;
        data.push(top_byte as u8);
    }
    round_trip(&data);
}
/// Round-trips 200 log-like text records that share most of their content.
#[test]
fn round_trip_structured() {
    let text: String = (0..200u32)
        .map(|i| {
            let hour = (i / 60) % 24;
            let minute = i % 60;
            format!(
                "record {:04} | timestamp 2026-05-28T{:02}:{:02}:00Z\n",
                i, hour, minute
            )
        })
        .collect();
    round_trip(text.as_bytes());
}
/// Round-trips exactly 65 536 bytes of a single repeated value.
#[test]
fn round_trip_exactly_one_chunk() {
    let block = vec![0xABu8; 1 << 16];
    round_trip(&block);
}
/// Round-trips 65 537 bytes that cycle through 0..=255.
#[test]
fn round_trip_just_over_one_chunk() {
    // Byte `i` equals `i & 0xFF`, i.e. the values 0..=255 repeated in order.
    let ramp: Vec<u8> = (0..=u8::MAX).cycle().take(65_537).collect();
    round_trip(&ramp);
}
/// Round-trips 200 000 bytes derived from the low byte of the index times 17.
#[test]
fn round_trip_multi_chunk() {
    let mut data = Vec::with_capacity(200_000);
    for i in 0..200_000u32 {
        let low = (i & 0xFF) as u8;
        data.push(low.wrapping_mul(17));
    }
    round_trip(&data);
}
/// Drives both encoder and decoder with one-byte input and output buffers.
#[test]
fn streaming_one_byte_both_sides() {
    let original: Vec<u8> = b"one byte at a time, all the way through".to_vec();
    let compressed = encode_chunked(&original, 1, 1);
    let restored = decode_chunked(&compressed, 1, 1).unwrap();
    assert_eq!(restored, original);
}
/// Streams 70 000 bytes through encoder and decoder using chunk sizes that
/// differ between input and output and between the two directions.
#[test]
fn streaming_irregular_chunks() {
    let mut original = Vec::with_capacity(70_000);
    for i in 0..70_000u32 {
        original.push((i ^ (i >> 7)) as u8);
    }
    let compressed = encode_chunked(&original, 13, 257);
    let restored = decode_chunked(&compressed, 521, 1024).unwrap();
    assert_eq!(restored, original);
}
/// A header whose sixth magic byte is 0x01 instead of 0x00 must be rejected
/// with `Error::BadHeader`.
#[test]
fn bad_magic_rejected() {
    let bogus_header: [u8; 6] = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x01];
    let mut scratch = [0u8; 16];
    let mut decoder = Decoder::new();
    let outcome = decoder.decode(&bogus_header, &mut scratch);
    let err = outcome.unwrap_err();
    assert_eq!(err, Error::BadHeader);
}
/// Dropping the last four bytes of a valid stream must yield
/// `Error::UnexpectedEnd`.
#[test]
fn truncated_stream_rejected() {
    let full = encode_all(b"some payload");
    let cut = &full[..full.len() - 4];
    let err = decode_chunked(cut, 1024, 1024).unwrap_err();
    assert_eq!(err, Error::UnexpectedEnd);
}
/// Flipping one bit in the middle of a valid stream must make decoding fail
/// with one of the accepted integrity-related error variants.
#[test]
fn corrupted_check_rejected() {
    let mut stream = encode_all(b"checksum me please");
    let middle = stream.len() / 2;
    stream[middle] ^= 0x01;
    let err = decode_chunked(&stream, 1024, 1024).unwrap_err();
    // Which variant is reported depends on which field the flipped bit hits.
    let acceptable = matches!(
        err,
        Error::ChecksumMismatch | Error::Corrupt | Error::Unsupported | Error::TrailerMismatch
    );
    assert!(acceptable, "unexpected error variant: {:?}", err);
}
/// The stream produced for an empty payload decodes back to zero bytes.
#[test]
fn fixture_empty_file() {
    let stream = encode_all(&[]);
    let restored = decode_chunked(&stream, 1024, 1024).unwrap();
    assert!(restored.is_empty());
}
/// After `reset`, an encoder that already accepted input must produce a
/// stream containing only the data fed after the reset.
#[test]
fn reset_then_reuse() {
    let mut enc = Encoder::new();
    let mut buf = vec![0u8; 4096];
    // Feed some data that is meant to be discarded by the reset below.
    let p = enc.encode(b"first", &mut buf).unwrap();
    assert_eq!(p.consumed, 5);
    enc.reset();
    let mut out = Vec::new();
    let mut consumed = 0;
    let input = b"second-payload";
    while consumed < input.len() {
        let p = enc.encode(&input[consumed..], &mut buf).unwrap();
        out.extend_from_slice(&buf[..p.written]);
        consumed += p.consumed;
        if p.consumed == 0 && p.written == 0 {
            break;
        }
    }
    loop {
        let p = enc.finish(&mut buf).unwrap();
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        // Without this guard a finish that never completes would hang the
        // test run instead of failing it.
        if p.written == 0 {
            panic!("xz encoder finish stalled");
        }
    }
    let decoded = decode_chunked(&out, 1024, 1024).unwrap();
    assert_eq!(decoded, input);
}
/// Reports whether `cmd --version` can be spawned and exits successfully.
///
/// The child's stdout and stderr are discarded. Any spawn failure (for
/// example the program not being found) counts as "not available".
fn tool_available(cmd: &str) -> bool {
    let mut probe = Command::new(cmd);
    probe
        .arg("--version")
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    match probe.status() {
        Ok(status) => status.success(),
        Err(_) => false,
    }
}
/// Runs `cmd args…`, feeds it `stdin_data` on stdin and returns everything it
/// wrote to stdout.
///
/// Stdin is written from a scoped helper thread while the calling thread
/// collects stdout and stderr. Writing everything first and reading
/// afterwards can deadlock once the child fills its stdout pipe while we are
/// still blocked writing its stdin.
///
/// # Errors
///
/// Returns an error if the process cannot be spawned or waited on, if it
/// exits unsuccessfully (the message includes the exit status and captured
/// stderr), or if writing stdin failed although the process exited
/// successfully.
fn pipe_through(cmd: &str, args: &[&str], stdin_data: &[u8]) -> std::io::Result<Vec<u8>> {
    let mut child = Command::new(cmd)
        .args(args)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()?;
    let mut stdin = child
        .stdin
        .take()
        .expect("child stdin was configured as piped");
    let (write_result, output) = std::thread::scope(|scope| {
        // The handle is moved into the writer so it is dropped (closing the
        // pipe and signalling EOF to the child) as soon as the write ends.
        let writer = scope.spawn(move || stdin.write_all(stdin_data));
        let output = child.wait_with_output();
        let write_result = writer.join().expect("stdin writer thread panicked");
        (write_result, output)
    });
    let out = output?;
    // Report the exit status first: when the child dies early the write side
    // only sees a broken pipe, and the child's stderr explains the failure.
    if !out.status.success() {
        return Err(std::io::Error::other(format!(
            "{} {:?} exited {:?}: {}",
            cmd,
            args,
            out.status,
            String::from_utf8_lossy(&out.stderr)
        )));
    }
    write_result?;
    Ok(out.stdout)
}
/// Streams produced by our encoder must be decodable by the system `xz`
/// tool. Skipped when `xz` is not installed.
#[test]
fn our_encode_then_system_xz_decode() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let cases: [(&str, Vec<u8>); 4] = [
        ("empty", Vec::new()),
        ("short", b"hello xz world".to_vec()),
        ("medium", b"Lorem ipsum dolor sit amet. ".repeat(200)),
        ("two_chunks", vec![0xCDu8; 70_000]),
    ];
    for (label, input) in cases {
        let encoded = encode_all(&input);
        let decoded = pipe_through("xz", &["-d", "-c"], &encoded)
            .unwrap_or_else(|e| panic!("{}: system xz failed: {}", label, e));
        assert_eq!(decoded, input, "{}: system xz decoded wrong", label);
    }
}
/// Small payloads compressed by the system `xz` tool must decode with our
/// decoder. Skipped when `xz` is not installed; a case is also skipped when
/// the `xz` invocation itself fails.
#[test]
fn system_xz_encode_then_our_decode_small() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let samples: [&[u8]; 4] = [
        b"",
        b"hello",
        b"a",
        b"the quick brown fox jumps over",
    ];
    for sample in samples {
        let input: Vec<u8> = sample.to_vec();
        let encoded = match pipe_through("xz", &["-c", "-z"], &input) {
            Err(e) => {
                println!("skipping case (xz failed): {}", e);
                continue;
            }
            Ok(bytes) => bytes,
        };
        let decoded = decode_chunked(&encoded, 1024, 1024).unwrap_or_else(|e| {
            panic!(
                "our decoder failed for system-xz output ({:?}): {:?}",
                input, e
            )
        });
        assert_eq!(decoded, input);
    }
}
/// An empty payload compressed by the system `xz` decodes to nothing with our
/// decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_empty() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original: Vec<u8> = vec![];
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// A one-line string compressed by the system `xz` decodes correctly with our
/// decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_small_string() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original: Vec<u8> = Vec::from(&b"hello world\n"[..]);
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// 10 KiB of a repeated ASCII line compressed by the system `xz` decodes
/// correctly with our decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_medium_ascii() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let line: &[u8] = b"The quick brown fox jumps over the lazy dog. 0123456789\n";
    // Repeat the line and cut the result off at exactly 10 KiB.
    let original: Vec<u8> = line.iter().copied().cycle().take(10 * 1024).collect();
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// 16 KiB of a repeated Lorem ipsum paragraph compressed by the system `xz`
/// decodes correctly with our decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_lorem_ipsum() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let paragraph: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
        eiusmod tempor incididunt ut labore et dolore magna aliqua. \
        Ut enim ad minim veniam, quis nostrud exercitation ullamco \
        laboris nisi ut aliquip ex ea commodo consequat. Duis aute \
        irure dolor in reprehenderit in voluptate velit esse cillum \
        dolore eu fugiat nulla pariatur.\n";
    // Repeat the paragraph and cut the result off at exactly 16 KiB.
    let original: Vec<u8> = paragraph
        .iter()
        .copied()
        .cycle()
        .take(16 * 1024)
        .collect();
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// 64 KiB of zeros compressed by the system `xz` decodes correctly with our
/// decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_large_zeros() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original = vec![0u8; 65_536];
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// 30 000 hash-derived bytes compressed by the system `xz` decode correctly
/// with our decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn system_xz_encode_then_our_decode_binary() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let mut original: Vec<u8> = Vec::with_capacity(30_000);
    for i in 0..30_000u32 {
        let mixed = i.wrapping_mul(0x9E37_79B1) >> 16;
        original.push(mixed as u8);
    }
    let compressed = pipe_through("xz", &["-c", "-z"], &original).unwrap();
    let restored = decode_chunked(&compressed, 1024, 1024).unwrap();
    assert_eq!(restored, original);
}
/// Returns the byte at offset 24 of `encoded`, which the tests treat as the
/// control byte of the first chunk.
///
/// NOTE(review): offset 24 presumably corresponds to a 12-byte stream header
/// followed by a 12-byte block header; confirm against the encoder's layout.
///
/// # Panics
///
/// Panics if `encoded` has 24 bytes or fewer.
#[cfg(unix)]
fn first_chunk_control_byte(encoded: &[u8]) -> u8 {
    match encoded.get(24) {
        Some(&control) => control,
        None => panic!("xz stream too short to contain a chunk"),
    }
}
/// Our stream for an empty payload has 0x00 at the first control-byte offset
/// and decodes to nothing with the system `xz`. Skipped when `xz` is not
/// installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_empty_round_trip_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original: Vec<u8> = vec![];
    let stream = encode_all(&original);
    assert_eq!(first_chunk_control_byte(&stream), 0x00);
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
}
/// A 12-byte payload is expected to be stored uncompressed (control byte
/// 0x01) and must decode with both the system `xz` and our decoder. Skipped
/// when `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_hello_world_round_trip_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original: Vec<u8> = Vec::from(&b"hello world\n"[..]);
    let stream = encode_all(&original);
    let control = first_chunk_control_byte(&stream);
    assert_eq!(
        control, 0x01,
        "expected uncompressed fallback for 12-byte input"
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 10 KiB of a repeated ASCII line must be emitted as a compressed chunk
/// (control byte 0xE0), shrink to under half its size, and decode with both
/// the system `xz` and our decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_ten_kib_ascii_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let line: &[u8] = b"The quick brown fox jumps over the lazy dog. 0123456789\n";
    // Repeat the line and cut the result off at exactly 10 KiB.
    let original: Vec<u8> = line.iter().copied().cycle().take(10 * 1024).collect();
    let stream = encode_all(&original);
    let control = first_chunk_control_byte(&stream);
    assert_eq!(
        control, 0xE0,
        "expected compressed chunk for 10 KiB repeating ASCII"
    );
    let half_of_input = original.len() / 2;
    assert!(
        stream.len() < half_of_input,
        "encoded {} >= half of {} — compression seems broken",
        stream.len(),
        original.len()
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 16 KiB of a repeated Lorem ipsum paragraph must be emitted as a compressed
/// chunk (control byte 0xE0) and decode with both the system `xz` and our
/// decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_sixteen_kib_lorem_ipsum_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let paragraph: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do \
        eiusmod tempor incididunt ut labore et dolore magna aliqua. \
        Ut enim ad minim veniam, quis nostrud exercitation ullamco \
        laboris nisi ut aliquip ex ea commodo consequat. Duis aute \
        irure dolor in reprehenderit in voluptate velit esse cillum \
        dolore eu fugiat nulla pariatur.\n";
    // Repeat the paragraph and cut the result off at exactly 16 KiB.
    let original: Vec<u8> = paragraph
        .iter()
        .copied()
        .cycle()
        .take(16 * 1024)
        .collect();
    let stream = encode_all(&original);
    let control = first_chunk_control_byte(&stream);
    assert_eq!(control, 0xE0, "expected compressed chunk for Lorem ipsum");
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 64 KiB of zeros must be emitted as a compressed chunk (control byte 0xE0),
/// fit in under 1 KiB, and decode with both the system `xz` and our decoder.
/// Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_sixty_four_kib_zeros_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let original = vec![0u8; 65_536];
    let stream = encode_all(&original);
    let control = first_chunk_control_byte(&stream);
    assert_eq!(
        control, 0xE0,
        "expected compressed chunk for 64 KiB of zeros"
    );
    let encoded_len = stream.len();
    assert!(
        encoded_len < 1024,
        "encoded {} > 1 KiB for 64 KiB of zeros — compression not effective",
        encoded_len
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 40 000 hash-mixed bytes may be emitted either uncompressed (0x01) or
/// compressed (0xE0), and must decode with both the system `xz` and our
/// decoder. Skipped when `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_pseudo_random_round_trip_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let mut original: Vec<u8> = Vec::with_capacity(40_000);
    for i in 0..40_000u32 {
        let first = i.wrapping_mul(0x9E37_79B1) >> 24;
        let second = (i ^ 0xDEAD_BEEF).wrapping_mul(0x85EB_CA6B) >> 16;
        original.push((first ^ second) as u8);
    }
    let stream = encode_all(&original);
    let cb = first_chunk_control_byte(&stream);
    assert!(
        matches!(cb, 0x01 | 0xE0),
        "expected uncompressed (0x01) or compressed (0xE0) chunk, got {:#x}",
        cb
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 40 000 xorshift-generated bytes must produce a first control byte of 0x01
/// or 0xE0 and decode with both the system `xz` and our decoder. Skipped when
/// `xz` is not installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_truly_random_falls_back_uncompressed() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    const TOTAL: usize = 40_000;
    let mut state: u32 = 0xCAFE_F00D;
    let mut original: Vec<u8> = Vec::with_capacity(TOTAL);
    // Each xorshift step contributes four little-endian bytes, so TOTAL / 4
    // steps fill the buffer exactly.
    for _ in 0..TOTAL / 4 {
        state ^= state << 13;
        state ^= state >> 17;
        state ^= state << 5;
        original.extend_from_slice(&state.to_le_bytes());
    }
    let stream = encode_all(&original);
    let cb = first_chunk_control_byte(&stream);
    assert!(
        matches!(cb, 0x01 | 0xE0),
        "unexpected control byte for random input: {:#x}",
        cb
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}
/// 200 KiB of repeated English text must start with a compressed chunk
/// (control byte 0xE0), shrink to under a quarter of its size, and decode
/// with both the system `xz` and our decoder. Skipped when `xz` is not
/// installed.
#[cfg(unix)]
#[test]
fn compressed_lzma2_multi_chunk_via_xz() {
    let xz_present = tool_available("xz");
    if !xz_present {
        println!("skipping: xz not installed");
        return;
    }
    let sentence_pair: &[u8] = b"The quick brown fox jumps over the lazy dog. \
        Pack my box with five dozen liquor jugs.\n";
    // Repeat the text and cut the result off at exactly 200 KiB.
    let original: Vec<u8> = sentence_pair
        .iter()
        .copied()
        .cycle()
        .take(200 * 1024)
        .collect();
    let stream = encode_all(&original);
    let control = first_chunk_control_byte(&stream);
    assert_eq!(control, 0xE0, "expected compressed first chunk");
    let quarter_of_input = original.len() / 4;
    assert!(
        stream.len() < quarter_of_input,
        "encoded {} too large vs input {}",
        stream.len(),
        original.len()
    );
    let via_xz = pipe_through("xz", &["-d", "-c"], &stream).unwrap();
    assert_eq!(via_xz, original);
    let via_ours = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(via_ours, original);
}