#![cfg(feature = "brotli")]
use std::io::Write;
use std::process::{Command, Stdio};
use compcol::brotli::{Decoder, Encoder};
use compcol::{Decoder as _, Encoder as _, Error};
/// Decodes a string of hexadecimal digit pairs into the bytes they spell.
///
/// Upper- and lower-case digits are both accepted; no separators, prefixes
/// or signs are allowed.
///
/// # Panics
///
/// Panics if `s` has an odd length or contains a character that is not a
/// hexadecimal digit.
fn hex(s: &str) -> Vec<u8> {
    assert!(
        s.len() % 2 == 0,
        "hex string has odd length {}: {:?}",
        s.len(),
        s
    );
    // Convert digit by digit instead of using `u8::from_str_radix`, which
    // would also accept a leading `+` sign (e.g. "+f") as a valid byte.
    let nibble = |digit: u8| -> u8 {
        match (digit as char).to_digit(16) {
            Some(value) => value as u8,
            None => panic!("invalid hex digit {:?} in {:?}", digit as char, s),
        }
    };
    s.as_bytes()
        .chunks_exact(2)
        .map(|pair| (nibble(pair[0]) << 4) | nibble(pair[1]))
        .collect()
}
/// Compresses `input` by driving a fresh [`Encoder`] through its streaming API.
///
/// The input is offered in slices of at most `in_chunk` bytes and all output
/// is collected through a scratch buffer of `out_chunk` bytes. Both sizes are
/// clamped to at least 1. After every chunk the encoder is called again until
/// it writes nothing more, then `finish` is called until it reports `done`.
///
/// # Errors
///
/// Returns the first error reported by `encode` or `finish`.
///
/// # Panics
///
/// Panics if `encode` makes no progress while chunk bytes are still pending,
/// or if `finish` writes nothing without reporting `done`.
fn encode_chunked(input: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    let mut enc = Encoder::new();
    let mut out = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // Clamp like `out_chunk`: a zero chunk size would never advance through
    // a non-empty `input`.
    for chunk in input.chunks(in_chunk.max(1)) {
        let mut consumed_total = 0;
        loop {
            let p = enc.encode(&chunk[consumed_total..], &mut buf)?;
            out.extend_from_slice(&buf[..p.written]);
            consumed_total += p.consumed;
            if p.written > 0 {
                // Output was produced; keep draining even if the chunk is used up.
                continue;
            }
            if consumed_total == chunk.len() {
                break;
            }
            // Nothing written and nothing consumed with input pending would
            // silently drop the rest of the chunk; fail loudly instead.
            assert!(
                p.consumed > 0,
                "encoder stalled with {} of {} chunk bytes unconsumed",
                chunk.len() - consumed_total,
                chunk.len()
            );
        }
    }
    loop {
        let p = enc.finish(&mut buf)?;
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        if p.written == 0 {
            panic!("encoder finish stalled");
        }
    }
    Ok(out)
}
/// Decompresses `encoded` by driving a fresh [`Decoder`] through its streaming API.
///
/// The stream is offered in slices of at most `in_chunk` bytes and all output
/// is collected through a scratch buffer of `out_chunk` bytes. Both sizes are
/// clamped to at least 1. Feeding of a chunk stops as soon as a `decode` call
/// neither consumes nor writes anything, even if part of the chunk is still
/// unconsumed. Afterwards `finish` is called until it reports `done`.
///
/// # Errors
///
/// Returns the first error reported by `decode` or `finish`.
///
/// # Panics
///
/// Panics if `finish` writes nothing without reporting `done`.
fn decode_chunked(encoded: &[u8], in_chunk: usize, out_chunk: usize) -> Result<Vec<u8>, Error> {
    let mut dec = Decoder::new();
    let mut out = Vec::new();
    let mut buf = vec![0u8; out_chunk.max(1)];
    // Clamp like `out_chunk`: a zero chunk size would never advance through
    // a non-empty `encoded`.
    for chunk in encoded.chunks(in_chunk.max(1)) {
        let mut consumed_in_chunk = 0;
        loop {
            let p = dec.decode(&chunk[consumed_in_chunk..], &mut buf)?;
            out.extend_from_slice(&buf[..p.written]);
            consumed_in_chunk += p.consumed;
            // No progress at all: move on to the next chunk.
            if p.consumed == 0 && p.written == 0 {
                break;
            }
        }
    }
    loop {
        let p = dec.finish(&mut buf)?;
        out.extend_from_slice(&buf[..p.written]);
        if p.done {
            break;
        }
        if p.written == 0 {
            panic!("decoder finish stalled");
        }
    }
    Ok(out)
}
/// Asserts that `input` survives an encode → decode cycle unchanged.
///
/// The input is handed to the encoder as one chunk with 32 bytes of output
/// headroom, and the encoded stream is handed to the decoder as one chunk
/// with an output buffer the size of the input (at least 1).
fn roundtrip(input: &[u8]) {
    let plain_len = input.len().max(1);
    let compressed = encode_chunked(input, plain_len, plain_len + 32).unwrap();
    let compressed_len = compressed.len().max(1);
    let restored = decode_chunked(&compressed, compressed_len, plain_len).unwrap();
    assert_eq!(restored, input);
}
/// An empty input must round-trip to an empty output.
#[test]
fn empty_stream_round_trip() {
    let nothing: &[u8] = &[];
    roundtrip(nothing);
}
/// Encoding an empty input must yield exactly the single byte `0x06`.
#[test]
fn empty_stream_exact_bytes() {
    let expected: [u8; 1] = [0x06];
    let actual = encode_chunked(&[], 1, 16).unwrap();
    assert_eq!(actual, expected);
}
/// Round-trips a handful of short ASCII inputs, in order.
#[test]
fn short_round_trip() {
    let samples: [&[u8]; 4] = [
        b"hello",
        b"a",
        b"hello world",
        b"The quick brown fox jumps over the lazy dog.",
    ];
    for sample in samples {
        roundtrip(sample);
    }
}
/// Round-trips a buffer containing every byte value exactly once, ascending.
#[test]
fn binary_round_trip() {
    let every_byte = (u8::MIN..=u8::MAX).collect::<Vec<u8>>();
    roundtrip(&every_byte);
}
/// Round-trips 200 000 bytes generated as the low byte of `i * 31`.
#[test]
fn large_round_trip() {
    let mut input: Vec<u8> = Vec::with_capacity(200_000);
    for i in 0..200_000 {
        // Deliberate truncation to the low byte.
        input.push((i * 31) as u8);
    }
    roundtrip(&input);
}
/// Round-trips inputs of 65 536, 65 537 and 131 072 bytes.
///
/// Per the test name these lengths sit on or just past block boundaries;
/// the block size itself is not visible from this file.
#[test]
fn exact_block_boundary_round_trip() {
    for len in [65_536, 65_537, 131_072] {
        let input: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
        roundtrip(&input);
    }
}
/// Round-trips one sentence repeated 1000 times (highly repetitive text).
#[test]
fn structured_round_trip() {
    let input = b"the quick brown fox jumps over the lazy dog\n".repeat(1000);
    roundtrip(&input);
}
/// Round-trips 70 000 bytes drawn from a fixed-seed 32-bit xorshift generator.
#[test]
fn pseudo_random_round_trip() {
    let mut state: u32 = 0xdead_beef;
    let input: Vec<u8> = (0..70_000)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            // Keep only the low byte of each generator step.
            state as u8
        })
        .collect();
    roundtrip(&input);
}
/// Round-trips a short message with one-byte input chunks and a one-byte
/// output buffer on both the encode and decode side.
#[test]
fn one_byte_input_one_byte_output_round_trip() {
    let plain: &[u8] = b"hello world from brotli";
    let compressed = encode_chunked(plain, 1, 1).unwrap();
    let restored = decode_chunked(&compressed, 1, 1).unwrap();
    assert_eq!(restored, plain);
}
/// Same one-byte-at-a-time streaming as the short variant, on 3000 bytes
/// generated as the low byte of `i * 17 + 5`.
#[test]
fn one_byte_streaming_large_round_trip() {
    let mut plain: Vec<u8> = Vec::with_capacity(3000);
    for i in 0..3000 {
        plain.push((i * 17 + 5) as u8);
    }
    let compressed = encode_chunked(&plain, 1, 1).unwrap();
    let restored = decode_chunked(&compressed, 1, 1).unwrap();
    assert_eq!(restored, plain);
}
/// Decodes a fixed 9-byte stream and expects the payload `hello`.
///
/// Per the test name the stream is a hand-built one that stores the payload
/// uncompressed; the literal bytes `68 65 6c 6c 6f` are visible in the hex.
#[test]
fn decode_handcrafted_hello_uncompressed() {
    let encoded = hex("40001068656c6c6f03");
    let plain = decode_chunked(&encoded, 1024, 1024).unwrap();
    assert_eq!(plain, b"hello");
}
/// Feeding the single byte `0x11` must fail with `Error::Unsupported`.
///
/// Per the test name this byte is a header that requests the large-window
/// extension.
#[test]
fn decode_rejects_unsupported_large_window_flag() {
    let header = [0x11u8];
    let mut scratch = [0u8; 32];
    let mut dec = Decoder::new();
    let outcome = dec.decode(&header, &mut scratch);
    assert_eq!(outcome.unwrap_err(), Error::Unsupported);
}
/// A stream consisting only of the byte `0x00` must be accepted by `decode`
/// but rejected by `finish` with `Error::UnexpectedEnd`.
#[test]
fn decode_rejects_truncated_stream() {
    let truncated = [0x00];
    let mut scratch = [0u8; 32];
    let mut dec = Decoder::new();
    // The partial input itself is not an error; only finishing is.
    let _progress = dec.decode(&truncated, &mut scratch).unwrap();
    let outcome = dec.finish(&mut scratch);
    assert_eq!(outcome.unwrap_err(), Error::UnexpectedEnd);
}
/// After `reset`, an encoder that had already consumed input must produce a
/// stream that decodes to only the data fed after the reset.
#[test]
fn reset_allows_reuse() {
    let mut scratch = [0u8; 64];
    let mut enc = Encoder::new();

    // First use: feed "hi", then discard that state.
    let before_reset = enc.encode(b"hi", &mut scratch).unwrap();
    assert_eq!(before_reset.consumed, 2);
    enc.reset();

    // Second use: only "bye" should end up in the stream.
    let after_reset = enc.encode(b"bye", &mut scratch).unwrap();
    assert_eq!(after_reset.consumed, 3);
    let mut stream = scratch[..after_reset.written].to_vec();

    loop {
        let step = enc.finish(&mut scratch).unwrap();
        stream.extend_from_slice(&scratch[..step.written]);
        if step.done {
            break;
        }
        assert!(step.written != 0, "stalled");
    }

    let plain = decode_chunked(&stream, 1024, 1024).unwrap();
    assert_eq!(plain, b"bye");
}
/// Probes for a usable `brotli` command-line tool.
///
/// Runs `brotli --version` with stdout and stderr discarded and returns
/// `Some("brotli")` when the process could be started and exited
/// successfully, `None` otherwise.
fn brotli_cli_available() -> Option<String> {
    const PROGRAM: &str = "brotli";
    let probe = Command::new(PROGRAM)
        .arg("--version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    probe
        .ok()
        .filter(|status| status.success())
        .map(|_| PROGRAM.to_string())
}
/// Decompresses `data` with the external program `brotli`, run as
/// `<brotli> -d -c`, and returns everything the child wrote to stdout.
///
/// The payload is written to the child's stdin from a scoped helper thread
/// while this thread collects stdout/stderr. Writing the whole payload before
/// reading any output can deadlock once both pipe buffers are full.
///
/// # Panics
///
/// Panics if the process cannot be spawned or waited on, if it exits with a
/// failure status (the message includes its stderr), or if writing to its
/// stdin fails.
fn brotli_decode(brotli: &str, data: &[u8]) -> Vec<u8> {
    let mut child = Command::new(brotli)
        .args(["-d", "-c"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn brotli");
    let mut stdin = child.stdin.take().expect("child stdin is piped");
    let (fed, out) = std::thread::scope(|scope| {
        // `stdin` is moved into the thread and dropped when the write is
        // over, which closes the pipe and signals end-of-input to the child.
        let feeder = scope.spawn(move || stdin.write_all(data));
        let out = child.wait_with_output();
        (feeder.join(), out)
    });
    let out = out.expect("wait brotli");
    // Check the exit status first: if the child bailed out early, its stderr
    // is more informative than the resulting broken-pipe write error.
    assert!(
        out.status.success(),
        "brotli -d failed: {}",
        String::from_utf8_lossy(&out.stderr)
    );
    fed.expect("stdin writer thread panicked")
        .expect("write stdin");
    out.stdout
}
/// Compresses `data` with the external program `brotli`, run as
/// `<brotli> -c`, and returns everything the child wrote to stdout.
///
/// The payload is written to the child's stdin from a scoped helper thread
/// while this thread collects stdout/stderr. Writing the whole payload before
/// reading any output can deadlock once both pipe buffers are full.
///
/// # Panics
///
/// Panics if the process cannot be spawned or waited on, if it exits with a
/// failure status (the message includes its stderr), or if writing to its
/// stdin fails.
fn brotli_encode(brotli: &str, data: &[u8]) -> Vec<u8> {
    let mut child = Command::new(brotli)
        .args(["-c"])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn brotli");
    let mut stdin = child.stdin.take().expect("child stdin is piped");
    let (fed, out) = std::thread::scope(|scope| {
        // `stdin` is moved into the thread and dropped when the write is
        // over, which closes the pipe and signals end-of-input to the child.
        let feeder = scope.spawn(move || stdin.write_all(data));
        let out = child.wait_with_output();
        (feeder.join(), out)
    });
    let out = out.expect("wait brotli");
    // stderr is captured anyway, so surface it instead of discarding it.
    assert!(
        out.status.success(),
        "brotli encode failed: {}",
        String::from_utf8_lossy(&out.stderr)
    );
    fed.expect("stdin writer thread panicked")
        .expect("write stdin");
    out.stdout
}
/// Streams produced by this crate's encoder must decode correctly with the
/// system `brotli` CLI. Skipped (with a note on stderr) when the CLI is missing.
#[test]
fn cross_validate_with_reference_decoder() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let inputs: Vec<Vec<u8>> = vec![
        Vec::new(),
        vec![b'a'],
        b"hello world".to_vec(),
        (u8::MIN..=u8::MAX).collect(),
        (0..70_000usize).map(|i| (i * 37) as u8).collect(),
    ];
    for input in &inputs {
        let plain_len = input.len().max(1);
        let encoded = encode_chunked(input, plain_len, plain_len + 32).unwrap();
        let decoded = brotli_decode(&brotli, &encoded);
        assert_eq!(decoded, *input, "reference decoder mismatch");
    }
}
/// A sentence compressed by the system `brotli` CLI must decode correctly
/// with this crate's decoder. Skipped when the CLI is missing.
#[test]
fn cross_validate_compressed_input_round_trips() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let plain = b"the quick brown fox jumps over the lazy dog. this is repetitive enough that the encoder will pick compressed format.";
    let compressed = brotli_encode(&brotli, plain);
    let restored = decode_chunked(&compressed, compressed.len(), plain.len()).unwrap();
    assert_eq!(restored, plain);
}
/// `hello world\n` compressed by the system `brotli` CLI must decode
/// correctly with this crate's decoder. Skipped when the CLI is missing.
#[test]
fn cross_validate_compressed_hello_world() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let plain = b"hello world\n";
    let compressed = brotli_encode(&brotli, plain);
    let restored = decode_chunked(&compressed, compressed.len(), plain.len()).unwrap();
    assert_eq!(restored, plain);
}
/// 4096 bytes of a repeated ASCII sentence, compressed by the system
/// `brotli` CLI, must decode correctly with this crate's decoder.
/// Skipped when the CLI is missing.
#[test]
fn cross_validate_compressed_4k_ascii() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    // Repeat the sentence and cut at exactly 4096 bytes.
    let plain: Vec<u8> = b"The quick brown fox jumps over the lazy dog.\n"
        .iter()
        .copied()
        .cycle()
        .take(4096)
        .collect();
    let compressed = brotli_encode(&brotli, &plain);
    let restored = decode_chunked(&compressed, compressed.len(), plain.len()).unwrap();
    assert_eq!(restored, plain);
}
/// 16 KiB of lorem-ipsum text, compressed by the system `brotli` CLI, must
/// decode correctly with this crate's decoder. Skipped when the CLI is missing.
#[test]
fn cross_validate_compressed_16k_lorem() {
    let Some(brotli) = brotli_cli_available() else {
        eprintln!("skipping: brotli CLI not available");
        return;
    };
    // Shared fixture: the same text the encoder-side tests use.
    let input = lorem_16k();
    let encoded = brotli_encode(&brotli, &input);
    let decoded = decode_chunked(&encoded, encoded.len(), input.len()).unwrap();
    assert_eq!(decoded, input);
}
/// A short English phrase compressed by the system `brotli` CLI must decode
/// correctly with this crate's decoder. Skipped when the CLI is missing.
///
/// Per the test name the phrase is meant to trigger dictionary references in
/// the reference encoder; the encoded bytes are dumped to stderr as hex to
/// help diagnose failures.
#[test]
fn cross_validate_compressed_dictionary_phrase() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let plain = b"the time has come";
    let compressed = brotli_encode(&brotli, plain);
    let hex_dump: String = compressed
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect();
    eprintln!("encoded: {} bytes: {:?}", compressed.len(), hex_dump);
    let restored = decode_chunked(&compressed, compressed.len(), plain.len() + 32).unwrap();
    assert_eq!(restored, plain);
}
/// An empty input compressed by the system `brotli` CLI must decode to an
/// empty output with this crate's decoder. Skipped when the CLI is missing.
#[test]
fn cross_validate_compressed_empty() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let plain: &[u8] = &[];
    let compressed = brotli_encode(&brotli, plain);
    let in_chunk = compressed.len().max(1);
    let restored = decode_chunked(&compressed, in_chunk, 16).unwrap();
    assert_eq!(restored, plain);
}
/// Decodes `stream` with a single `decode` call into a buffer of
/// `stream.len() * 16 + 4096` bytes and returns the bytes written.
///
/// `finish` is never called, so only what that one call produces is
/// returned and end-of-stream is not verified here.
///
/// # Panics
///
/// Panics with "decode" if the decoder reports an error.
fn decode_one_shot(stream: &[u8]) -> Vec<u8> {
    let capacity = stream.len() * 16 + 4096;
    let mut plain = vec![0u8; capacity];
    let mut dec = Decoder::new();
    let written = dec.decode(stream, &mut plain).expect("decode").written;
    plain.truncate(written);
    plain
}
/// Decodes a table of fixed, hex-encoded streams in one shot and compares
/// each result with its expected plaintext.
///
/// No external tool is needed at test time. Per the test name the streams
/// are reference-encoder output; that is not verifiable from this file.
#[test]
fn decode_fixed_reference_streams() {
    // (hex-encoded stream, expected plaintext)
    let table: &[(&str, &[u8])] = &[
        ("1f0700f825c242840000", b"aaaaaaaa"),
        ("1f0d00f825c2e2850000", b"aaaaaaaaaaaaaa"),
        (
            "1f2700f825c2a28c00c0",
            b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ),
        (
            "1f1000f8a541c2d0e69428c0d429203d343906",
            b"the time has come",
        ),
        ("1f0d00f825004a9042ea16999e2200", b"this is a test"),
        (
            "1f2a00889c09364ea87737bc2433a34b9033bc427b4b90b23998c881435ba0f7dea7150ee90b4789ea0c1be0563506",
            b"the quick brown fox jumps over the lazy dog",
        ),
        (
            "1f4a00a014a1d2d56da92ea4c77e70ea41b8e8101536e080bd05f617fd00b5e7947aa93a819311a5e685e00dc00fff0f259bd5b15d9c5428ceec103d",
            b"the quick brown fox jumps over the lazy dog. this is repetitive enough that",
        ),
        (
            "1f7300e045b779bd3b2ecf3f68a550182651e9e40ecc7fd4965cf212ce2df084052db0c8db379508510f9ae617e0bd617b47f90fd5bbdcc4bee0625ada219e1c75aa68e600388b1d6a0eb3004b01",
            b"the quick brown fox jumps over the lazy dog. this is repetitive enough that the encoder will pick compressed format.",
        ),
    ];
    for &(encoded_hex, plain) in table.iter() {
        let decoded = decode_one_shot(&hex(encoded_hex));
        assert_eq!(
            decoded,
            plain,
            "mismatch for stream {}: got {:?}",
            encoded_hex,
            String::from_utf8_lossy(&decoded)
        );
    }
}
/// Builds a 16 KiB (16 384-byte) fixture by repeating one lorem-ipsum
/// paragraph (with trailing space) and cutting at exactly the target length.
fn lorem_16k() -> Vec<u8> {
    const TARGET_LEN: usize = 16 * 1024;
    const PARAGRAPH: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. ";
    PARAGRAPH
        .iter()
        .copied()
        .cycle()
        .take(TARGET_LEN)
        .collect()
}
/// Round-trips a set of differently shaped inputs (empty, one byte, a short
/// line, 4 KiB and 16 KiB of text) through this crate's encoder and decoder,
/// each as a single chunk with 64 bytes of output headroom.
#[test]
fn compressed_encoder_internal_round_trip_shapes() {
    let four_k: Vec<u8> = b"The quick brown fox jumps over the lazy dog.\n"
        .iter()
        .copied()
        .cycle()
        .take(4096)
        .collect();
    let cases: Vec<(&str, Vec<u8>)> = vec![
        ("empty", Vec::new()),
        ("single byte", vec![b'a']),
        ("hello world", b"hello world\n".to_vec()),
        ("4k lorem", four_k),
        ("16k lorem", lorem_16k()),
    ];
    for (name, input) in &cases {
        let plain_len = input.len().max(1);
        let encoded = encode_chunked(input, plain_len, plain_len + 64).unwrap();
        let encoded_len = encoded.len().max(1);
        let decoded = decode_chunked(&encoded, encoded_len, plain_len + 64).unwrap();
        assert_eq!(decoded, *input, "internal round-trip failed for {}", name);
    }
}
/// Round-trips several inputs with one-byte input chunks and a one-byte
/// output buffer on both the encode and decode side.
#[test]
fn compressed_encoder_streaming_one_byte() {
    let cases: Vec<(&str, Vec<u8>)> = vec![
        ("hello world", b"hello world\n".to_vec()),
        ("alphabet", (u8::MIN..=u8::MAX).collect()),
        (
            "structured",
            b"the quick brown fox jumps over the lazy dog\n".repeat(100),
        ),
    ];
    for (name, input) in &cases {
        let encoded = encode_chunked(input, 1, 1).unwrap();
        let decoded = decode_chunked(&encoded, 1, 1).unwrap();
        assert_eq!(decoded, *input, "streaming round-trip failed for {}", name);
    }
}
/// Streams produced by this crate's encoder for a range of input shapes must
/// decode to the original bytes with the system `brotli -d`.
/// Compiled on Unix only; skipped when the CLI is missing.
#[cfg(unix)]
#[test]
fn compressed_encoder_cross_validate_reference() {
    let brotli = match brotli_cli_available() {
        Some(path) => path,
        None => {
            eprintln!("skipping: brotli CLI not available");
            return;
        }
    };
    let four_k: Vec<u8> = b"The quick brown fox jumps over the lazy dog.\n"
        .iter()
        .copied()
        .cycle()
        .take(4096)
        .collect();
    // Fixed-seed 32-bit xorshift; the low byte of each step is kept.
    let mut state: u32 = 0xdead_beef;
    let noise: Vec<u8> = (0..70_000)
        .map(|_| {
            state ^= state << 13;
            state ^= state >> 17;
            state ^= state << 5;
            state as u8
        })
        .collect();
    let cases: Vec<(&str, Vec<u8>)> = vec![
        ("empty", Vec::new()),
        ("single byte", vec![b'a']),
        ("hello world", b"hello world\n".to_vec()),
        ("4k lorem", four_k),
        ("16k lorem", lorem_16k()),
        ("binary 0..255", (u8::MIN..=u8::MAX).collect()),
        ("pseudo random 70k", noise),
    ];
    for (name, input) in &cases {
        let plain_len = input.len().max(1);
        let encoded = encode_chunked(input, plain_len, plain_len + 64).unwrap();
        let decoded = brotli_decode(&brotli, &encoded);
        assert_eq!(
            decoded, *input,
            "system brotli -d returned the wrong bytes for {}",
            name
        );
    }
}
/// Round-trips 200 000 bytes of repeated English text through this crate's
/// encoder and decoder, and additionally through the system `brotli`
/// decoder when it is available.
#[test]
fn compressed_encoder_multi_block_round_trip() {
    const TOTAL: usize = 200_000;
    let sentence: &[u8] =
        b"The quick brown fox jumps over the lazy dog. Pack my box with five dozen liquor jugs. ";
    let input: Vec<u8> = sentence.iter().copied().cycle().take(TOTAL).collect();

    let encoded = encode_chunked(&input, input.len(), input.len() + 64).unwrap();
    let decoded = decode_chunked(&encoded, encoded.len(), input.len() + 64).unwrap();
    assert_eq!(decoded, input);

    match brotli_cli_available() {
        Some(brotli) => {
            let ref_decoded = brotli_decode(&brotli, &encoded);
            assert_eq!(ref_decoded, input);
        }
        None => {}
    }
}
/// Round-trips inputs that use only one, two or three distinct byte values
/// through this crate's encoder and decoder, and additionally through the
/// system `brotli` decoder when it is available.
#[test]
fn compressed_encoder_degenerate_alphabets() {
    let cases: &[(&str, Vec<u8>)] = &[
        ("all zeros 4k", vec![0u8; 4096]),
        ("all 0xff 16k", vec![0xffu8; 16384]),
        ("repeating 'a' 1k", vec![b'a'; 1024]),
        ("ab × 500", b"ab".repeat(500)),
        (
            "0 / 1 alternating",
            (0..2048).map(|i| (i & 1) as u8).collect(),
        ),
        ("abc × 300", b"abc".repeat(300)),
    ];
    // Probe for the reference CLI once; the probe spawns a process, so it
    // should not be repeated for every case.
    let brotli = brotli_cli_available();
    for (name, input) in cases {
        let encoded = encode_chunked(input, input.len(), input.len() + 64).unwrap();
        let decoded = decode_chunked(&encoded, encoded.len(), input.len() + 64).unwrap();
        assert_eq!(decoded, *input, "internal round-trip failed for {name}");
        if let Some(bin) = brotli.as_deref() {
            let ref_decoded = brotli_decode(bin, &encoded);
            assert_eq!(ref_decoded, *input, "reference decoder failed for {name}");
        }
    }
}
/// Round-trips a mix of text-like inputs (repeated sentence, HTML-like
/// markup, numbered lines, zero padding around a phrase) through this
/// crate's encoder and decoder, and additionally through the system
/// `brotli` decoder when it is available.
#[test]
fn compressed_encoder_mixed_inputs() {
    let cases: &[(&str, Vec<u8>)] = &[
        ("repetitive sentence", b"This is a test. ".repeat(500)),
        (
            "html-ish",
            b"<html><head><title>x</title></head><body>".repeat(200),
        ),
        ("alpha + count", {
            let mut v = Vec::new();
            for i in 0..2000 {
                v.extend_from_slice(format!("line {i}: hello world\n").as_bytes());
            }
            v
        }),
        ("zeros + pattern", {
            let mut v = vec![0u8; 1024];
            v.extend_from_slice(b"the quick brown fox");
            v.extend_from_slice(&[0u8; 1024]);
            v
        }),
    ];
    // Probe for the reference CLI once; the probe spawns a process, so it
    // should not be repeated for every case.
    let brotli = brotli_cli_available();
    for (name, input) in cases {
        let encoded = encode_chunked(input, input.len(), input.len() + 64).unwrap();
        let decoded = decode_chunked(&encoded, encoded.len(), input.len() + 64).unwrap();
        assert_eq!(decoded, *input, "internal round-trip failed for {name}");
        if let Some(bin) = brotli.as_deref() {
            let ref_decoded = brotli_decode(bin, &encoded);
            assert_eq!(ref_decoded, *input, "ref decode failed for {name}");
        }
    }
}
/// Deterministic fuzz test: 80 rounds of pseudo-random inputs with lengths
/// in `0..=0x1FFF`, each round-tripped through this crate's encoder and
/// decoder and, when available, through the system `brotli` decoder.
///
/// A fixed-seed 64-bit linear congruential generator supplies both the
/// length and the content of every input, so runs are reproducible.
#[test]
fn compressed_encoder_fuzz_round_trip() {
    const MULTIPLIER: u64 = 6364136223846793005;
    const INCREMENT: u64 = 1442695040888963407;
    let mut state: u64 = 0x1234_5678_9abc_def0;
    // Advances the generator by one step and returns the new state.
    let mut advance = move || {
        state = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
        state
    };
    let reference = brotli_cli_available();
    for round in 0..80 {
        let len = ((advance() >> 33) as usize) & 0x1FFF;
        let input: Vec<u8> = (0..len)
            .map(|_| ((advance() >> 32) & 0xFF) as u8)
            .collect();
        let plain_len = len.max(1);
        let encoded = encode_chunked(&input, plain_len, plain_len + 64).unwrap();
        let encoded_len = encoded.len().max(1);
        let decoded = decode_chunked(&encoded, encoded_len, plain_len + 64).unwrap();
        assert_eq!(
            decoded, input,
            "internal round-trip failed at round {}",
            round
        );
        if let Some(bin) = reference.as_deref() {
            let ref_decoded = brotli_decode(bin, &encoded);
            assert_eq!(ref_decoded, input, "ref decode failed at round {}", round);
        }
    }
}
/// Sanity check on compression ratio.
///
/// Asserts that 16 KiB of lorem-ipsum text encodes to fewer bytes than the
/// input, and prints the achieved ratio for that and three further inputs
/// to stderr. The further inputs are reported only, not asserted on.
#[test]
fn compressed_encoder_ratio_sanity() {
    // Encoded size as a percentage of the plain size.
    let percent = |encoded_len: usize, plain_len: usize| -> f64 {
        100.0 * encoded_len as f64 / plain_len as f64
    };

    let lorem = lorem_16k();
    let lorem_encoded = encode_chunked(&lorem, lorem.len(), lorem.len() + 64).unwrap();
    eprintln!(
        "compcol-brotli 16k lorem: {} → {} bytes ({:.1}%)",
        lorem.len(),
        lorem_encoded.len(),
        percent(lorem_encoded.len(), lorem.len())
    );
    assert!(
        lorem_encoded.len() < lorem.len(),
        "encoder expanded structured text: {} ≥ {}",
        lorem_encoded.len(),
        lorem.len()
    );

    let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    let reported: Vec<(&str, Vec<u8>)> = vec![
        ("all zeros 16k", vec![0u8; 16384]),
        ("repetitive sentence 8k", b"This is a test. ".repeat(500)),
        ("alphabet ×64", every_byte.repeat(64)),
    ];
    for (name, input) in &reported {
        let encoded = encode_chunked(input, input.len(), input.len() + 64).unwrap();
        eprintln!(
            "compcol-brotli {}: {} → {} bytes ({:.1}%)",
            name,
            input.len(),
            encoded.len(),
            percent(encoded.len(), input.len())
        );
    }
}