use std::ffi::c_void;
use std::fs;
use std::path::PathBuf;
use structured_zstd::testing::{MAX_BLOCK_SIZE, block_splitter_decision};
use zstd::zstd_safe::zstd_sys as _;
/// Size, in bytes, of the scratch workspace that `reference_decision` hands to
/// `ZSTD_splitBlock` (it is rounded up to whole `u64` slots when allocated).
///
/// NOTE(review): presumably mirrors the identically named macro in the upstream
/// zstd sources, unusual "SLIPBLOCK" spelling included — confirm the value
/// whenever the linked zstd version changes.
const ZSTD_SLIPBLOCK_WORKSPACESIZE: usize = 8208;
// Hand-written declaration of the C splitter entry point used as the reference
// implementation in this file.
// NOTE(review): the symbol is presumably provided by the zstd-sys library that
// the `use ... zstd_sys as _;` line above pulls into the link — confirm it is
// exported by the linked build.
// The lint allowance is needed because the C name and its camel-case spelling
// are kept verbatim so the declaration resolves to the C symbol.
#[allow(non_snake_case)]
unsafe extern "C" {
/// Asks the C implementation for its split decision on one block.
///
/// * `block_start` / `block_size` — pointer to, and byte length of, the input block.
/// * `level` — splitter level; callers in this file pass values 0 through 4.
/// * `workspace` / `wksp_size` — caller-owned scratch buffer and its byte length.
///
/// Returns a `usize` that the tests compare directly against
/// `block_splitter_decision` (presumably the chosen split offset — TODO confirm
/// against the C header).
fn ZSTD_splitBlock(
block_start: *const c_void,
block_size: usize,
level: i32,
workspace: *mut c_void,
wksp_size: usize,
) -> usize;
}
/// Locates the `decodecorpus_files` fixture directory.
///
/// The directory inside this crate's manifest dir is preferred; when it does
/// not exist the `../zstd/decodecorpus_files` sibling is used instead.
///
/// # Panics
///
/// Panics when the selected path is not a directory.
fn corpus_dir() -> PathBuf {
    let crate_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let local = crate_root.join("decodecorpus_files");
    let fallback = crate_root.join("../zstd/decodecorpus_files");

    let path = match local.is_dir() {
        true => local,
        false => fallback,
    };

    assert!(
        path.is_dir(),
        "expected corpus directory at {} — this test needs the \
         decodecorpus_files/ fixture from the repository checkout; \
         it is not shipped in the crates.io package",
        path.display()
    );
    path
}
/// Reads every regular, non-`.zst` file in the corpus directory and cuts it
/// into consecutive chunks of exactly `MAX_BLOCK_SIZE` bytes.
///
/// Each chunk is returned with a `"<file name>#chunk<index>"` label. A trailing
/// remainder shorter than one full block is dropped, so files smaller than a
/// block contribute nothing.
///
/// # Panics
///
/// Panics when the directory, one of its entries, or a file cannot be read.
fn load_corpus_chunks() -> Vec<(String, Vec<u8>)> {
    let chunk_len = MAX_BLOCK_SIZE as usize;
    let mut collected = Vec::new();

    for dir_entry in fs::read_dir(corpus_dir()).expect("read_dir corpus") {
        let dir_entry = dir_entry.expect("dir entry");
        let file_path = dir_entry.path();

        let is_regular_file = dir_entry.file_type().expect("entry file_type").is_file();
        if !is_regular_file {
            continue;
        }

        // Already-compressed fixtures are not useful splitter input.
        let is_compressed = file_path.extension().and_then(|ext| ext.to_str()) == Some("zst");
        if is_compressed {
            continue;
        }

        let contents = fs::read(&file_path).expect("read corpus file");
        let name = file_path
            .file_name()
            .and_then(|os_name| os_name.to_str())
            .unwrap_or("<unknown>");

        // `chunks_exact` yields only full-size chunks and skips the short tail.
        collected.extend(
            contents
                .chunks_exact(chunk_len)
                .enumerate()
                .map(|(chunk_idx, chunk)| (format!("{name}#chunk{chunk_idx}"), chunk.to_vec())),
        );
    }

    collected
}
/// Builds a deterministic `MAX_BLOCK_SIZE`-byte block whose content switches
/// from one pseudo-random byte stream to another at `transition_at`.
///
/// Bytes at positions below `transition_at` come from the first generator, all
/// later bytes from the second. Each generator only advances when it produces
/// a byte, so a `transition_at` of zero yields the second stream alone and any
/// value at or beyond the block size yields the first stream alone.
fn synthetic_transition_chunk(transition_at: usize) -> Vec<u8> {
    /// Advances a 13/7/17 xorshift state by one step and returns its low byte.
    fn next_byte(state: &mut u64) -> u8 {
        *state ^= *state << 13;
        *state ^= *state >> 7;
        *state ^= *state << 17;
        (*state & 0xFF) as u8
    }

    let total = MAX_BLOCK_SIZE as usize;
    let mut leading: u64 = 0xDEAD_BEEF_CAFE_F00D;
    let mut trailing: u64 = 0x0123_4567_89AB_CDEF;

    (0..total)
        .map(|pos| {
            let generator = if pos < transition_at {
                &mut leading
            } else {
                &mut trailing
            };
            next_byte(generator)
        })
        .collect()
}
/// Runs the C `ZSTD_splitBlock` on `block` at `level` and returns its result.
///
/// The scratch workspace is allocated as zeroed `u64` slots covering at least
/// `ZSTD_SLIPBLOCK_WORKSPACESIZE` bytes, which also gives it `u64` alignment.
///
/// # Panics
///
/// Panics unless `block` is exactly `MAX_BLOCK_SIZE` bytes long.
fn reference_decision(block: &[u8], level: i32) -> usize {
    assert_eq!(block.len(), MAX_BLOCK_SIZE as usize);

    let slot_bytes = core::mem::size_of::<u64>();
    let slot_count = ZSTD_SLIPBLOCK_WORKSPACESIZE.div_ceil(slot_bytes);
    let mut scratch = vec![0u64; slot_count];
    let scratch_bytes = scratch.len() * slot_bytes;

    let input_ptr = block.as_ptr().cast::<c_void>();
    let scratch_ptr = scratch.as_mut_ptr().cast::<c_void>();

    // SAFETY: `input_ptr` is valid for reads of `block.len()` bytes and
    // `scratch_ptr` for reads and writes of `scratch_bytes` bytes; both buffers
    // outlive the call. This assumes the C function stays within the lengths it
    // is given and does not retain either pointer.
    unsafe { ZSTD_splitBlock(input_ptr, block.len(), level, scratch_ptr, scratch_bytes) }
}
/// Asserts that the Rust port and the C reference give the same decision for
/// `block` at `split_level`.
///
/// `label` identifies the input in the failure message, which also shows both
/// results and the first 16 bytes of the block.
fn assert_parity(label: &str, block: &[u8], split_level: usize) {
    let reference_level = split_level as i32;
    // Tuple fields are evaluated left to right, so the port still runs first.
    let (ours, reference) = (
        block_splitter_decision(block, split_level),
        reference_decision(block, reference_level),
    );

    assert_eq!(
        ours,
        reference,
        "{label} @ split_level={split_level}: \
         our port = {ours}, reference = {reference} \
         (block first 16 bytes = {:02X?})",
        &block[..16]
    );
}
/// Every corpus chunk must get the same decision from the port and the C
/// reference at split level 0.
#[test]
fn corpus_borders_heuristic_matches_reference() {
    let corpus = load_corpus_chunks();
    assert!(
        !corpus.is_empty(),
        "expected at least one 128 KB chunk from the decode corpus"
    );

    corpus
        .iter()
        .for_each(|(label, block)| assert_parity(label, block, 0));
}
/// Every corpus chunk must get the same decision from the port and the C
/// reference at each of the split levels 1 through 4.
#[test]
fn corpus_by_chunks_matches_reference_at_each_sampling_level() {
    let corpus = load_corpus_chunks();
    assert!(
        !corpus.is_empty(),
        "expected at least one 128 KB chunk from the decode corpus"
    );

    for (label, block) in corpus.iter() {
        (1..=4).for_each(|level| assert_parity(label, block, level));
    }
}
/// Level-0 parity on a synthetic block whose byte stream changes at 32 KiB.
#[test]
fn synthetic_transition_at_32k_borders_heuristic() {
    const TRANSITION_AT: usize = 32 * 1024;
    assert_parity(
        "synthetic-transition-32k",
        &synthetic_transition_chunk(TRANSITION_AT),
        0,
    );
}
/// Level-0 parity on a synthetic block whose byte stream changes at 64 KiB.
#[test]
fn synthetic_transition_at_64k_borders_heuristic() {
    const TRANSITION_AT: usize = 64 * 1024;
    assert_parity(
        "synthetic-transition-64k",
        &synthetic_transition_chunk(TRANSITION_AT),
        0,
    );
}
/// Level-0 parity on a synthetic block whose byte stream changes at 96 KiB.
#[test]
fn synthetic_transition_at_96k_borders_heuristic() {
    const TRANSITION_AT: usize = 96 * 1024;
    assert_parity(
        "synthetic-transition-96k",
        &synthetic_transition_chunk(TRANSITION_AT),
        0,
    );
}
/// A block drawn from a single stream (transition placed at the very end) must
/// match the reference at level 0 and must report the full block length,
/// i.e. no split.
#[test]
fn synthetic_no_transition_borders_heuristic() {
    let whole_block = MAX_BLOCK_SIZE as usize;
    let block = synthetic_transition_chunk(whole_block);

    let ours = block_splitter_decision(&block, 0);
    let reference = reference_decision(&block, 0);

    assert_eq!(
        ours, reference,
        "no-transition: ours={ours} reference={reference}"
    );
    assert_eq!(
        ours,
        block.len(),
        "no-transition: expected no split (== block.len()), got {ours}"
    );
}
/// Parity at split levels 1 through 4 on synthetic blocks whose byte stream
/// changes at 16, 32, 48, 64 and 96 KiB.
#[test]
fn synthetic_transitions_by_chunks_all_levels() {
    for kib in [16usize, 32, 48, 64, 96] {
        let label = format!("synthetic-transition-{}k", kib);
        let block = synthetic_transition_chunk(kib * 1024);
        (1..=4).for_each(|level| assert_parity(&label, &block, level));
    }
}