use base_d::prelude::*;
/// Diagnostic test: compresses each sample message with every listed
/// algorithm at that algorithm's default level and prints how many NUL
/// (0x00) bytes the compressed output contains.
///
/// When at least one NUL is present, it additionally prints how many
/// *distinct* byte values fall into the ranges 0x00-0x1F, 0xD8-0xDB and
/// 0xDC-0xDF. The test makes no assertions; it only fails if `compress`
/// returns an error.
#[test]
fn test_compression_nul_bytes() {
    // Note: the first message appears twice in this list.
    let samples = [
        "add whirlpool command - three spiral methods with random selection",
        "Session wrap: wake ritual, bloom tagging, Honeypot check-in, 109 blooms",
        "Session wrap: activate script includeCoAuthoredBy fix",
        "Add iri command - bismuth staircase crystals with rainbow colors",
        "add whirlpool command - three spiral methods with random selection",
    ];
    let algorithms = [
        CompressionAlgorithm::Lz4,
        CompressionAlgorithm::Snappy,
        CompressionAlgorithm::Brotli,
        CompressionAlgorithm::Gzip,
        CompressionAlgorithm::Lzma,
        CompressionAlgorithm::Zstd,
    ];

    println!("\n=== Testing Compression for Nul Bytes ===\n");

    for text in samples {
        println!("Message: \"{}\"", text);
        println!("Length: {} bytes\n", text.len());

        for &algo in &algorithms {
            let packed = compress(text.as_bytes(), algo, algo.default_level()).unwrap();
            let zeros = packed.iter().filter(|&&byte| byte == 0).count();
            let marker = if zeros > 0 { "⚠️" } else { "✓" };
            println!(
                " {:?}: {} bytes, {} nul bytes {}",
                algo,
                packed.len(),
                zeros,
                marker
            );

            // The byte-range breakdown is only printed for outputs containing NULs.
            if zeros == 0 {
                continue;
            }

            // Occurrence count for every possible byte value.
            let mut histogram = [0usize; 256];
            for &byte in &packed {
                histogram[byte as usize] += 1;
            }

            // Number of distinct byte values in a bucket that occur at least once.
            let distinct_in = |bucket: &[usize]| bucket.iter().filter(|&&n| n > 0).count();
            println!(
                " Problematic bytes: {} in [0-31], {} in [D8-DB], {} in [DC-DF]",
                distinct_in(&histogram[..32]),
                distinct_in(&histogram[0xD8..=0xDB]),
                distinct_in(&histogram[0xDC..=0xDF])
            );
        }
        println!();
    }
}
/// Diagnostic test: Gzip-compresses a fixed message and, provided the
/// compressed bytes contain at least one NUL, round-trips them through
/// several dictionaries, printing whether bytes were dropped, corrupted, or
/// preserved and whether the encoded string itself contains a NUL character.
///
/// Dictionaries that the default registry cannot provide are skipped. The
/// test only fails if compression, registry loading, or decoding returns an
/// error; round-trip mismatches are reported, not asserted.
#[test]
fn test_encoding_compressed_with_nuls() {
    let msg = "add whirlpool command - three spiral methods with random selection";
    println!("\n=== Testing Encoding with Nul-Containing Compressed Data ===\n");

    let algo = CompressionAlgorithm::Gzip;
    let level = algo.default_level();
    let compressed = compress(msg.as_bytes(), algo, level).unwrap();
    let nul_count = compressed.iter().filter(|&&b| b == 0).count();
    println!(
        "Compressed with {:?}: {} bytes, {} nuls",
        algo,
        compressed.len(),
        nul_count
    );

    // Without any NUL in the input there is nothing to exercise; bail out early.
    if nul_count == 0 {
        println!(
            "⚠️ This message doesn't produce nuls with {:?} - test may not be representative",
            algo
        );
        return;
    }
    println!("✓ Using compression output with {} nul bytes\n", nul_count);

    let registry = DictionaryRegistry::load_default().unwrap();
    let test_dictionaries = ["base16", "base32", "base64", "cards", "dna", "base100"];
    for dict_name in test_dictionaries {
        if let Ok(dict) = registry.dictionary(dict_name) {
            let encoded = encode(&compressed, &dict);
            println!("\n Dictionary: {}", dict_name);
            println!(" Mode: {:?}", dict.mode());
            println!(" Start codepoint: {:?}", dict.start_codepoint());
            println!(" Encoded length: {} chars", encoded.chars().count());
            println!(
                " Expected length: {} chars (if all bytes encoded)",
                compressed.len()
            );

            let decoded = decode(&encoded, &dict).unwrap();

            // `checked_sub` instead of a bare `-`: if the decoder ever returns
            // MORE bytes than were encoded, a plain usize subtraction would
            // panic with an overflow error in debug builds and hide the real
            // finding. `None` means the decoded output is longer than the input.
            match compressed.len().checked_sub(decoded.len()) {
                Some(0) if compressed == decoded => {
                    println!(" ✓ Perfect round-trip");
                }
                Some(0) => {
                    println!(" ⚠️ Data corrupted but same length");
                }
                Some(bytes_lost) => {
                    println!(
                        " ❌ BYTES DROPPED: {} bytes lost during round-trip!",
                        bytes_lost
                    );
                    println!(" This is THE BUG!");
                }
                None => {
                    println!(
                        " ⚠️ Data corrupted: decoded output is {} bytes longer than the input",
                        decoded.len() - compressed.len()
                    );
                }
            }

            if encoded.contains('\0') {
                println!(" ❌ ENCODED STRING CONTAINS NUL!");
                println!(" This will fail when passed to git -m");
            }
        }
    }
}
/// Diagnostic test: calls `compress_encode` up to 100 times per sample
/// message and reports the first attempt (if any) whose encoded output
/// contains a NUL character, along with the compression algorithm and
/// dictionary reported in the result and a short escaped preview.
///
/// The repeated attempts presumably exist because `compress_encode` may pick
/// a different algorithm/dictionary on each call (not visible from this
/// file; confirm against the library). The test makes no assertions; it only
/// fails if registry loading or `compress_encode` returns an error.
#[test]
fn test_compress_encode_nul_safety() {
    let test_messages = [
        "add whirlpool command - three spiral methods with random selection",
        "Session wrap: wake ritual, bloom tagging, Honeypot check-in, 109 blooms",
        "Add iri command - bismuth staircase crystals with rainbow colors",
    ];
    println!("\n=== Testing compress_encode for Nul Safety ===\n");

    let registry = DictionaryRegistry::load_default().unwrap();
    for msg in test_messages {
        println!("Message: \"{}\"", msg);
        let mut nul_found = false;

        for attempt in 1..=100 {
            let result = compress_encode(msg.as_bytes(), &registry).unwrap();
            if !result.encoded.contains('\0') {
                continue;
            }

            println!(" ❌ Attempt {}: NUL FOUND!", attempt);
            println!(" Compression: {:?}", result.compress_algo);
            println!(" Dictionary: {}", result.dictionary_name);
            println!(" Encoded length: {}", result.encoded.len());

            // Show the first 20 characters, escaping control characters so the
            // NUL (and friends) are visible in the terminal output.
            let preview: String = result
                .encoded
                .chars()
                .take(20)
                .map(|c| {
                    if c.is_control() {
                        format!("\\u{:04X}", c as u32)
                    } else {
                        c.to_string()
                    }
                })
                .collect();
            println!(" Preview: {}", preview);

            // One offending attempt is enough for this message.
            nul_found = true;
            break;
        }

        if !nul_found {
            println!(" ✓ All 100 attempts nul-free");
        }
    }
}
/// Building a `ByteRange` dictionary with `start_codepoint(0)` must fail,
/// and the returned error text must contain "Unsafe ByteRange".
#[test]
fn test_byte_range_start_zero() {
    println!("\n=== Testing ByteRange with start_codepoint=0 (should be rejected) ===\n");

    let outcome = Dictionary::builder()
        .mode(EncodingMode::ByteRange)
        .start_codepoint(0)
        .build();

    // A successful build is the failure case for this test.
    let err = match outcome {
        Ok(_) => panic!(
            "ByteRange with start_codepoint=0 should be rejected (maps to NUL and C1 controls)"
        ),
        Err(e) => e,
    };
    println!("Correctly rejected: {}", err);

    let mentions_unsafe = err.contains("Unsafe ByteRange");
    assert!(
        mentions_unsafe,
        "Error message should mention unsafe ByteRange: {}",
        err
    );
}
/// Building a `ByteRange` dictionary with `start_codepoint(0xD701)` must
/// fail, and the returned error text must contain "Unsafe ByteRange".
///
/// Per the assertion message, a range starting at 0xD701 would end at
/// 0xD800, which is where the surrogate code points begin.
#[test]
fn test_byte_range_surrogate_range() {
    println!(
        "\n=== Testing ByteRange with start_codepoint overlapping surrogates (should be rejected) ===\n"
    );

    let outcome = Dictionary::builder()
        .mode(EncodingMode::ByteRange)
        .start_codepoint(0xD701)
        .build();

    // A successful build is the failure case for this test.
    let err = match outcome {
        Ok(_) => panic!(
            "ByteRange with start_codepoint=0xD701 should be rejected (end 0xD800 overlaps surrogates)"
        ),
        Err(e) => e,
    };
    println!("Correctly rejected: {}", err);

    let mentions_unsafe = err.contains("Unsafe ByteRange");
    assert!(
        mentions_unsafe,
        "Error message should mention unsafe ByteRange: {}",
        err
    );
}