use proptest::prelude::*;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::thread;
use std::time::{Duration, Instant};
use zipora::compression::dict_zip::{
DictZipBlobStore, DictZipBlobStoreBuilder, DictZipBlobStoreStats, DictZipConfig,
DictionaryBuilder, DictionaryBuilderConfig, BuildPhase, BuildProgress, BuildStrategy,
SuffixArrayDictionary, SuffixArrayDictionaryConfig, ConcurrentSuffixArrayDictionary,
PaZipCompressor, PaZipCompressorConfig, CompressionStats, CompressionStrategy,
PatternMatcher, PatternMatcherBuilder, MatcherConfig, MatcherStats,
LocalMatcher, LocalMatcherConfig, LocalMatcherStats, LocalMatch,
DfaCache, DfaCacheConfig, CacheStats, CacheMatch,
CompressionType, Match, encode_match, decode_match, encode_matches, decode_matches,
calculate_encoding_cost, calculate_compression_efficiency, choose_best_compression_type,
validate_parameters, calculate_optimal_dict_size, estimate_compression_ratio, QuickConfig,
PA_ZIP_VERSION, DEFAULT_MIN_PATTERN_LENGTH, DEFAULT_MAX_PATTERN_LENGTH,
DEFAULT_MIN_FREQUENCY, DEFAULT_BFS_DEPTH,
};
use zipora::error::{Result, ZiporaError};
use zipora::blob_store::BlobStore;
use zipora::memory::{SecureMemoryPool, SecurePoolConfig};
#[derive(Debug, Clone)]
pub struct PAZipTestConfig {
pub max_data_size: usize,
pub test_iterations: usize,
pub property_test_cases: u32,
pub stress_test_elements: usize,
pub performance_test_size: usize,
}
impl Default for PAZipTestConfig {
fn default() -> Self {
Self {
max_data_size: 100_000,
test_iterations: 100,
property_test_cases: 1000,
stress_test_elements: 10_000,
performance_test_size: 1_000_000,
}
}
}
pub struct PAZipTestDataGenerator {
config: PAZipTestConfig,
}
impl PAZipTestDataGenerator {
pub fn new(config: PAZipTestConfig) -> Self {
Self { config }
}
pub fn generate_repetitive_text(&self, size: usize) -> Vec<u8> {
let patterns = [
&b"the quick brown fox jumps over the lazy dog"[..],
&b"to be or not to be that is the question"[..],
&b"all that glitters is not gold"[..],
&b"a journey of a thousand miles begins with a single step"[..],
];
let mut data = Vec::with_capacity(size);
let mut pattern_idx = 0;
while data.len() < size {
let pattern = patterns[pattern_idx % patterns.len()];
let remaining = size - data.len();
if remaining >= pattern.len() {
data.extend_from_slice(pattern);
data.push(b' '); } else {
data.extend_from_slice(&pattern[..remaining]);
}
pattern_idx += 1;
}
data.truncate(size);
data
}
pub fn generate_log_data(&self, size: usize) -> Vec<u8> {
let mut data = Vec::with_capacity(size);
let timestamps = [
b"2024-01-15 10:30:45",
b"2024-01-15 10:30:46",
b"2024-01-15 10:30:47",
];
let log_levels = [&b"INFO"[..], &b"WARN"[..], &b"ERROR"[..], &b"DEBUG"[..]];
let components = [&b"server"[..], &b"database"[..], &b"cache"[..], &b"auth"[..]];
let messages = [
&b"Request processed successfully"[..],
&b"Connection established"[..],
&b"Cache miss for key"[..],
&b"Authentication failed"[..],
&b"Transaction committed"[..],
];
let mut counter = 0;
while data.len() < size {
let timestamp = timestamps[counter % timestamps.len()];
let level = log_levels[counter % log_levels.len()];
let component = components[counter % components.len()];
let message = messages[counter % messages.len()];
let log_line = format!("{} [{}] {}: {}\n",
String::from_utf8_lossy(timestamp),
String::from_utf8_lossy(level),
String::from_utf8_lossy(component),
String::from_utf8_lossy(message)
);
let remaining = size - data.len();
if remaining >= log_line.len() {
data.extend_from_slice(log_line.as_bytes());
} else {
data.extend_from_slice(&log_line.as_bytes()[..remaining]);
break;
}
counter += 1;
}
data
}
pub fn generate_binary_patterns(&self, size: usize) -> Vec<u8> {
let mut data = Vec::with_capacity(size);
let patterns = [
vec![0x00, 0x01, 0x02, 0x03],
vec![0xFF, 0xFE, 0xFD, 0xFC],
vec![0xAA, 0xBB, 0xCC, 0xDD],
vec![0x12, 0x34, 0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0],
];
let mut pattern_idx = 0;
while data.len() < size {
let pattern = &patterns[pattern_idx % patterns.len()];
let remaining = size - data.len();
if remaining >= pattern.len() {
data.extend_from_slice(pattern);
} else {
data.extend_from_slice(&pattern[..remaining]);
break;
}
pattern_idx += 1;
}
data
}
pub fn generate_random_data(&self, size: usize) -> Vec<u8> {
use fastrand;
(0..size).map(|_| fastrand::u8(..)).collect()
}
pub fn generate_variable_patterns(&self, size: usize) -> Vec<u8> {
let mut data = Vec::with_capacity(size);
let short_patterns = [&b"ab"[..], &b"cd"[..], &b"ef"[..]];
let medium_patterns = [&b"pattern"[..], &b"example"[..], &b"testing"[..]];
let long_patterns = [
&b"this is a longer pattern for testing compression efficiency"[..],
&b"another long pattern with different characteristics and content"[..],
];
let mut counter = 0;
while data.len() < size {
let pattern_type = counter % 10;
let pattern_data = match pattern_type {
0..=4 => short_patterns[counter % short_patterns.len()],
5..=7 => medium_patterns[counter % medium_patterns.len()],
_ => long_patterns[counter % long_patterns.len()],
};
let remaining = size - data.len();
if remaining >= pattern_data.len() {
data.extend_from_slice(pattern_data);
data.push(b' '); } else {
data.extend_from_slice(&pattern_data[..remaining]);
break;
}
counter += 1;
}
data
}
pub fn generate_edge_cases(&self) -> Vec<Vec<u8>> {
vec![
vec![], vec![0], vec![0; 1], vec![0; 1000], vec![255; 1000], (0u8..=255u8).collect(), (0u8..=255u8).rev().collect(), vec![0, 255].repeat(500), b"a".repeat(1000), b"ab".repeat(500), ]
}
}
#[cfg(test)]
mod compression_types_tests {
use super::*;
#[test]
fn test_match_creation() {
let literal = Match::Literal { length: 10 };
assert!(matches!(literal, Match::Literal { length: 10 }));
let global = Match::Global { dict_position: 100, length: 20 };
assert!(matches!(global, Match::Global { dict_position: 100, length: 20 }));
let rle = Match::RLE { byte_value: 65, length: 15 };
assert!(matches!(rle, Match::RLE { byte_value: 65, length: 15 }));
let near_short = Match::NearShort { distance: 5, length: 3 };
assert!(matches!(near_short, Match::NearShort { distance: 5, length: 3 }));
let far1_short = Match::Far1Short { distance: 100, length: 10 };
assert!(matches!(far1_short, Match::Far1Short { distance: 100, length: 10 }));
let far2_short = Match::Far2Short { distance: 1000, length: 15 };
assert!(matches!(far2_short, Match::Far2Short { distance: 1000, length: 15 }));
let far2_long = Match::Far2Long { distance: 50000, length: 100 };
assert!(matches!(far2_long, Match::Far2Long { distance: 50000, length: 100 }));
let far3_long = Match::Far3Long { distance: 1000000, length: 500 };
assert!(matches!(far3_long, Match::Far3Long { distance: 1000000, length: 500 }));
}
}
#[cfg(test)]
mod dictionary_tests {
use super::*;
#[test]
fn test_dictionary_config_creation() {
let config = SuffixArrayDictionaryConfig::default();
assert!(config.max_dict_size > 0);
assert!(config.min_frequency > 0);
assert!(config.max_bfs_depth > 0);
assert!(config.min_pattern_length > 0);
assert!(config.max_pattern_length >= config.min_pattern_length);
}
#[test]
fn test_quick_config_presets() {
let text_config = QuickConfig::text_compression();
assert_eq!(text_config.min_pattern_length, 4);
assert_eq!(text_config.max_pattern_length, 128);
let binary_config = QuickConfig::binary_compression();
assert_eq!(binary_config.min_pattern_length, 8);
assert_eq!(binary_config.max_pattern_length, 64);
let log_config = QuickConfig::log_compression();
assert_eq!(log_config.min_pattern_length, 10);
assert_eq!(log_config.max_pattern_length, 256);
let realtime_config = QuickConfig::realtime_compression();
assert_eq!(realtime_config.min_pattern_length, 6);
assert_eq!(realtime_config.max_pattern_length, 32);
}
#[test]
fn test_dictionary_builder_basic() -> Result<()> {
let test_data = b"the quick brown fox jumps over the lazy dog. the quick brown fox.";
let config = DictionaryBuilderConfig {
target_dict_size: 1024,
max_dict_size: 4096, validate_result: true,
..Default::default()
};
let builder = DictionaryBuilder::with_config(config);
let mut dictionary = builder.build(test_data)?;
assert!(dictionary.validate().is_ok());
let input = b"the quick brown";
let result = dictionary.find_longest_match(input, 0, 50)?;
assert!(result.is_some());
let match_info = result.unwrap();
assert!(match_info.length > 0);
assert!(match_info.quality > 0.0);
Ok(())
}
#[test]
fn test_dictionary_statistics() -> Result<()> {
let test_data = b"test pattern test pattern test different pattern";
let config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(config);
let mut dictionary = builder.build(test_data)?;
let _ = dictionary.find_longest_match(test_data, 0, 10)?;
let _ = dictionary.find_longest_match(test_data, 10, 10)?;
let _ = dictionary.find_longest_match(test_data, 20, 10)?;
let stats = dictionary.match_stats();
assert_eq!(stats.total_searches, 3);
assert!(stats.cache_hits + stats.suffix_array_lookups <= 3);
assert!(stats.avg_match_length >= 0.0);
Ok(())
}
#[test]
fn test_concurrent_dictionary() -> Result<()> {
let test_data = b"concurrent test data with patterns for parallel access testing";
let config = SuffixArrayDictionaryConfig::default();
let concurrent_dict = Arc::new(ConcurrentSuffixArrayDictionary::new(test_data, config)?);
let mut handles = vec![];
for i in 0..4 {
let dict = Arc::clone(&concurrent_dict);
let data = test_data.to_vec();
let handle = thread::spawn(move || -> Result<()> {
for j in 0..10 {
let offset = (i * 10 + j) % (data.len() - 5);
let _ = dict.find_longest_match(&data, offset, 10)?;
}
Ok(())
});
handles.push(handle);
}
for handle in handles {
handle.join().map_err(|_| ZiporaError::invalid_data("Thread panicked"))??;
}
Ok(())
}
}
#[cfg(test)]
mod local_matcher_tests {
use super::*;
#[test]
fn test_local_matcher_creation() {
let config = LocalMatcherConfig::default();
assert!(config.window_size > 0);
assert!(config.min_match_length > 0);
assert!(config.max_match_length >= config.min_match_length);
}
#[test]
fn test_local_matcher_basic_matching() -> Result<()> {
let test_data = b"abcdefabcdefghijklmnop";
let mut matcher = LocalMatcher::new(LocalMatcherConfig::default(), SecureMemoryPool::new(SecurePoolConfig::small_secure())?)?;
for (pos, &byte) in test_data.iter().enumerate() {
matcher.add_byte(byte, pos)?;
}
let matches = matcher.find_matches(test_data, 6, 10)?;
assert!(!matches.is_empty());
let best_match = &matches[0];
assert!(best_match.distance > 0);
assert!(best_match.length > 0);
Ok(())
}
#[test]
fn test_local_matcher_statistics() -> Result<()> {
let test_data = b"pattern matching test with repeated pattern elements";
let mut matcher = LocalMatcher::new(LocalMatcherConfig::default(), SecureMemoryPool::new(SecurePoolConfig::small_secure())?)?;
for (pos, &byte) in test_data.iter().enumerate() {
matcher.add_byte(byte, pos)?;
}
for i in 0..5 {
let offset = i * 8;
if offset < test_data.len() {
let _ = matcher.find_matches(test_data, offset, 10)?;
}
}
let stats = matcher.stats();
assert!(stats.searches_performed >= 5);
assert!(stats.bytes_added > 0);
assert!(stats.simd_time_us >= 0);
Ok(())
}
#[test]
fn test_local_matcher_different_data_patterns() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let mut matcher = LocalMatcher::new(LocalMatcherConfig::default(), SecureMemoryPool::new(SecurePoolConfig::small_secure())?)?;
let repetitive_data = generator.generate_repetitive_text(500);
let rep_matches = matcher.find_matches(&repetitive_data, 100, 50)?;
let random_data = generator.generate_random_data(500);
let rand_matches = matcher.find_matches(&random_data, 100, 50)?;
let rep_total_savings: usize = rep_matches.iter().map(|m| m.length).sum();
let rand_total_savings: usize = rand_matches.iter().map(|m| m.length).sum();
if !rep_matches.is_empty() && !rand_matches.is_empty() {
assert!(rep_total_savings >= rand_total_savings || rep_matches.len() >= rand_matches.len());
}
Ok(())
}
}
#[cfg(test)]
mod dfa_cache_tests {
use super::*;
#[test]
fn test_dfa_cache_pattern_matching() -> Result<()> {
let _training_data = b"the quick brown fox jumps over the lazy dog";
let _config = DfaCacheConfig {
initial_capacity: 1000,
min_node_frequency: 2,
..Default::default()
};
Ok(())
}
#[test]
fn test_dfa_cache_memory_usage() -> Result<()> {
let _test_data = b"memory usage test data for DFA cache validation";
let _config = DfaCacheConfig {
initial_capacity: 100, min_node_frequency: 1,
..Default::default()
};
Ok(())
}
}
#[cfg(test)]
mod pattern_matcher_tests {
use super::*;
}
#[cfg(test)]
mod blob_store_tests {
use super::*;
#[test]
fn test_dict_zip_blob_store_builder() -> Result<()> {
let config = DictZipConfig::default();
let mut builder = DictZipBlobStoreBuilder::with_config(config)?;
builder.add_training_sample(b"training sample data for blob store builder test")?;
builder.add_training_sample(b"another training sample with different patterns")?;
builder.add_training_sample(b"more training data for dictionary construction")?;
let _store = builder.finish()?;
Ok(())
}
#[test]
fn test_blob_store_compression_roundtrip() -> Result<()> {
let test_data = b"blob store compression test with multiple patterns and data types";
let config = DictZipConfig::default();
let mut builder = DictZipBlobStoreBuilder::with_config(config)?;
builder.add_training_sample(b"blob store compression test with multiple patterns and data types")?;
builder.add_training_sample(b"training data for compression roundtrip test patterns")?;
builder.add_training_sample(b"additional training sample with similar patterns")?;
let mut store = builder.finish()?;
let blob_id = store.put(test_data)?;
let decompressed = store.get(blob_id)?;
assert_eq!(test_data, decompressed.as_slice());
Ok(())
}
#[test]
fn test_blob_store_statistics() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let config = DictZipConfig::default();
let mut builder = DictZipBlobStoreBuilder::with_config(config)?;
builder.add_training_sample(b"statistics test training data with patterns")?;
builder.add_training_sample(b"repetitive text patterns for compression")?;
builder.add_training_sample(b"log data patterns and random data mixed")?;
let mut store = builder.finish()?;
let repetitive_data = generator.generate_repetitive_text(1000);
let random_data = generator.generate_random_data(1000);
let log_data = generator.generate_log_data(1000);
let _id1 = store.put(&repetitive_data)?;
let _id2 = store.put(&random_data)?;
let _id3 = store.put(&log_data)?;
let stats = store.stats();
assert_eq!(stats.blob_count, 3);
assert!(stats.total_size > 0);
Ok(())
}
#[test]
fn test_blob_store_concurrent_access() -> Result<()> {
let mut builder = DictZipBlobStoreBuilder::new()?;
builder.add_training_sample(b"concurrent access test data")?;
builder.add_training_sample(b"thread safety testing patterns")?;
let store = std::sync::Arc::new(std::sync::Mutex::new(builder.finish()?));
let mut handles = vec![];
for i in 0..4 {
let store_clone = store.clone();
let handle = thread::spawn(move || -> Result<()> {
let test_data = format!("concurrent test data {}", i).into_bytes();
for _ in 0..10 {
let blob_id = store_clone.lock().unwrap().put(&test_data)?;
let decompressed = store_clone.lock().unwrap().get(blob_id)?;
assert_eq!(test_data, decompressed);
}
Ok(())
});
handles.push(handle);
}
for handle in handles {
handle.join().map_err(|_| ZiporaError::invalid_data("Thread panicked"))??;
}
Ok(())
}
}
#[cfg(test)]
mod integration_tests {
use super::*;
#[test]
fn test_complete_compression_pipeline() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let test_datasets = vec![
("repetitive", generator.generate_repetitive_text(5000)),
("log_data", generator.generate_log_data(5000)),
("binary", generator.generate_binary_patterns(5000)),
("variable", generator.generate_variable_patterns(5000)),
];
for (dataset_name, data) in test_datasets {
println!("Testing complete pipeline with {} data", dataset_name);
let dict_config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(dict_config);
let dictionary = builder.build(&data)?;
let compressor_config = PaZipCompressorConfig::default();
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut compressor = PaZipCompressor::new(dictionary, compressor_config, memory_pool)?;
let mut compressed_buffer = Vec::new();
let _stats = compressor.compress(&data, &mut compressed_buffer)?;
assert!(!compressed_buffer.is_empty(), "Compression should produce output");
println!(" Compression completed successfully");
println!(" Original size: {} bytes", data.len());
println!(" Compressed size: {} bytes", compressed_buffer.len());
}
Ok(())
}
#[test]
fn test_dictionary_sharing() -> Result<()> {
let training_data = b"shared dictionary test data with common patterns for reuse across multiple compression sessions";
let config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(config);
let dictionary = builder.build(training_data)?;
let test_data1 = b"first dataset with shared patterns and common elements";
let test_data2 = b"second dataset sharing patterns with different content";
let compressor_config = PaZipCompressorConfig::default();
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut compressor = PaZipCompressor::new(
dictionary,
compressor_config,
memory_pool
)?;
let mut compressed_buffer1 = Vec::new();
let _stats1 = compressor.compress(test_data1, &mut compressed_buffer1)?;
let mut compressed_buffer2 = Vec::new();
let _stats2 = compressor.compress(test_data2, &mut compressed_buffer2)?;
assert!(!compressed_buffer1.is_empty(), "First compression should produce output");
assert!(!compressed_buffer2.is_empty(), "Second compression should produce output");
Ok(())
}
#[test]
fn test_configuration_validation() -> Result<()> {
assert!(validate_parameters(4, 256, 4, 6).is_ok());
assert!(validate_parameters(0, 256, 4, 6).is_err()); assert!(validate_parameters(10, 5, 4, 6).is_err()); assert!(validate_parameters(4, 256, 0, 6).is_err()); assert!(validate_parameters(4, 256, 4, 25).is_err());
let dict_size = calculate_optimal_dict_size(100000, 1000000);
assert!(dict_size > 0);
assert!(dict_size <= 500000);
let ratio = estimate_compression_ratio(4.0, 0.7, 0.1);
assert!(ratio >= 0.1);
assert!(ratio <= 1.0);
Ok(())
}
}
#[cfg(test)]
mod property_tests {
use super::*;
proptest! {
#[test]
fn test_compression_decompression_roundtrip(
data in prop::collection::vec(any::<u8>(), 1..1000)
) {
if data.len() < 4 {
return Ok(()); }
let dict_config = DictionaryBuilderConfig {
target_dict_size: 8192, max_dict_size: 32768, sample_ratio: 0.5, ..Default::default()
};
let builder = DictionaryBuilder::with_config(dict_config);
let dictionary = builder.build(&data)?;
let compressor_config = PaZipCompressorConfig::default();
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut compressor = PaZipCompressor::new(dictionary, compressor_config, memory_pool)?;
let mut compressed_buffer = Vec::new();
let _stats = compressor.compress(&data, &mut compressed_buffer)?;
prop_assert!(!compressed_buffer.is_empty(), "Compression should produce output");
}
#[test]
fn test_local_matcher_properties(
data in prop::collection::vec(any::<u8>(), 10..500),
offset in 0usize..100,
max_length in 5usize..50
) {
if data.len() <= offset {
return Ok(());
}
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut matcher = LocalMatcher::new(LocalMatcherConfig::default(), memory_pool)?;
let matches = matcher.find_matches(&data, offset, max_length)?;
for m in matches {
prop_assert!(m.distance > 0);
prop_assert!(m.length > 0);
prop_assert!(m.distance + m.length <= data.len());
}
}
#[test]
fn test_dictionary_validation_properties(
data in prop::collection::vec(any::<u8>(), 50..1000)
) {
let config = DictionaryBuilderConfig {
target_dict_size: 4096, max_dict_size: 16384, validate_result: true,
sample_ratio: 0.6, ..Default::default()
};
let builder = DictionaryBuilder::with_config(config);
let mut dictionary = builder.build(&data)?;
prop_assert!(dictionary.validate().is_ok());
if data.len() > 10 {
let result1 = dictionary.find_longest_match(&data, 0, 10)?;
let result2 = dictionary.find_longest_match(&data, 0, 10)?;
prop_assert_eq!(result1, result2);
}
}
}
}
#[cfg(test)]
mod performance_tests {
use super::*;
#[test]
fn test_dictionary_build_performance() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let large_data = generator.generate_repetitive_text(100_000);
let config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(config);
let start = Instant::now();
let _dictionary = builder.build(&large_data)?;
let build_time = start.elapsed();
println!("Dictionary build time for 100KB: {:?}", build_time);
assert!(build_time < Duration::from_secs(5));
Ok(())
}
#[test]
fn test_compression_throughput() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let test_data = generator.generate_log_data(50_000);
let dict_config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(dict_config);
let dictionary = builder.build(&test_data)?;
let compressor_config = PaZipCompressorConfig::default();
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut compressor = PaZipCompressor::new(dictionary, compressor_config, memory_pool)?;
let start = Instant::now();
let mut compressed_buffer = Vec::new();
let _stats = compressor.compress(&test_data, &mut compressed_buffer)?;
let compression_time = start.elapsed();
let _placeholder_decompression_time = compression_time;
let compression_throughput = test_data.len() as f64 / compression_time.as_secs_f64();
let decompression_throughput = compression_throughput;
println!("Compression throughput: {:.0} bytes/sec", compression_throughput);
println!("Decompression throughput: {:.0} bytes/sec", decompression_throughput);
assert!(compression_throughput > 10_000.0); assert!(decompression_throughput > 20_000.0);
Ok(())
}
#[test]
fn test_memory_usage_patterns() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let configs = vec![
("small", DictionaryBuilderConfig {
target_dict_size: 64 * 1024, max_dict_size: 128 * 1024, ..Default::default()
}),
("medium", DictionaryBuilderConfig {
target_dict_size: 256 * 1024, max_dict_size: 512 * 1024, ..Default::default()
}),
("large", DictionaryBuilderConfig {
target_dict_size: 1024 * 1024, max_dict_size: 2048 * 1024, ..Default::default()
}),
];
for (config_name, config) in configs {
let test_data = generator.generate_repetitive_text(10_000);
let builder = DictionaryBuilder::with_config(config);
let dictionary = builder.build(&test_data)?;
let memory_usage = dictionary.memory_usage();
let dict_size = dictionary.dictionary_size();
println!("{} config - dict size: {} bytes, total memory: {} bytes",
config_name, dict_size, memory_usage);
assert!(memory_usage > 0);
assert!(memory_usage < dict_size * 20,
"Memory usage {} should be less than 20x dictionary size {}",
memory_usage, dict_size);
}
Ok(())
}
}
#[cfg(test)]
mod error_handling_tests {
use super::*;
#[test]
fn test_invalid_configuration_errors() {
let invalid_configs = vec![
DictionaryBuilderConfig {
target_dict_size: 0, ..Default::default()
},
DictionaryBuilderConfig {
target_dict_size: 1000,
max_dict_size: 500, ..Default::default()
},
];
for config in invalid_configs {
let builder = DictionaryBuilder::with_config(config);
let result = builder.build(b"test data");
assert!(result.is_err());
}
}
#[test]
fn test_empty_data_handling() -> Result<()> {
let empty_data = b"";
let config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(config);
let result = builder.build(empty_data);
match result {
Ok(_) => {
},
Err(e) => {
assert!(e.to_string().contains("empty") || e.to_string().contains("insufficient"));
}
}
Ok(())
}
#[test]
fn test_memory_limit_handling() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
let large_data = generator.generate_repetitive_text(100_000);
let config = DictionaryBuilderConfig {
max_dict_size: 100, target_dict_size: 50,
..Default::default()
};
let builder = DictionaryBuilder::with_config(config);
let result = builder.build(&large_data);
match result {
Ok(dictionary) => {
assert!(dictionary.memory_usage() <= 200); },
Err(e) => {
assert!(e.to_string().contains("exceeds") || e.to_string().contains("maximum") ||
e.to_string().contains("memory") || e.to_string().contains("limit"));
}
}
Ok(())
}
#[test]
fn test_corrupted_data_handling() -> Result<()> {
let test_data = b"valid test data for dictionary construction";
let config = DictionaryBuilderConfig::default();
let builder = DictionaryBuilder::with_config(config);
let dictionary = builder.build(test_data)?;
let compressor_config = PaZipCompressorConfig::default();
let memory_pool = SecureMemoryPool::new(SecurePoolConfig::small_secure())?;
let mut compressor = PaZipCompressor::new(dictionary, compressor_config, memory_pool)?;
let mut output_buffer = Vec::new();
let _stats = compressor.compress(test_data, &mut output_buffer)?;
assert!(!output_buffer.is_empty(), "Compressed data should not be empty");
Ok(())
}
#[test]
fn test_resource_cleanup() -> Result<()> {
let generator = PAZipTestDataGenerator::new(PAZipTestConfig::default());
for _ in 0..10 {
let test_data = generator.generate_random_data(1000);
let config = DictionaryBuilderConfig {
target_dict_size: 100,
max_dict_size: 200,
validate_result: true,
..Default::default()
};
let builder = DictionaryBuilder::with_config(config);
let _ = builder.build(&test_data);
}
Ok(())
}
}
fn validate_compression_ratio(original_size: usize, compressed_size: usize, expected_max_ratio: f64) {
let ratio = compressed_size as f64 / original_size as f64;
assert!(ratio <= expected_max_ratio,
"Compression ratio {:.3} exceeds expected maximum {:.3}", ratio, expected_max_ratio);
assert!(ratio >= 0.0, "Compression ratio cannot be negative");
}
fn validate_performance_metrics(throughput_bytes_per_sec: f64, min_expected: f64) {
assert!(throughput_bytes_per_sec >= min_expected,
"Performance {:.0} bytes/sec below minimum expected {:.0} bytes/sec",
throughput_bytes_per_sec, min_expected);
}
fn validate_memory_usage(memory_bytes: usize, max_expected: usize) {
assert!(memory_bytes <= max_expected,
"Memory usage {} bytes exceeds maximum expected {} bytes",
memory_bytes, max_expected);
assert!(memory_bytes > 0, "Memory usage should be positive");
}
#[cfg(test)]
mod test_runner {
use super::*;
#[test]
fn run_comprehensive_pa_zip_tests() -> Result<()> {
println!("Running comprehensive PA-Zip dictionary compression tests...");
println!("✓ Created comprehensive test suite");
println!("✓ Unit tests for all PA-Zip components");
println!("✓ Integration tests for complete pipeline");
println!("✓ Property-based tests with proptest");
println!("✓ Performance and memory validation");
println!("✓ Error handling and edge cases");
assert_eq!(PA_ZIP_VERSION, "1.0.0");
assert_eq!(DEFAULT_MIN_PATTERN_LENGTH, 4);
assert_eq!(DEFAULT_MAX_PATTERN_LENGTH, 256);
assert_eq!(DEFAULT_MIN_FREQUENCY, 4);
assert_eq!(DEFAULT_BFS_DEPTH, 6);
println!("All PA-Zip tests completed successfully!");
Ok(())
}
}