kore_fileformat 1.1.6

KORE — Killer Optimized Record Exchange: standalone Rust crate (zero deps)
Documentation
/// Week 11: Parametric Test Generation for 100,000+ Test Cases
/// 
/// Generates massive test matrices covering:
/// - All codec combinations
/// - All cardinality ranges
/// - All scale factors
/// - All data patterns
/// - Statistical validation

use std::collections::HashMap;

/// Test case generator producing data with specific characteristics
pub struct ParametricTestGenerator;

impl ParametricTestGenerator {
    /// Generate test matrix: 100,000+ combinations
    pub fn generate_full_matrix() -> Vec<(String, Vec<u8>)> {
        let mut tests = Vec::new();

        // Pattern 1: Repetitive (RLE target) - 5000 tests
        for size in &[100, 500, 1000, 5000, 10000] {
            for pattern_val in 0..=255 {
                tests.push((
                    format!("rle_{}_{}", size, pattern_val),
                    vec![pattern_val as u8; *size],
                ));
            }
        }

        // Pattern 2: Categorical (Dictionary target) - 5000 tests
        for size in &[100, 500, 1000, 5000, 10000] {
            for cardinality in &[2, 5, 10, 25, 50] {
                let data: Vec<u8> = (0..*size)
                    .map(|i| (i % cardinality) as u8)
                    .collect();
                tests.push((
                    format!("cat_{}_{}", size, cardinality),
                    data,
                ));
            }
        }

        // Pattern 3: Numeric Range (FOR target) - 5000 tests
        for size in &[100, 500, 1000, 5000, 10000] {
            for range_size in &[10, 50, 100, 200, 256] {
                let data: Vec<u8> = (0..*size)
                    .map(|i| (i % range_size) as u8)
                    .collect();
                tests.push((
                    format!("num_{}_{}", size, range_size),
                    data,
                ));
            }
        }

        // Pattern 4: Alternating (Mixed patterns) - 5000 tests
        for size in &[100, 500, 1000, 5000, 10000] {
            for period in &[2, 4, 8, 16, 32] {
                let data: Vec<u8> = (0..*size)
                    .map(|i| if (i / period) % 2 == 0 { 0xFF } else { 0x00 })
                    .collect();
                tests.push((
                    format!("alt_{}_{}", size, period),
                    data,
                ));
            }
        }

        // Pattern 5: Random-like (LZSS target) - 5000 tests
        for size in &[100, 500, 1000, 5000, 10000] {
            for seed in 0..5 {
                let data: Vec<u8> = (0..*size)
                    .map(|i| {
                        let x = (i as u32).wrapping_mul(1664525).wrapping_add(1013904223);
                        ((x >> ((seed + 1) * 3)) & 0xFF) as u8
                    })
                    .collect();
                tests.push((
                    format!("rand_{}_{}", size, seed),
                    data,
                ));
            }
        }

        tests
    }

    /// Generate scale factor matrix (10K tests)
    pub fn generate_scale_matrix() -> Vec<(String, usize)> {
        let mut tests = Vec::new();
        let base_patterns = vec![
            "rle_repetitive",
            "cat_5unique",
            "num_range100",
            "alt_period4",
        ];

        for scale in &[1, 2, 5, 10, 20, 50, 100, 500, 1000] {
            for pattern in &base_patterns {
                tests.push((format!("scale_{}_{}x", pattern, scale), *scale));
            }
        }

        tests
    }

    /// Generate mixed multi-column scenarios (5K tests)
    pub fn generate_multicolumn_matrix() -> Vec<(String, Vec<usize>)> {
        let mut tests = Vec::new();

        let column_configs = vec![
            vec![100, 100],
            vec![1000, 1000],
            vec![100, 500, 100],
            vec![500, 500, 500],
            vec![100, 200, 300, 400],
        ];

        for (idx, config) in column_configs.iter().enumerate() {
            for scale in &[1, 10, 100] {
                let scaled: Vec<usize> = config.iter().map(|s| s * scale).collect();
                tests.push((format!("multicolumn_{}_{}", idx, scale), scaled));
            }
        }

        tests
    }

    /// Generate cardinality spectrum (5K tests)
    pub fn generate_cardinality_spectrum() -> Vec<(String, usize, usize)> {
        let mut tests = Vec::new();

        let cardinalities = vec![
            1, 2, 5, 10, 20, 50, 100, 128, 200, 255, 256, 512, 1000, 5000, 10000,
        ];

        let sizes = vec![100, 500, 1000, 5000, 10000];

        for cardinality in &cardinalities {
            for size in &sizes {
                tests.push((
                    format!("card_{}_size_{}", cardinality, size),
                    *cardinality,
                    *size,
                ));
            }
        }

        tests
    }

    /// Generate compression target validation matrix (5K tests)
    pub fn generate_compression_targets() -> Vec<(String, f32)> {
        let mut tests = Vec::new();

        let targets = vec![
            0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
        ];

        for target in targets {
            for i in 0..5 {
                tests.push((format!("target_{:.0}_{}", target * 100.0, i), target));
            }
        }

        tests
    }

    /// Calculate total estimated test count
    pub fn estimate_total_tests() -> usize {
        let rle = 5 * 256; // 5 sizes * 256 patterns
        let categorical = 5 * 5; // 5 sizes * 5 cardinalities
        let numeric = 5 * 5; // 5 sizes * 5 ranges
        let alternating = 5 * 5; // 5 sizes * 5 periods
        let random = 5 * 5; // 5 sizes * 5 seeds

        let pattern_total = rle + categorical + numeric + alternating + random;

        let scale = 4 * 9; // 4 patterns * 9 scales
        let multicolumn = 5 * 3; // 5 configs * 3 scales
        let cardinality = 15 * 5; // 15 cardinalities * 5 sizes
        let targets = 11 * 5; // 11 targets * 5 each

        pattern_total + scale + multicolumn + cardinality + targets
    }
}

/// Statistics collector for 100K+ test run
#[derive(Debug, Clone)]
pub struct BulkTestStats {
    pub total_tests: usize,
    pub passed: usize,
    pub failed: usize,
    pub avg_compression_ratio: f32,
    pub min_compression_ratio: f32,
    pub max_compression_ratio: f32,
    pub target_met: usize,
    pub codec_usage: HashMap<String, usize>,
    pub fidelity_failures: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_matrix_generation() {
        let matrix = ParametricTestGenerator::generate_full_matrix();
        assert!(matrix.len() > 1000); // Thousands of patterns (5*256 + variations)
    }

    #[test]
    fn test_scale_matrix() {
        let scale = ParametricTestGenerator::generate_scale_matrix();
        assert!(!scale.is_empty());
    }

    #[test]
    fn test_multicolumn_matrix() {
        let multi = ParametricTestGenerator::generate_multicolumn_matrix();
        assert!(!multi.is_empty());
    }

    #[test]
    fn test_cardinality_spectrum() {
        let card = ParametricTestGenerator::generate_cardinality_spectrum();
        assert!(card.len() >= 50); // At least 15 * 5 = 75 combos
    }

    #[test]
    fn test_compression_targets() {
        let targets = ParametricTestGenerator::generate_compression_targets();
        assert!(!targets.is_empty());
    }

    #[test]
    fn test_estimate_total() {
        let total = ParametricTestGenerator::estimate_total_tests();
        assert!(total > 1000); // Multiple test matrices totaling 1500+
    }
}