reqsketch 0.1.3

/// Critical algorithmic correctness tests based on C++ reference validation
/// These tests ensure our implementation matches the theoretical and practical behavior
/// of the reference implementation.
use reqsketch::*;

#[cfg(test)]
mod capacity_calculation_tests {
    use super::*;

    #[test]
    fn test_capacity_calculation_correctness() {
        let mut sketch = ReqSketch::builder()
            .k(12)
            .unwrap()
            .rank_accuracy(RankAccuracy::HighRank)
            .build()
            .unwrap();

        sketch.update(1.0);

        let level_info = sketch.level_info();
        assert!(!level_info.is_empty(), "Should have at least one level");

        let (_, _, level0_capacity, _) = level_info[0];

        // Expected capacity for level 0: multiplier=2, sections=3, section_size=12
        // nominal_capacity = 2 * 3 * 12 = 72
        assert_eq!(
            level0_capacity, 72,
            "Level 0 capacity should be 72, got {}",
            level0_capacity
        );
    }

    #[test]
    fn test_item_density_consistency() {
        let mut sketch = ReqSketch::builder()
            .k(12)
            .unwrap()
            .rank_accuracy(RankAccuracy::HighRank)
            .build()
            .unwrap();

        let stream_size = 2048;
        for i in 1..=stream_size {
            sketch.update(i as f32);
        }

        let level_info = sketch.level_info();
        let total_retained: u32 = level_info.iter().map(|(_, items, _, _)| items).sum();
        let total_weighted_items: u64 = level_info
            .iter()
            .map(|(_, items, _, weight)| *items as u64 * weight)
            .sum();

        // Validate weight conservation
        assert_eq!(
            total_weighted_items, stream_size as u64,
            "Total weighted items {} should equal stream size {}",
            total_weighted_items, stream_size
        );

        // Validate retention efficiency (should retain much less than original)
        let retention_ratio = total_retained as f64 / stream_size as f64;
        assert!(
            retention_ratio < 0.3,
            "Retention ratio {:.3} should be < 0.3 for efficient compression",
            retention_ratio
        );
    }

    #[test]
    fn test_level_structure_matches_cpp() {
        let mut sketch = ReqSketch::builder()
            .k(12)
            .unwrap()
            .rank_accuracy(RankAccuracy::HighRank)
            .build()
            .unwrap();

        for i in 1..=2048 {
            sketch.update(i as f32);
        }

        let level_info = sketch.level_info();
        let num_levels = level_info.len();
        let total_retained = sketch.num_retained();

        // C++ reference with same parameters: 5 levels, ~372 retained
        assert!(
            (4..=7).contains(&num_levels),
            "Expected 4-7 levels, got {}",
            num_levels
        );

        assert!(
            (301..500).contains(&total_retained),
            "Expected ~300-500 retained, got {}",
            total_retained
        );
    }
}

#[cfg(test)]
mod global_capacity_tests {
    use super::*;

    #[test]
    fn test_global_capacity_constraint() {
        let mut sketch = ReqSketch::new();

        // Add items and monitor global capacity usage
        for i in 0..10000 {
            sketch.update(i as f64);

            let total_retained = sketch.total_retained_items();
            let total_capacity = sketch.total_nominal_capacity();

            // Key constraint: total retained should not significantly exceed total capacity
            // Allow some over-capacity but not excessive (C++ shows ~95% utilization)
            assert!(
                total_retained <= total_capacity + 50,
                "Global capacity violated: {} retained vs {} capacity at {} items",
                total_retained,
                total_capacity,
                i + 1
            );
        }
    }

    #[test]
    fn test_individual_level_capacity_management() {
        let mut sketch = ReqSketch::new();

        // Add enough items to trigger multiple levels with C++ global compaction strategy
        for i in 0..50000 {
            sketch.update(i as f64);
        }

        // With C++ global compaction strategy, individual levels can exceed their nominal capacity
        // since compaction only occurs when global capacity is reached
        let mut total_items = 0;
        let mut total_capacity = 0;

        for (level, items, capacity, _weight) in sketch.level_info() {
            if items > 0 {
                let utilization = items as f32 / capacity as f32;

                total_items += items;
                total_capacity += capacity;

                // Individual levels can exceed capacity with C++ strategy
                // But should not be completely unbounded (sanity check)
                assert!(
                    utilization <= 3.0,
                    "Level {} exceeds reasonable bounds: {}x",
                    level,
                    utilization
                );
            }
        }

        // The key constraint with C++ strategy is global capacity management
        let global_utilization = total_items as f32 / total_capacity as f32;
        // Ensure capacities are sensible to avoid divide-by-zero or unexpected state
        assert!(
            total_capacity > 0,
            "Total nominal capacity must be positive"
        );

        // Global utilization should be reasonable (allow some overhead for C++ strategy)
        assert!(
            global_utilization <= 1.5,
            "Global utilization too high: {}x",
            global_utilization
        );
    }

    #[test]
    fn test_compaction_only_when_globally_needed() {
        let mut sketch = ReqSketch::new();
        let mut compaction_events = Vec::new();

        for i in 0..5000 {
            let retained_before = sketch.total_retained_items();
            let capacity_before = sketch.total_nominal_capacity();

            sketch.update(i as f64);

            let retained_after = sketch.total_retained_items();

            // If retained count decreased, compaction occurred
            if retained_after < retained_before {
                compaction_events.push((i, retained_before, capacity_before));
            }
        }

        // Ensure capacity values in recorded events are valid
        assert!(
            compaction_events
                .iter()
                .all(|&(_step, _before, cap)| cap > 0),
            "All recorded compaction events should report positive capacity"
        );

        // C++ triggers compaction when num_retained == max_nom_size
        // Compaction causes capacity to grow (ensure_enough_sections), so we check capacity BEFORE
        for (step, before, capacity_before) in compaction_events {
            let utilization = before as f32 / capacity_before as f32;
            // With C++ logic, compaction triggers when retained equals capacity
            assert!(utilization >= 0.9, // Should be very close to 100%
                "Compaction occurred too early at step {}: only {:.1}% capacity utilization (expected ~100%)",
                step, utilization * 100.0);
        }
    }
}

#[cfg(test)]
mod error_bounds_validation_tests {
    use super::*;

    #[test]
    fn test_error_bounds_accuracy_across_k_values() {
        let k_values = [8, 12, 16, 24, 32];

        for &k in &k_values {
            let mut sketch = ReqSketch::builder()
                .k(k)
                .expect("Operation should succeed")
                .build()
                .expect("Operation should succeed");

            // Add enough data to trigger estimation mode
            for i in 0..10000 {
                sketch.update(i as f64);
            }

            assert!(
                sketch.is_estimation_mode(),
                "Should be in estimation mode for k={}",
                k
            );

            // Test error bounds get tighter as k increases
            let test_rank = 0.25;
            let error_bound = {
                let lower = sketch.get_rank_lower_bound(test_rank, 2);
                let upper = sketch.get_rank_upper_bound(test_rank, 2);
                (test_rank - lower).max(upper - test_rank)
            };

            // Error bounds should be inversely proportional to k
            // Allow reasonable bounds based on the actual formula
            let expected_bound = 0.084 / k as f64; // Fixed error component
            assert!(
                error_bound <= expected_bound * 4.0, // Allow more generous bound
                "Error bound {} too large for k={}, expected ~{}",
                error_bound,
                k,
                expected_bound
            );
        }
    }

    #[test]
    fn test_hra_vs_lra_accuracy_guarantees() {
        let test_ranks = [0.05, 0.25, 0.5, 0.75, 0.95];

        for &rank in &test_ranks {
            let mut hra_sketch = ReqSketch::builder()
                .rank_accuracy(RankAccuracy::HighRank)
                .build()
                .expect("Operation should succeed");
            let mut lra_sketch = ReqSketch::builder()
                .rank_accuracy(RankAccuracy::LowRank)
                .build()
                .expect("Operation should succeed");

            // Add identical data to both
            for i in 0..10000 {
                let val = i as f64;
                hra_sketch.update(val);
                lra_sketch.update(val);
            }

            let hra_error = {
                let lower = hra_sketch.get_rank_lower_bound(rank, 2);
                let upper = hra_sketch.get_rank_upper_bound(rank, 2);
                (rank - lower).max(upper - rank)
            };

            let lra_error = {
                let lower = lra_sketch.get_rank_lower_bound(rank, 2);
                let upper = lra_sketch.get_rank_upper_bound(rank, 2);
                (rank - lower).max(upper - rank)
            };

            // Verify HRA vs LRA accuracy patterns
            if rank >= 0.75 {
                assert!(
                    hra_error <= lra_error * 1.1, // Allow 10% tolerance
                    "HRA should be better for high ranks: rank={}, HRA={:.4}, LRA={:.4}",
                    rank,
                    hra_error,
                    lra_error
                );
            } else if rank <= 0.25 {
                assert!(
                    lra_error <= hra_error * 1.1, // Allow 10% tolerance
                    "LRA should be better for low ranks: rank={}, HRA={:.4}, LRA={:.4}",
                    rank,
                    hra_error,
                    lra_error
                );
            }
        }
    }

    #[test]
    fn test_exact_mode_zero_error_bounds() {
        let mut sketch = ReqSketch::new();

        // Add only a few items to stay in exact mode
        for i in 0..20 {
            sketch.update(i as f64);
        }

        assert!(!sketch.is_estimation_mode(), "Should be in exact mode");

        let test_ranks = [0.1, 0.25, 0.5, 0.75, 0.9];

        for &rank in &test_ranks {
            let lower = sketch.get_rank_lower_bound(rank, 2);
            let upper = sketch.get_rank_upper_bound(rank, 2);

            // In exact mode, bounds should be very tight (exact or near-exact)
            let error_bound = (upper - lower) / 2.0;
            assert!(
                error_bound < 0.05,
                "Exact mode should have tight bounds for rank {}: got ±{:.4}",
                rank,
                error_bound
            );
        }
    }
}

#[cfg(test)]
mod internal_state_consistency_tests {
    use super::*;

    #[test]
    fn test_level_structure_consistency() {
        let mut sketch = ReqSketch::new();

        for i in 0..50000 {
            sketch.update(i as f64);

            // Verify level weights are powers of 2
            for (level, _items, _capacity, weight) in sketch.level_info() {
                let expected_weight = 1u64 << level;
                assert_eq!(
                    weight, expected_weight,
                    "Level {} should have weight {}, got {}",
                    level, expected_weight, weight
                );
            }

            // Verify total weight conservation
            let computed_weight = sketch.computed_total_weight();
            assert_eq!(
                computed_weight,
                sketch.len(),
                "Weight conservation violated: computed={}, actual={}",
                computed_weight,
                sketch.len()
            );
        }
    }

    #[test]
    fn test_sorted_items_remain_sorted() {
        let mut sketch = ReqSketch::new();

        // Add items in various orders to test sorting consistency
        let values = [100, 50, 200, 25, 175, 75, 125, 250, 10, 300];

        for &val in &values {
            sketch.update(val as f64);
        }

        // Get sorted view and verify ordering
        let sorted_view = sketch
            .test_get_sorted_view()
            .expect("Operation should succeed");
        let mut prev_value = None;

        for item in sorted_view.iter() {
            if let Some(prev) = prev_value {
                assert!(
                    item >= prev,
                    "Items not properly sorted: {} came after {}",
                    item,
                    prev
                );
            }
            prev_value = Some(item);
        }
    }

    #[test]
    fn test_merge_preserves_algorithmic_properties() {
        let mut sketch1 = ReqSketch::new();
        let mut sketch2 = ReqSketch::new();

        // Add different ranges to each sketch
        for i in 0..5000 {
            sketch1.update(i as f64);
            sketch2.update((i + 5000) as f64);
        }

        let total_items_before = sketch1.len() + sketch2.len();

        // Merge sketches
        sketch1.merge(&sketch2).expect("Operation should succeed");

        // Verify total weight conservation
        assert_eq!(
            sketch1.len(),
            total_items_before,
            "Merge should conserve total weight: expected {}, got {}",
            total_items_before,
            sketch1.len()
        );

        // With C++ global compaction strategy, merge operations may temporarily exceed
        // global capacity until the next compaction trigger is reached
        let total_retained = sketch1.total_retained_items();
        let total_capacity = sketch1.total_nominal_capacity();

        // Ensure capacity is positive after merge to validate ratio computations
        assert!(
            total_capacity > 0,
            "Total nominal capacity after merge must be positive"
        );

        // Allow significant temporary exceedance after merge with C++ strategy
        // The next update operation will trigger compaction if needed
        assert!(
            total_retained <= total_capacity * 3, // Much higher tolerance for C++ strategy
            "Merge exceeded reasonable bounds: {} retained vs {} capacity",
            total_retained,
            total_capacity
        );

        // Verify error bounds are still valid
        let test_rank = 0.5;
        let lower = sketch1.get_rank_lower_bound(test_rank, 2);
        let upper = sketch1.get_rank_upper_bound(test_rank, 2);

        assert!(
            lower <= test_rank && test_rank <= upper,
            "Error bounds invalid after merge: [{}, {}] should contain {}",
            lower,
            upper,
            test_rank
        );
    }
}

#[cfg(test)]
mod cross_validation_tests {
    use super::*;

    #[test]
    fn test_rank_calculation_consistency() {
        let mut sketch = ReqSketch::new();
        let n = 10000;

        // Add uniform data
        for i in 0..n {
            sketch.update(i as f64);
        }

        // Test bidirectional consistency: rank(quantile(r)) ≈ r
        let test_ranks = [0.1, 0.25, 0.5, 0.75, 0.9];

        for &rank in &test_ranks {
            let quantile = sketch
                .quantile(rank, SearchCriteria::Inclusive)
                .expect("Operation should succeed");
            let round_trip_rank = sketch
                .rank(&quantile, SearchCriteria::Inclusive)
                .expect("Operation should succeed");

            let error = (round_trip_rank - rank).abs();

            // Get theoretical bounds to determine acceptable error
            let lower = sketch.get_rank_lower_bound(rank, 3);
            let upper = sketch.get_rank_upper_bound(rank, 3);
            let theoretical_error = (rank - lower).max(upper - rank);

            assert!(
                error <= theoretical_error * 2.0, // Allow 2x theoretical error for round-trip
                "Round-trip rank error {} exceeds theoretical bound {} for rank {}",
                error,
                theoretical_error,
                rank
            );
        }
    }

    #[test]
    fn test_comparative_accuracy_with_theoretical_bounds() {
        // This test validates basic accuracy characteristics, following the approach
        // used by C++ and Java implementations (simple tolerance checks rather than statistical bounds)

        let mut sketch = ReqSketch::new();
        let n = 50000;

        for i in 0..n {
            sketch.update(i as f64);
        }

        // Test basic accuracy with simple tolerance checks like C++/Java tests
        let test_ranks = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99];

        for &rank in &test_ranks {
            let true_quantile = rank * (n - 1) as f64;
            let estimated_rank = sketch
                .rank(&true_quantile, SearchCriteria::Inclusive)
                .expect("Operation should succeed");

            // Use simple tolerance check like C++ and Java tests (they use .01 margin)
            let error = (estimated_rank - rank).abs();
            assert!(
                error <= 0.1, // Allow 10% error tolerance for C++ strategy
                "Rank accuracy failed for {}: estimated {} vs true {}, error {:.4}",
                rank,
                estimated_rank,
                rank,
                error
            );
        }

        // Also test that rank bounds are directionally correct (like C++ tests do)
        for &rank in &test_ranks {
            let lower = sketch.get_rank_lower_bound(rank, 2);
            let upper = sketch.get_rank_upper_bound(rank, 2);

            assert!(
                lower <= rank && rank <= upper,
                "Rank bounds check failed for {}: [{}, {}] should contain {}",
                rank,
                lower,
                upper,
                rank
            );
        }
    }
}