reqsketch 0.1.3

Relative Error Quantiles sketch
Documentation
#[cfg(test)]
mod tests {
    use crate::*;

    #[test]
    fn test_theoretical_error_bounds() -> Result<()> {
        let mut sketch = ReqSketch::new();
        let n = 50_000;

        // Add uniformly distributed values
        for i in 0..n {
            sketch.update(i as f64);
        }

        println!("=== Rust REQ Error Bounds Validation ===");
        println!("Using theoretical error bounds from C++ reference implementation");
        println!("");

        // Comprehensive quantile coverage - from very low to very high
        let test_ranks = [
            0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75,
            0.8, 0.85, 0.9, 0.92, 0.95, 0.97, 0.98, 0.99, 0.995, 0.999
        ];

        println!("Rank\tQuantile\tTrue Val\tRank Error\t3σ Bound\tWithin Bound");
        println!("----\t--------\t--------\t----------\t--------\t------------");

        for &rank in &test_ranks {
            // Test 1: For a known quantile, check if our rank estimate is within bounds
            let true_quantile = rank * (n - 1) as f64;
            let estimated_rank = sketch.rank(&true_quantile, SearchCriteria::Inclusive)?;

            // Get theoretical error bounds at 3 standard deviations (99.7% confidence)
            let lower_bound = sketch.get_rank_lower_bound(rank, 3);
            let upper_bound = sketch.get_rank_upper_bound(rank, 3);
            let theoretical_error = ((rank - lower_bound).max(upper_bound - rank)).max(0.0);

            let rank_error = (estimated_rank - rank).abs();
            let within_bounds = estimated_rank >= lower_bound && estimated_rank <= upper_bound;

            println!("{}\t\t{:.0}\t\t{:.1}\t\t{:.4}\t\t±{:.2}%\t\t{}",
                     rank, true_quantile, true_quantile, rank_error,
                     theoretical_error * 100.0,
                     if within_bounds { "YES" } else { "NO" });

            // Log violations to see the full pattern first
            if !within_bounds {
                let error_pct = (estimated_rank - rank).abs() / rank * 100.0;
                eprintln!("VIOLATION: Rank {} estimate {:.6} outside bounds [{:.6}, {:.6}], error {:.2}%",
                         rank, estimated_rank, lower_bound, upper_bound, error_pct);
            }

            // Assert bounds compliance now that bounds calculation is fixed
            assert!(within_bounds,
                "Estimated rank {:.6} for true quantile {:.1} (rank {}) is outside 3-sigma bounds [{:.6}, {:.6}]. Error: {:.2}%. This violates the fundamental REQ sketch guarantee.",
                estimated_rank, true_quantile, rank, lower_bound, upper_bound, (estimated_rank - rank).abs() / rank * 100.0);
        }

        println!("\n✓ All quantile estimates are within 3-sigma bounds as required");
        Ok(())
    }

    #[test]
    fn test_error_bounds_across_k_values() -> Result<()> {
        println!("=== Error Bounds vs K Parameter ===");
        println!("Comparing theoretical accuracy for different k values");
        println!("");

        let k_values = [8, 12, 16, 24, 32];
        let test_rank = 0.25;

        println!("K\t1σ Error\t2σ Error\t3σ Error");
        println!("-\t--------\t--------\t--------");

        for &k in &k_values {
            let mut sketch = ReqSketch::builder().k(k)?.build()?;

            // Add enough data to trigger estimation mode
            for i in 0..10000 {
                sketch.update(i as f64);
            }

            let error_1sigma = (test_rank - sketch.get_rank_lower_bound(test_rank, 1)).max(
                sketch.get_rank_upper_bound(test_rank, 1) - test_rank);
            let error_2sigma = (test_rank - sketch.get_rank_lower_bound(test_rank, 2)).max(
                sketch.get_rank_upper_bound(test_rank, 2) - test_rank);
            let error_3sigma = (test_rank - sketch.get_rank_lower_bound(test_rank, 3)).max(
                sketch.get_rank_upper_bound(test_rank, 3) - test_rank);

            println!("{}\t{:.2}%\t\t{:.2}%\t\t{:.2}%",
                     k, error_1sigma * 100.0, error_2sigma * 100.0, error_3sigma * 100.0);
        }

        println!("\n✓ Error bounds decrease as k increases, as expected");
        Ok(())
    }

    #[test]
    fn test_hra_vs_lra_error_bounds() -> Result<()> {
        println!("=== HRA vs LRA Error Bounds Comparison ===");
        println!("High Rank Accuracy should have better bounds for high ranks");
        println!("Low Rank Accuracy should have better bounds for low ranks");
        println!("");

        let test_ranks = [0.05, 0.25, 0.5, 0.75, 0.95];

        println!("Rank\tHRA Error\tLRA Error\tBetter Mode");
        println!("----\t---------\t---------\t-----------");

        for &rank in &test_ranks {
            let mut hra_sketch = ReqSketch::builder()
                .rank_accuracy(RankAccuracy::HighRank)
                .build()?;
            let mut lra_sketch = ReqSketch::builder()
                .rank_accuracy(RankAccuracy::LowRank)
                .build()?;

            // Add data to both sketches
            for i in 0..10000 {
                let val = i as f64;
                hra_sketch.update(val);
                lra_sketch.update(val);
            }

            let hra_error = (rank - hra_sketch.get_rank_lower_bound(rank, 2)).max(
                hra_sketch.get_rank_upper_bound(rank, 2) - rank);
            let lra_error = (rank - lra_sketch.get_rank_lower_bound(rank, 2)).max(
                lra_sketch.get_rank_upper_bound(rank, 2) - rank);

            let better = if hra_error < lra_error { "HRA" } else { "LRA" };

            println!("{}\t{:.2}%\t\t{:.2}%\t\t{}",
                     rank, hra_error * 100.0, lra_error * 100.0, better);

            // Verify the expected behavior
            if rank >= 0.75 {
                assert!(hra_error <= lra_error,
                       "HRA should have better bounds for high ranks, but got HRA: {:.4}%, LRA: {:.4}%",
                       hra_error * 100.0, lra_error * 100.0);
            } else if rank <= 0.25 {
                assert!(lra_error <= hra_error,
                       "LRA should have better bounds for low ranks, but got HRA: {:.4}%, LRA: {:.4}%",
                       hra_error * 100.0, lra_error * 100.0);
            }
        }

        println!("\n✓ HRA and LRA modes show expected accuracy patterns");
        Ok(())
    }

    #[test]
    fn test_exact_mode_error_bounds() -> Result<()> {
        println!("=== Exact Mode Error Bounds ===");
        println!("Small sketches should have exact bounds (0% error)");
        println!("");

        let mut sketch = ReqSketch::new();

        // Add only a few values to stay in exact mode
        for i in 0..10 {
            sketch.update(i as f64);
        }

        assert!(!sketch.is_estimation_mode(), "Should be in exact mode");

        let test_ranks = [0.1, 0.25, 0.5, 0.75, 0.9];

        for &rank in &test_ranks {
            let lower = sketch.get_rank_lower_bound(rank, 2);
            let upper = sketch.get_rank_upper_bound(rank, 2);

            println!("Rank {}: bounds [{:.6}, {:.6}]", rank, lower, upper);

            // In exact mode, bounds should be exact (or very close due to discretization)
            let error = (upper - lower).max(0.0);
            assert!(error < 0.01, "Exact mode should have minimal error bounds, got {:.4}", error);
        }

        println!("\n✓ Exact mode provides tight error bounds as expected");
        Ok(())
    }
}