oxirs-rule 0.2.4

//! Comprehensive Benchmark Suite for Reasoning Engines
//!
//! Provides standardized benchmarks for measuring and comparing performance of
//! different reasoning strategies, algorithms, and optimizations.
//!
//! # Features
//!
//! - **Standard Benchmark Datasets**: Common reasoning tasks for comparison
//! - **Performance Metrics**: Throughput, latency, memory usage, scalability
//! - **Automated Testing**: Run full benchmark suites with one command
//! - **Comparative Analysis**: Compare different engines and configurations
//! - **Regression Detection**: Identify performance regressions
//! - **Reporting**: Generate detailed benchmark reports
//!
//! # Benchmark Categories
//!
//! - **Forward Chaining**: Data-driven inference performance
//! - **Backward Chaining**: Goal-driven query performance
//! - **RETE Network**: Pattern matching efficiency
//! - **Incremental Reasoning**: Delta computation performance
//! - **Parallel Execution**: Multi-threaded scaling
//! - **SPARQL Integration**: Query-driven reasoning
//! - **SHACL Validation**: Constraint checking performance
//!
//! # Example
//!
//! ```text
//! use oxirs_rule::benchmark_suite::{BenchmarkSuite, BenchmarkConfig, BenchmarkCategory};
//!
//! let config = BenchmarkConfig::default()
//!     .with_iterations(100)
//!     .with_warmup(10);
//!
//! let mut suite = BenchmarkSuite::new(config);
//!
//! // Run all benchmarks
//! let results = suite.run_all().expect("should succeed");
//!
//! // Print report
//! println!("{}", results.generate_report());
//!
//! // Run specific category
//! let forward_results = suite.run_category(BenchmarkCategory::ForwardChaining).expect("should succeed");
//! # Ok::<(), anyhow::Error>(())
//! ```

use crate::{Rule, RuleAtom, RuleEngine, Term};
use anyhow::Result;
use scirs2_core::metrics::{Counter, Timer};
use std::collections::HashMap;
use std::time::{Duration, Instant};
use tracing::{debug, info};

// Global metrics for benchmarking
lazy_static::lazy_static! {
    static ref BENCHMARK_RUNS: Counter = Counter::new("benchmark_runs".to_string());
    static ref BENCHMARK_FAILURES: Counter = Counter::new("benchmark_failures".to_string());
    static ref BENCHMARK_TIME: Timer = Timer::new("benchmark_total_time".to_string());
}

/// Benchmark category
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BenchmarkCategory {
    /// Forward chaining inference
    ForwardChaining,
    /// Backward chaining proof search
    BackwardChaining,
    /// RETE pattern matching
    ReteMatching,
    /// Incremental reasoning
    IncrementalReasoning,
    /// Parallel execution
    ParallelExecution,
    /// SPARQL integration
    SparqlIntegration,
    /// SHACL validation
    ShaclValidation,
    /// Rule optimization
    RuleOptimization,
    /// Memory usage
    MemoryUsage,
    /// Scalability testing
    Scalability,
}

impl BenchmarkCategory {
    /// Get all benchmark categories
    pub fn all() -> Vec<Self> {
        vec![
            Self::ForwardChaining,
            Self::BackwardChaining,
            Self::ReteMatching,
            Self::IncrementalReasoning,
            Self::ParallelExecution,
            Self::SparqlIntegration,
            Self::ShaclValidation,
            Self::RuleOptimization,
            Self::MemoryUsage,
            Self::Scalability,
        ]
    }

    /// Get category name
    pub fn name(&self) -> &str {
        match self {
            Self::ForwardChaining => "Forward Chaining",
            Self::BackwardChaining => "Backward Chaining",
            Self::ReteMatching => "RETE Matching",
            Self::IncrementalReasoning => "Incremental Reasoning",
            Self::ParallelExecution => "Parallel Execution",
            Self::SparqlIntegration => "SPARQL Integration",
            Self::ShaclValidation => "SHACL Validation",
            Self::RuleOptimization => "Rule Optimization",
            Self::MemoryUsage => "Memory Usage",
            Self::Scalability => "Scalability",
        }
    }
}

/// Benchmark configuration
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Number of iterations per benchmark
    pub iterations: usize,
    /// Warmup iterations (not included in results)
    pub warmup: usize,
    /// Enable detailed profiling
    pub detailed_profiling: bool,
    /// Maximum duration per benchmark (ms)
    pub max_duration_ms: u64,
    /// Minimum samples for statistical significance
    pub min_samples: usize,
}

impl Default for BenchmarkConfig {
    fn default() -> Self {
        Self {
            iterations: 100,
            warmup: 10,
            detailed_profiling: false,
            max_duration_ms: 60_000, // 1 minute
            min_samples: 10,
        }
    }
}

impl BenchmarkConfig {
    /// Set iterations
    pub fn with_iterations(mut self, iterations: usize) -> Self {
        self.iterations = iterations;
        self
    }

    /// Set warmup
    pub fn with_warmup(mut self, warmup: usize) -> Self {
        self.warmup = warmup;
        self
    }

    /// Enable detailed profiling
    pub fn with_detailed_profiling(mut self, enabled: bool) -> Self {
        self.detailed_profiling = enabled;
        self
    }
}

/// Benchmark result for a single test
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the benchmark
    pub name: String,
    /// Category
    pub category: BenchmarkCategory,
    /// Total time taken
    pub total_time: Duration,
    /// Average time per iteration
    pub avg_time: Duration,
    /// Minimum time
    pub min_time: Duration,
    /// Maximum time
    pub max_time: Duration,
    /// Standard deviation
    pub std_dev: Duration,
    /// Throughput (operations per second)
    pub throughput: f64,
    /// Number of samples
    pub samples: usize,
    /// Memory usage (bytes)
    pub memory_bytes: usize,
}

impl BenchmarkResult {
    /// Generate a summary string
    pub fn summary(&self) -> String {
        format!(
            "{}: avg={:.2}ms, throughput={:.0} ops/sec, mem={}KB",
            self.name,
            self.avg_time.as_secs_f64() * 1000.0,
            self.throughput,
            self.memory_bytes / 1024
        )
    }
}

/// Collection of benchmark results
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
    /// All results
    pub results: Vec<BenchmarkResult>,
    /// Timestamp
    pub timestamp: std::time::SystemTime,
}

impl BenchmarkResults {
    /// Generate a detailed report
    pub fn generate_report(&self) -> String {
        let mut report = String::new();
        report.push_str("═══════════════════════════════════════════════════════════════\n");
        report.push_str("          OxiRS Rule Engine Benchmark Report\n");
        report.push_str("═══════════════════════════════════════════════════════════════\n\n");

        // Group by category
        let mut by_category: HashMap<BenchmarkCategory, Vec<&BenchmarkResult>> = HashMap::new();
        for result in &self.results {
            by_category.entry(result.category).or_default().push(result);
        }

        // Print results by category
        for category in BenchmarkCategory::all() {
            if let Some(results) = by_category.get(&category) {
                report.push_str(&format!("\n{}\n", category.name()));
                report.push_str(&format!("{}\n", "─".repeat(60)));

                for result in results {
                    report.push_str(&format!(
                        "  {:<30} {:>10.2}ms  {:>12.0} ops/s\n",
                        result.name,
                        result.avg_time.as_secs_f64() * 1000.0,
                        result.throughput
                    ));
                }
            }
        }

        report.push_str("\n═══════════════════════════════════════════════════════════════\n");
        report
    }

    /// Get fastest benchmark in category
    pub fn fastest_in_category(&self, category: BenchmarkCategory) -> Option<&BenchmarkResult> {
        self.results
            .iter()
            .filter(|r| r.category == category)
            .min_by_key(|r| r.avg_time)
    }

    /// Get slowest benchmark in category
    pub fn slowest_in_category(&self, category: BenchmarkCategory) -> Option<&BenchmarkResult> {
        self.results
            .iter()
            .filter(|r| r.category == category)
            .max_by_key(|r| r.avg_time)
    }

    /// Calculate overall statistics
    pub fn overall_stats(&self) -> (Duration, f64) {
        let total_time: Duration = self.results.iter().map(|r| r.total_time).sum();
        let avg_throughput: f64 =
            self.results.iter().map(|r| r.throughput).sum::<f64>() / self.results.len() as f64;
        (total_time, avg_throughput)
    }
}

/// Comprehensive benchmark suite
pub struct BenchmarkSuite {
    /// Configuration
    config: BenchmarkConfig,
    /// Rule engine
    engine: RuleEngine,
    /// Test datasets
    datasets: HashMap<String, Vec<RuleAtom>>,
}

impl BenchmarkSuite {
    /// Create a new benchmark suite
    pub fn new(config: BenchmarkConfig) -> Self {
        let mut suite = Self {
            config,
            engine: RuleEngine::new(),
            datasets: HashMap::new(),
        };
        suite.load_datasets();
        suite
    }

    /// Load standard benchmark datasets
    fn load_datasets(&mut self) {
        // Small dataset (10 facts)
        self.datasets.insert(
            "small".to_string(),
            (0..10)
                .map(|i| RuleAtom::Triple {
                    subject: Term::Constant(format!("s{i}")),
                    predicate: Term::Constant("p".to_string()),
                    object: Term::Constant(format!("o{i}")),
                })
                .collect(),
        );

        // Medium dataset (100 facts)
        self.datasets.insert(
            "medium".to_string(),
            (0..100)
                .map(|i| RuleAtom::Triple {
                    subject: Term::Constant(format!("s{i}")),
                    predicate: Term::Constant("p".to_string()),
                    object: Term::Constant(format!("o{i}")),
                })
                .collect(),
        );

        // Large dataset (1000 facts)
        self.datasets.insert(
            "large".to_string(),
            (0..1000)
                .map(|i| RuleAtom::Triple {
                    subject: Term::Constant(format!("s{i}")),
                    predicate: Term::Constant("p".to_string()),
                    object: Term::Constant(format!("o{i}")),
                })
                .collect(),
        );
    }

    /// Run all benchmarks
    pub fn run_all(&mut self) -> Result<BenchmarkResults> {
        info!("Running all benchmarks");
        let _timer = BENCHMARK_TIME.start();

        let mut all_results = Vec::new();

        for category in BenchmarkCategory::all() {
            match self.run_category(category) {
                Ok(mut results) => all_results.append(&mut results.results),
                Err(e) => {
                    BENCHMARK_FAILURES.inc();
                    debug!("Category {:?} failed: {}", category, e);
                }
            }
        }

        BENCHMARK_RUNS.inc();

        Ok(BenchmarkResults {
            results: all_results,
            timestamp: std::time::SystemTime::now(),
        })
    }

    /// Run benchmarks for specific category
    pub fn run_category(&mut self, category: BenchmarkCategory) -> Result<BenchmarkResults> {
        info!("Running benchmarks for {:?}", category);

        let results = match category {
            BenchmarkCategory::ForwardChaining => self.bench_forward_chaining(),
            BenchmarkCategory::BackwardChaining => self.bench_backward_chaining(),
            BenchmarkCategory::ReteMatching => self.bench_rete_matching(),
            BenchmarkCategory::IncrementalReasoning => self.bench_incremental_reasoning(),
            BenchmarkCategory::ParallelExecution => self.bench_parallel_execution(),
            BenchmarkCategory::SparqlIntegration => self.bench_sparql_integration(),
            BenchmarkCategory::ShaclValidation => self.bench_shacl_validation(),
            BenchmarkCategory::RuleOptimization => self.bench_rule_optimization(),
            BenchmarkCategory::MemoryUsage => self.bench_memory_usage(),
            BenchmarkCategory::Scalability => self.bench_scalability(),
        }?;

        Ok(BenchmarkResults {
            results,
            timestamp: std::time::SystemTime::now(),
        })
    }

    /// Benchmark forward chaining
    fn bench_forward_chaining(&mut self) -> Result<Vec<BenchmarkResult>> {
        let mut results = Vec::new();

        // Simple rule benchmark
        let rule = Rule {
            name: "simple".to_string(),
            body: vec![RuleAtom::Triple {
                subject: Term::Variable("X".to_string()),
                predicate: Term::Constant("p".to_string()),
                object: Term::Variable("Y".to_string()),
            }],
            head: vec![RuleAtom::Triple {
                subject: Term::Variable("X".to_string()),
                predicate: Term::Constant("q".to_string()),
                object: Term::Variable("Y".to_string()),
            }],
        };
        self.engine.add_rule(rule);

        // Clone datasets to avoid borrow checker issues
        let datasets: Vec<(String, Vec<RuleAtom>)> = self
            .datasets
            .iter()
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect();

        for (name, dataset) in datasets {
            let result = self.run_benchmark_with_engine(
                &format!("forward_chain_{}", name),
                BenchmarkCategory::ForwardChaining,
                |engine| {
                    engine.clear();
                    engine.forward_chain(&dataset)
                },
            )?;
            results.push(result);
        }

        Ok(results)
    }

    /// Benchmark backward chaining
    ///
    /// Note: Uses very conservative settings (max_depth=5, minimal dataset) to prevent stack overflow
    fn bench_backward_chaining(&mut self) -> Result<Vec<BenchmarkResult>> {
        let mut results = Vec::new();

        // Configure backward chainer with very conservative max_depth to prevent stack overflow
        self.engine.set_backward_chain_max_depth(5);

        // Add test rule (simple non-recursive rule)
        let rule = Rule {
            name: "test".to_string(),
            body: vec![RuleAtom::Triple {
                subject: Term::Variable("X".to_string()),
                predicate: Term::Constant("p".to_string()),
                object: Term::Variable("Y".to_string()),
            }],
            head: vec![RuleAtom::Triple {
                subject: Term::Variable("X".to_string()),
                predicate: Term::Constant("q".to_string()),
                object: Term::Variable("Y".to_string()),
            }],
        };
        self.engine.add_rule(rule);

        // Use minimal dataset (just 3 facts) to prevent exponential proof search
        let minimal_dataset = vec![
            RuleAtom::Triple {
                subject: Term::Constant("s0".to_string()),
                predicate: Term::Constant("p".to_string()),
                object: Term::Constant("o0".to_string()),
            },
            RuleAtom::Triple {
                subject: Term::Constant("s1".to_string()),
                predicate: Term::Constant("p".to_string()),
                object: Term::Constant("o1".to_string()),
            },
            RuleAtom::Triple {
                subject: Term::Constant("s2".to_string()),
                predicate: Term::Constant("p".to_string()),
                object: Term::Constant("o2".to_string()),
            },
        ];

        // Goal with fully bound subject to minimize search space
        let goal = RuleAtom::Triple {
            subject: Term::Constant("s0".to_string()),
            predicate: Term::Constant("q".to_string()),
            object: Term::Constant("o0".to_string()), // Fully bound to avoid exponential search
        };

        let goal_clone = goal.clone();
        let dataset_clone = minimal_dataset.clone();
        let result = self.run_benchmark_with_engine(
            "backward_chain_minimal",
            BenchmarkCategory::BackwardChaining,
            |engine| {
                engine.clear();
                // Re-apply the safe max_depth after clear
                engine.set_backward_chain_max_depth(5);
                engine.add_facts(dataset_clone.clone());
                engine.backward_chain(&goal_clone)
            },
        )?;
        results.push(result);

        Ok(results)
    }

    /// Benchmark RETE matching
    fn bench_rete_matching(&mut self) -> Result<Vec<BenchmarkResult>> {
        let mut results = Vec::new();

        // Clone datasets to avoid borrow checker issues
        let datasets: Vec<(String, Vec<RuleAtom>)> = self
            .datasets
            .iter()
            .map(|(k, v)| (k.clone(), v.clone()))
            .collect();

        for (name, dataset) in datasets {
            let result = self.run_benchmark_with_engine(
                &format!("rete_{}", name),
                BenchmarkCategory::ReteMatching,
                |engine| {
                    engine.clear();
                    engine.rete_forward_chain(dataset.clone())
                },
            )?;
            results.push(result);
        }

        Ok(results)
    }

    /// Placeholder benchmarks for other categories
    fn bench_incremental_reasoning(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_parallel_execution(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_sparql_integration(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_shacl_validation(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_rule_optimization(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_memory_usage(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    fn bench_scalability(&mut self) -> Result<Vec<BenchmarkResult>> {
        Ok(vec![])
    }

    /// Estimate memory usage of the engine
    ///
    /// This provides an approximation of memory consumption based on:
    /// - Number of facts and rules
    /// - Average sizes of terms and atoms
    /// - Internal data structure overhead
    fn estimate_memory_usage(&self) -> usize {
        let facts = self.engine.get_facts();

        // Estimate bytes per fact (conservative)
        // - RuleAtom enum discriminant: 1 byte
        // - Triple with 3 Terms: ~3 * 50 bytes (strings, etc.) = 150 bytes
        // - HashSet overhead: ~8 bytes per entry
        let bytes_per_fact = 160;

        // Estimate bytes per rule
        // - Rule struct: name (String ~24 bytes) + body Vec + head Vec
        // - Average 3 atoms in body + 2 in head = 5 * 160 bytes
        // - Vec overhead: ~24 bytes each
        let bytes_per_rule = 24 + (5 * 160) + (2 * 24);

        let fact_count = facts.len();
        let rule_count = self.engine.get_facts().len(); // Approximation

        // Calculate total memory
        let facts_memory = fact_count * bytes_per_fact;
        let rules_memory = rule_count * bytes_per_rule;

        // Add engine overhead (RETE network, caches, etc.)
        let engine_overhead = 4096; // ~4KB base overhead

        facts_memory + rules_memory + engine_overhead
    }

    /// Run a single benchmark with timing using engine
    fn run_benchmark_with_engine<F, T>(
        &mut self,
        name: &str,
        category: BenchmarkCategory,
        mut f: F,
    ) -> Result<BenchmarkResult>
    where
        F: FnMut(&mut RuleEngine) -> Result<T>,
    {
        debug!("Running benchmark: {}", name);

        // Warmup
        for _ in 0..self.config.warmup {
            let _ = f(&mut self.engine);
        }

        // Actual benchmark
        let mut durations = Vec::new();
        let benchmark_start = Instant::now();

        for _ in 0..self.config.iterations {
            let start = Instant::now();
            let _ = f(&mut self.engine)?;
            let duration = start.elapsed();
            durations.push(duration);

            if benchmark_start.elapsed().as_millis() > self.config.max_duration_ms as u128 {
                debug!("Benchmark {} hit time limit", name);
                break;
            }
        }

        // Calculate statistics
        let total_time: Duration = durations.iter().sum();
        let avg_time = total_time / durations.len() as u32;
        let min_time = *durations
            .iter()
            .min()
            .expect("durations should not be empty");
        let max_time = *durations
            .iter()
            .max()
            .expect("durations should not be empty");

        // Calculate standard deviation
        let mean = avg_time.as_nanos() as f64;
        let variance: f64 = durations
            .iter()
            .map(|d| {
                let diff = d.as_nanos() as f64 - mean;
                diff * diff
            })
            .sum::<f64>()
            / durations.len() as f64;
        let std_dev = Duration::from_nanos(variance.sqrt() as u64);

        // Calculate throughput
        let throughput = if avg_time.as_secs_f64() > 0.0 {
            1.0 / avg_time.as_secs_f64()
        } else {
            f64::INFINITY
        };

        // Estimate memory usage after benchmark execution
        let memory_bytes = self.estimate_memory_usage();

        Ok(BenchmarkResult {
            name: name.to_string(),
            category,
            total_time,
            avg_time,
            min_time,
            max_time,
            std_dev,
            throughput,
            samples: durations.len(),
            memory_bytes,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_benchmark_config_default() {
        let config = BenchmarkConfig::default();
        assert_eq!(config.iterations, 100);
        assert_eq!(config.warmup, 10);
        assert!(!config.detailed_profiling);
    }

    #[test]
    fn test_benchmark_config_builder() {
        let config = BenchmarkConfig::default()
            .with_iterations(50)
            .with_warmup(5)
            .with_detailed_profiling(true);

        assert_eq!(config.iterations, 50);
        assert_eq!(config.warmup, 5);
        assert!(config.detailed_profiling);
    }

    #[test]
    fn test_benchmark_categories() {
        let categories = BenchmarkCategory::all();
        assert_eq!(categories.len(), 10);
        assert!(categories.contains(&BenchmarkCategory::ForwardChaining));
    }

    #[test]
    fn test_benchmark_category_names() {
        assert_eq!(
            BenchmarkCategory::ForwardChaining.name(),
            "Forward Chaining"
        );
        assert_eq!(
            BenchmarkCategory::BackwardChaining.name(),
            "Backward Chaining"
        );
    }

    #[test]
    fn test_benchmark_suite_creation() {
        let config = BenchmarkConfig::default().with_iterations(10);
        let suite = BenchmarkSuite::new(config);

        assert!(suite.datasets.contains_key("small"));
        assert!(suite.datasets.contains_key("medium"));
        assert!(suite.datasets.contains_key("large"));
    }

    #[test]
    fn test_benchmark_forward_chaining() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(5).with_warmup(1);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;
        assert!(!results.results.is_empty());

        for result in &results.results {
            assert_eq!(result.category, BenchmarkCategory::ForwardChaining);
            assert!(result.throughput > 0.0);
            assert!(result.samples > 0);
        }
        Ok(())
    }

    #[test]
    fn test_benchmark_backward_chaining() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(1).with_warmup(0);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_category(BenchmarkCategory::BackwardChaining)?;
        assert!(!results.results.is_empty());
        // Should only have one result (small dataset)
        assert_eq!(results.results.len(), 1);
        Ok(())
    }

    #[test]
    fn test_benchmark_rete_matching() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(5).with_warmup(1);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_category(BenchmarkCategory::ReteMatching)?;
        assert!(!results.results.is_empty());
        Ok(())
    }

    #[test]
    fn test_benchmark_results_report() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(5);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;
        let report = results.generate_report();

        assert!(report.contains("Benchmark Report"));
        assert!(report.contains("Forward Chaining"));
        Ok(())
    }

    #[test]
    fn test_benchmark_result_summary() {
        let result = BenchmarkResult {
            name: "test".to_string(),
            category: BenchmarkCategory::ForwardChaining,
            total_time: Duration::from_millis(100),
            avg_time: Duration::from_micros(1000),
            min_time: Duration::from_micros(800),
            max_time: Duration::from_micros(1200),
            std_dev: Duration::from_micros(100),
            throughput: 1000.0,
            samples: 100,
            memory_bytes: 1024,
        };

        let summary = result.summary();
        assert!(summary.contains("test"));
        assert!(summary.contains("1000 ops/sec"));
    }

    #[test]
    fn test_benchmark_fastest_slowest() -> Result<(), Box<dyn std::error::Error>> {
        let results = BenchmarkResults {
            results: vec![
                BenchmarkResult {
                    name: "fast".to_string(),
                    category: BenchmarkCategory::ForwardChaining,
                    total_time: Duration::from_millis(10),
                    avg_time: Duration::from_micros(100),
                    min_time: Duration::from_micros(90),
                    max_time: Duration::from_micros(110),
                    std_dev: Duration::from_micros(5),
                    throughput: 10000.0,
                    samples: 100,
                    memory_bytes: 1024,
                },
                BenchmarkResult {
                    name: "slow".to_string(),
                    category: BenchmarkCategory::ForwardChaining,
                    total_time: Duration::from_millis(100),
                    avg_time: Duration::from_millis(1),
                    min_time: Duration::from_micros(900),
                    max_time: Duration::from_micros(1100),
                    std_dev: Duration::from_micros(50),
                    throughput: 1000.0,
                    samples: 100,
                    memory_bytes: 2048,
                },
            ],
            timestamp: std::time::SystemTime::now(),
        };

        let fastest = results
            .fastest_in_category(BenchmarkCategory::ForwardChaining)
            .ok_or("expected Some value")?;
        assert_eq!(fastest.name, "fast");

        let slowest = results
            .slowest_in_category(BenchmarkCategory::ForwardChaining)
            .ok_or("expected Some value")?;
        assert_eq!(slowest.name, "slow");
        Ok(())
    }

    #[test]
    fn test_run_all_benchmarks() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(2).with_warmup(1);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_all()?;
        assert!(!results.results.is_empty());

        let (total_time, avg_throughput) = results.overall_stats();
        assert!(total_time.as_millis() > 0);
        assert!(avg_throughput > 0.0);
        Ok(())
    }

    #[test]
    fn test_memory_tracking() -> Result<(), Box<dyn std::error::Error>> {
        let config = BenchmarkConfig::default().with_iterations(1).with_warmup(0);
        let mut suite = BenchmarkSuite::new(config);

        let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;

        // Verify memory tracking is working
        for result in &results.results {
            assert!(
                result.memory_bytes > 0,
                "Memory tracking should report non-zero bytes"
            );
            // Memory should be reasonable (> 1KB, < 100MB for our test datasets)
            assert!(
                result.memory_bytes >= 1024,
                "Memory usage should be at least 1KB"
            );
            assert!(
                result.memory_bytes <= 100 * 1024 * 1024,
                "Memory usage should be < 100MB for tests"
            );
        }
        Ok(())
    }
}