//! Benchmark suite for the OxiRS rule engine.
//!
//! Exercises forward chaining, backward chaining, and RETE matching over
//! synthetic datasets; see [`BenchmarkSuite::run_all`] for the entry point.
use crate::{Rule, RuleAtom, RuleEngine, Term};
use anyhow::Result;
use scirs2_core::metrics::{Counter, Timer};
use std::collections::HashMap;
use std::time::{Duration, Instant};
use tracing::{debug, info};
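// Process-wide metrics shared by all benchmark suites.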
lazy_static::lazy_static! {
static ref BENCHMARK_RUNS: Counter = Counter::new("benchmark_runs".to_string());
static ref BENCHMARK_FAILURES: Counter = Counter::new("benchmark_failures".to_string());
static ref BENCHMARK_TIME: Timer = Timer::new("benchmark_total_time".to_string());
}
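/// The benchmark categories covered by the suite, in report order.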
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BenchmarkCategory {
ForwardChaining,
BackwardChaining,
ReteMatching,
IncrementalReasoning,
ParallelExecution,
SparqlIntegration,
ShaclValidation,
RuleOptimization,
MemoryUsage,
Scalability,
}
impl BenchmarkCategory {
pub fn all() -> Vec<Self> {
vec![
Self::ForwardChaining,
Self::BackwardChaining,
Self::ReteMatching,
Self::IncrementalReasoning,
Self::ParallelExecution,
Self::SparqlIntegration,
Self::ShaclValidation,
Self::RuleOptimization,
Self::MemoryUsage,
Self::Scalability,
]
}
pub fn name(&self) -> &str {
match self {
Self::ForwardChaining => "Forward Chaining",
Self::BackwardChaining => "Backward Chaining",
Self::ReteMatching => "RETE Matching",
Self::IncrementalReasoning => "Incremental Reasoning",
Self::ParallelExecution => "Parallel Execution",
Self::SparqlIntegration => "SPARQL Integration",
Self::ShaclValidation => "SHACL Validation",
Self::RuleOptimization => "Rule Optimization",
Self::MemoryUsage => "Memory Usage",
Self::Scalability => "Scalability",
}
}
}
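/// Tuning knobs for a benchmark run.
///
/// A minimal builder-style sketch (assuming the type is in scope):
///
/// ```ignore
/// let config = BenchmarkConfig::default()
///     .with_iterations(50)
///     .with_warmup(5)
///     .with_detailed_profiling(true);
/// ```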
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
/// Number of timed iterations per benchmark.
pub iterations: usize,
/// Untimed warm-up iterations run before measurement.
pub warmup: usize,
/// Requests extra per-run profiling (not yet used by the measurement loop).
pub detailed_profiling: bool,
/// Wall-clock cap for a single benchmark, in milliseconds.
pub max_duration_ms: u64,
/// Minimum samples to collect (not yet enforced by the measurement loop).
pub min_samples: usize,
}
impl Default for BenchmarkConfig {
fn default() -> Self {
Self {
iterations: 100,
warmup: 10,
detailed_profiling: false,
max_duration_ms: 60_000,
min_samples: 10,
}
}
}
impl BenchmarkConfig {
pub fn with_iterations(mut self, iterations: usize) -> Self {
self.iterations = iterations;
self
}
pub fn with_warmup(mut self, warmup: usize) -> Self {
self.warmup = warmup;
self
}
pub fn with_detailed_profiling(mut self, enabled: bool) -> Self {
self.detailed_profiling = enabled;
self
}
}
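/// Aggregated timing and memory statistics for a single benchmark.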
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
pub name: String,
pub category: BenchmarkCategory,
/// Sum of all timed iterations.
pub total_time: Duration,
/// Mean time per iteration.
pub avg_time: Duration,
pub min_time: Duration,
pub max_time: Duration,
/// Population standard deviation of the iteration times.
pub std_dev: Duration,
/// Operations per second, computed as `1 / avg_time`.
pub throughput: f64,
/// Number of timed iterations actually collected.
pub samples: usize,
/// Estimated engine memory footprint in bytes (heuristic).
pub memory_bytes: usize,
}
impl BenchmarkResult {
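/// One-line summary: name, mean latency in ms, throughput in ops/sec, and
/// estimated memory in KB.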
pub fn summary(&self) -> String {
format!(
"{}: avg={:.2}ms, throughput={:.0} ops/sec, mem={}KB",
self.name,
self.avg_time.as_secs_f64() * 1000.0,
self.throughput,
self.memory_bytes / 1024
)
}
}
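/// A batch of benchmark results plus the time they were captured.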
#[derive(Debug, Clone)]
pub struct BenchmarkResults {
pub results: Vec<BenchmarkResult>,
pub timestamp: std::time::SystemTime,
}
impl BenchmarkResults {
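/// Renders a plain-text report with results grouped by category.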
pub fn generate_report(&self) -> String {
let mut report = String::new();
report.push_str("═══════════════════════════════════════════════════════════════\n");
report.push_str(" OxiRS Rule Engine Benchmark Report\n");
report.push_str("═══════════════════════════════════════════════════════════════\n\n");
let mut by_category: HashMap<BenchmarkCategory, Vec<&BenchmarkResult>> = HashMap::new();
for result in &self.results {
by_category.entry(result.category).or_default().push(result);
}
for category in BenchmarkCategory::all() {
if let Some(results) = by_category.get(&category) {
report.push_str(&format!("\n{}\n", category.name()));
report.push_str(&format!("{}\n", "─".repeat(60)));
for result in results {
report.push_str(&format!(
" {:<30} {:>10.2}ms {:>12.0} ops/s\n",
result.name,
result.avg_time.as_secs_f64() * 1000.0,
result.throughput
));
}
}
}
report.push_str("\n═══════════════════════════════════════════════════════════════\n");
report
}
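/// Returns the result with the lowest mean time in `category`, or `None`
/// if the category produced no results.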
pub fn fastest_in_category(&self, category: BenchmarkCategory) -> Option<&BenchmarkResult> {
self.results
.iter()
.filter(|r| r.category == category)
.min_by_key(|r| r.avg_time)
}
pub fn slowest_in_category(&self, category: BenchmarkCategory) -> Option<&BenchmarkResult> {
self.results
.iter()
.filter(|r| r.category == category)
.max_by_key(|r| r.avg_time)
}
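/// Returns the summed wall-clock time and the mean throughput across all
/// results. The mean is NaN when there are no results.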
pub fn overall_stats(&self) -> (Duration, f64) {
let total_time: Duration = self.results.iter().map(|r| r.total_time).sum();
let avg_throughput: f64 =
self.results.iter().map(|r| r.throughput).sum::<f64>() / self.results.len() as f64;
(total_time, avg_throughput)
}
}
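/// Benchmark driver that owns a `RuleEngine` and three synthetic datasets
/// ("small" = 10, "medium" = 100, "large" = 1000 triples).
///
/// A minimal usage sketch (error handling elided):
///
/// ```ignore
/// let mut suite = BenchmarkSuite::new(BenchmarkConfig::default());
/// let results = suite.run_all()?;
/// println!("{}", results.generate_report());
/// ```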
pub struct BenchmarkSuite {
config: BenchmarkConfig,
engine: RuleEngine,
datasets: HashMap<String, Vec<RuleAtom>>,
}
impl BenchmarkSuite {
pub fn new(config: BenchmarkConfig) -> Self {
let mut suite = Self {
config,
engine: RuleEngine::new(),
datasets: HashMap::new(),
};
suite.load_datasets();
suite
}
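/// Populates the synthetic datasets: 10, 100, and 1000 triples of the
/// form `(s{i}, p, o{i})`.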
fn load_datasets(&mut self) {
self.datasets.insert(
"small".to_string(),
(0..10)
.map(|i| RuleAtom::Triple {
subject: Term::Constant(format!("s{i}")),
predicate: Term::Constant("p".to_string()),
object: Term::Constant(format!("o{i}")),
})
.collect(),
);
self.datasets.insert(
"medium".to_string(),
(0..100)
.map(|i| RuleAtom::Triple {
subject: Term::Constant(format!("s{i}")),
predicate: Term::Constant("p".to_string()),
object: Term::Constant(format!("o{i}")),
})
.collect(),
);
self.datasets.insert(
"large".to_string(),
(0..1000)
.map(|i| RuleAtom::Triple {
subject: Term::Constant(format!("s{i}")),
predicate: Term::Constant("p".to_string()),
object: Term::Constant(format!("o{i}")),
})
.collect(),
);
}
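/// Runs every category in sequence. A failing category increments
/// `BENCHMARK_FAILURES` and is skipped rather than aborting the whole run.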
pub fn run_all(&mut self) -> Result<BenchmarkResults> {
info!("Running all benchmarks");
let _timer = BENCHMARK_TIME.start();
let mut all_results = Vec::new();
for category in BenchmarkCategory::all() {
match self.run_category(category) {
Ok(mut results) => all_results.append(&mut results.results),
Err(e) => {
BENCHMARK_FAILURES.inc();
debug!("Category {:?} failed: {}", category, e);
}
}
}
BENCHMARK_RUNS.inc();
Ok(BenchmarkResults {
results: all_results,
timestamp: std::time::SystemTime::now(),
})
}
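/// Runs the benchmarks for a single category.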
pub fn run_category(&mut self, category: BenchmarkCategory) -> Result<BenchmarkResults> {
info!("Running benchmarks for {:?}", category);
let results = match category {
BenchmarkCategory::ForwardChaining => self.bench_forward_chaining(),
BenchmarkCategory::BackwardChaining => self.bench_backward_chaining(),
BenchmarkCategory::ReteMatching => self.bench_rete_matching(),
BenchmarkCategory::IncrementalReasoning => self.bench_incremental_reasoning(),
BenchmarkCategory::ParallelExecution => self.bench_parallel_execution(),
BenchmarkCategory::SparqlIntegration => self.bench_sparql_integration(),
BenchmarkCategory::ShaclValidation => self.bench_shacl_validation(),
BenchmarkCategory::RuleOptimization => self.bench_rule_optimization(),
BenchmarkCategory::MemoryUsage => self.bench_memory_usage(),
BenchmarkCategory::Scalability => self.bench_scalability(),
}?;
Ok(BenchmarkResults {
results,
timestamp: std::time::SystemTime::now(),
})
}
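/// Benchmarks forward chaining with a single `p(X, Y) => q(X, Y)` rule
/// over each dataset size.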
fn bench_forward_chaining(&mut self) -> Result<Vec<BenchmarkResult>> {
let mut results = Vec::new();
let rule = Rule {
name: "simple".to_string(),
body: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("p".to_string()),
object: Term::Variable("Y".to_string()),
}],
head: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("q".to_string()),
object: Term::Variable("Y".to_string()),
}],
};
self.engine.add_rule(rule);
// Snapshot the datasets so the closure below can borrow the engine mutably.
let datasets: Vec<(String, Vec<RuleAtom>)> = self
.datasets
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
for (name, dataset) in datasets {
let result = self.run_benchmark_with_engine(
&format!("forward_chain_{}", name),
BenchmarkCategory::ForwardChaining,
|engine| {
engine.clear();
engine.forward_chain(&dataset)
},
)?;
results.push(result);
}
Ok(results)
}
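/// Benchmarks backward chaining against one goal over a three-fact
/// dataset, with the recursion depth capped at 5 to bound the search.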
fn bench_backward_chaining(&mut self) -> Result<Vec<BenchmarkResult>> {
let mut results = Vec::new();
self.engine.set_backward_chain_max_depth(5);
let rule = Rule {
name: "test".to_string(),
body: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("p".to_string()),
object: Term::Variable("Y".to_string()),
}],
head: vec![RuleAtom::Triple {
subject: Term::Variable("X".to_string()),
predicate: Term::Constant("q".to_string()),
object: Term::Variable("Y".to_string()),
}],
};
self.engine.add_rule(rule);
let minimal_dataset = vec![
RuleAtom::Triple {
subject: Term::Constant("s0".to_string()),
predicate: Term::Constant("p".to_string()),
object: Term::Constant("o0".to_string()),
},
RuleAtom::Triple {
subject: Term::Constant("s1".to_string()),
predicate: Term::Constant("p".to_string()),
object: Term::Constant("o1".to_string()),
},
RuleAtom::Triple {
subject: Term::Constant("s2".to_string()),
predicate: Term::Constant("p".to_string()),
object: Term::Constant("o2".to_string()),
},
];
let goal = RuleAtom::Triple {
subject: Term::Constant("s0".to_string()),
predicate: Term::Constant("q".to_string()),
object: Term::Constant("o0".to_string()), };
let goal_clone = goal.clone();
let dataset_clone = minimal_dataset.clone();
let result = self.run_benchmark_with_engine(
"backward_chain_minimal",
BenchmarkCategory::BackwardChaining,
|engine| {
engine.clear();
engine.set_backward_chain_max_depth(5);
engine.add_facts(dataset_clone.clone());
engine.backward_chain(&goal_clone)
},
)?;
results.push(result);
Ok(results)
}
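/// Benchmarks RETE-based forward chaining over each dataset size.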
fn bench_rete_matching(&mut self) -> Result<Vec<BenchmarkResult>> {
let mut results = Vec::new();
let datasets: Vec<(String, Vec<RuleAtom>)> = self
.datasets
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
for (name, dataset) in datasets {
let result = self.run_benchmark_with_engine(
&format!("rete_{}", name),
BenchmarkCategory::ReteMatching,
|engine| {
engine.clear();
engine.rete_forward_chain(dataset.clone())
},
)?;
results.push(result);
}
Ok(results)
}
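// The remaining categories are not implemented yet; each returns an empty
// result set so `run_all` still covers every variant.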
fn bench_incremental_reasoning(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_parallel_execution(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_sparql_integration(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_shacl_validation(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_rule_optimization(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_memory_usage(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
fn bench_scalability(&mut self) -> Result<Vec<BenchmarkResult>> {
Ok(vec![])
}
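/// Estimates engine memory with a simple linear model:
/// `facts * 160 B + rules * (24 + 5 * 160 + 2 * 24) B` plus 4 KiB of fixed
/// overhead. This is a heuristic, not a measured footprint.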
fn estimate_memory_usage(&self) -> usize {
// Rough per-item costs: a triple holds three String-backed Terms (~160 B
// with allocator overhead); a rule adds its name plus ~5 atoms and two Vec headers.
let facts = self.engine.get_facts();
let bytes_per_fact = 160;
let bytes_per_rule = 24 + (5 * 160) + (2 * 24);
let fact_count = facts.len();
// NOTE: assumes `RuleEngine` exposes a `get_rules()` accessor for the rule count.
let rule_count = self.engine.get_rules().len();
let facts_memory = fact_count * bytes_per_fact;
let rules_memory = rule_count * bytes_per_rule;
let engine_overhead = 4096;
facts_memory + rules_memory + engine_overhead
}
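/// Core measurement loop: runs `config.warmup` untimed iterations (errors
/// ignored), then up to `config.iterations` timed runs, stopping early once
/// `config.max_duration_ms` of wall-clock time has elapsed. Reports mean,
/// min, max, population standard deviation
/// (`sqrt(sum((d_i - mean)^2) / n)`), and throughput as `1 / mean`.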
fn run_benchmark_with_engine<F, T>(
&mut self,
name: &str,
category: BenchmarkCategory,
mut f: F,
) -> Result<BenchmarkResult>
where
F: FnMut(&mut RuleEngine) -> Result<T>,
{
debug!("Running benchmark: {}", name);
for _ in 0..self.config.warmup {
let _ = f(&mut self.engine);
}
let mut durations = Vec::new();
let benchmark_start = Instant::now();
for _ in 0..self.config.iterations {
let start = Instant::now();
let _ = f(&mut self.engine)?;
let duration = start.elapsed();
durations.push(duration);
if benchmark_start.elapsed().as_millis() > self.config.max_duration_ms as u128 {
debug!("Benchmark {} hit time limit", name);
break;
}
}
if durations.is_empty() {
anyhow::bail!("benchmark '{name}' collected no samples (is `iterations` zero?)");
}
let total_time: Duration = durations.iter().sum();
let avg_time = total_time / durations.len() as u32;
let min_time = *durations
.iter()
.min()
.expect("durations should not be empty");
let max_time = *durations
.iter()
.max()
.expect("durations should not be empty");
let mean = avg_time.as_nanos() as f64;
let variance: f64 = durations
.iter()
.map(|d| {
let diff = d.as_nanos() as f64 - mean;
diff * diff
})
.sum::<f64>()
/ durations.len() as f64;
let std_dev = Duration::from_nanos(variance.sqrt() as u64);
let throughput = if avg_time.as_secs_f64() > 0.0 {
1.0 / avg_time.as_secs_f64()
} else {
f64::INFINITY
};
let memory_bytes = self.estimate_memory_usage();
Ok(BenchmarkResult {
name: name.to_string(),
category,
total_time,
avg_time,
min_time,
max_time,
std_dev,
throughput,
samples: durations.len(),
memory_bytes,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_benchmark_config_default() {
let config = BenchmarkConfig::default();
assert_eq!(config.iterations, 100);
assert_eq!(config.warmup, 10);
assert!(!config.detailed_profiling);
}
#[test]
fn test_benchmark_config_builder() {
let config = BenchmarkConfig::default()
.with_iterations(50)
.with_warmup(5)
.with_detailed_profiling(true);
assert_eq!(config.iterations, 50);
assert_eq!(config.warmup, 5);
assert!(config.detailed_profiling);
}
#[test]
fn test_benchmark_categories() {
let categories = BenchmarkCategory::all();
assert_eq!(categories.len(), 10);
assert!(categories.contains(&BenchmarkCategory::ForwardChaining));
}
#[test]
fn test_benchmark_category_names() {
assert_eq!(
BenchmarkCategory::ForwardChaining.name(),
"Forward Chaining"
);
assert_eq!(
BenchmarkCategory::BackwardChaining.name(),
"Backward Chaining"
);
}
#[test]
fn test_benchmark_suite_creation() {
let config = BenchmarkConfig::default().with_iterations(10);
let suite = BenchmarkSuite::new(config);
assert!(suite.datasets.contains_key("small"));
assert!(suite.datasets.contains_key("medium"));
assert!(suite.datasets.contains_key("large"));
}
#[test]
fn test_benchmark_forward_chaining() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(5).with_warmup(1);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;
assert!(!results.results.is_empty());
for result in &results.results {
assert_eq!(result.category, BenchmarkCategory::ForwardChaining);
assert!(result.throughput > 0.0);
assert!(result.samples > 0);
}
Ok(())
}
#[test]
fn test_benchmark_backward_chaining() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(1).with_warmup(0);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_category(BenchmarkCategory::BackwardChaining)?;
assert!(!results.results.is_empty());
assert_eq!(results.results.len(), 1);
Ok(())
}
#[test]
fn test_benchmark_rete_matching() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(5).with_warmup(1);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_category(BenchmarkCategory::ReteMatching)?;
assert!(!results.results.is_empty());
Ok(())
}
#[test]
fn test_benchmark_results_report() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(5);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;
let report = results.generate_report();
assert!(report.contains("Benchmark Report"));
assert!(report.contains("Forward Chaining"));
Ok(())
}
#[test]
fn test_benchmark_result_summary() {
let result = BenchmarkResult {
name: "test".to_string(),
category: BenchmarkCategory::ForwardChaining,
total_time: Duration::from_millis(100),
avg_time: Duration::from_micros(1000),
min_time: Duration::from_micros(800),
max_time: Duration::from_micros(1200),
std_dev: Duration::from_micros(100),
throughput: 1000.0,
samples: 100,
memory_bytes: 1024,
};
let summary = result.summary();
assert!(summary.contains("test"));
assert!(summary.contains("1000 ops/sec"));
}
#[test]
fn test_benchmark_fastest_slowest() -> Result<(), Box<dyn std::error::Error>> {
let results = BenchmarkResults {
results: vec![
BenchmarkResult {
name: "fast".to_string(),
category: BenchmarkCategory::ForwardChaining,
total_time: Duration::from_millis(10),
avg_time: Duration::from_micros(100),
min_time: Duration::from_micros(90),
max_time: Duration::from_micros(110),
std_dev: Duration::from_micros(5),
throughput: 10000.0,
samples: 100,
memory_bytes: 1024,
},
BenchmarkResult {
name: "slow".to_string(),
category: BenchmarkCategory::ForwardChaining,
total_time: Duration::from_millis(100),
avg_time: Duration::from_millis(1),
min_time: Duration::from_micros(900),
max_time: Duration::from_micros(1100),
std_dev: Duration::from_micros(50),
throughput: 1000.0,
samples: 100,
memory_bytes: 2048,
},
],
timestamp: std::time::SystemTime::now(),
};
let fastest = results
.fastest_in_category(BenchmarkCategory::ForwardChaining)
.ok_or("expected Some value")?;
assert_eq!(fastest.name, "fast");
let slowest = results
.slowest_in_category(BenchmarkCategory::ForwardChaining)
.ok_or("expected Some value")?;
assert_eq!(slowest.name, "slow");
Ok(())
}
#[test]
fn test_run_all_benchmarks() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(2).with_warmup(1);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_all()?;
assert!(!results.results.is_empty());
let (total_time, avg_throughput) = results.overall_stats();
assert!(total_time.as_millis() > 0);
assert!(avg_throughput > 0.0);
Ok(())
}
#[test]
fn test_memory_tracking() -> Result<(), Box<dyn std::error::Error>> {
let config = BenchmarkConfig::default().with_iterations(1).with_warmup(0);
let mut suite = BenchmarkSuite::new(config);
let results = suite.run_category(BenchmarkCategory::ForwardChaining)?;
for result in &results.results {
assert!(
result.memory_bytes > 0,
"Memory tracking should report non-zero bytes"
);
assert!(
result.memory_bytes >= 1024,
"Memory usage should be at least 1KB"
);
assert!(
result.memory_bytes <= 100 * 1024 * 1024,
"Memory usage should be < 100MB for tests"
);
}
Ok(())
}
}