use super::{ToolResult, ToolStats};
use std::path::PathBuf;
use std::time::{Duration, Instant};
/// Benchmark suites that the benchmark tool knows how to run.
///
/// The built-in suites map to query sets that are hard-coded in this file;
/// `Custom` reads its queries from a user-supplied file instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BenchmarkSuite {
/// SPARQL Performance Benchmark (DBLP-like data).
SP2Bench,
/// Waterloo SPARQL Diversity Test Suite.
WatDiv,
/// Linked Data Benchmark Council.
LDBC,
/// Custom benchmark queries loaded from `BenchmarkConfig::queries_path`.
Custom,
}
impl BenchmarkSuite {
pub fn name(&self) -> &str {
match self {
BenchmarkSuite::SP2Bench => "SP2Bench",
BenchmarkSuite::WatDiv => "WatDiv",
BenchmarkSuite::LDBC => "LDBC",
BenchmarkSuite::Custom => "Custom",
}
}
pub fn description(&self) -> &str {
match self {
BenchmarkSuite::SP2Bench => "SPARQL Performance Benchmark (DBLP-like data)",
BenchmarkSuite::WatDiv => "Waterloo SPARQL Diversity Test Suite",
BenchmarkSuite::LDBC => "Linked Data Benchmark Council",
BenchmarkSuite::Custom => "Custom benchmark queries",
}
}
}
/// Settings for one invocation of the benchmark tool.
///
/// Derives `Debug` and `Clone` like the other public types in this module so
/// the configuration can be logged and reused by callers.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Which benchmark suite to run; decides where the queries come from.
    pub suite: BenchmarkSuite,
    /// Optional path to a dataset. Not read by the code in this file.
    pub data_path: Option<PathBuf>,
    /// Path to a file of `---`-separated queries; required when `suite` is
    /// `BenchmarkSuite::Custom`.
    pub queries_path: Option<PathBuf>,
    /// Number of untimed executions performed per query before measuring.
    pub warmup_runs: usize,
    /// Number of timed executions performed per query.
    pub benchmark_runs: usize,
    /// Per-execution time limit handed to the query executor.
    pub timeout: Duration,
    /// Output format: `"table"`, `"json"` or `"csv"`. Any other value is
    /// rendered as a table.
    pub output_format: String,
}
/// A single query that can be benchmarked.
#[derive(Debug, Clone)]
pub struct BenchmarkQuery {
/// Stable identifier, e.g. `"SP2B-Q1"` or `"Custom-Q3"`; used as the row key in reports.
pub id: String,
/// Human-readable name shown while the benchmark is running.
pub name: String,
/// The query text handed to the executor.
pub query: String,
/// Free-form category label such as `"Simple"` or `"Star"`. Not read by the code in this file.
pub category: String,
}
/// Timing statistics (or the failure state) recorded for one benchmarked query.
#[derive(Debug, Clone)]
pub struct QueryBenchmarkResult {
/// Identifier of the benchmarked query (copied from `BenchmarkQuery::id`).
pub query_id: String,
/// Display name of the benchmarked query (copied from `BenchmarkQuery::name`).
pub query_name: String,
/// Durations of the successful timed runs, in execution order; empty for a failed query.
pub runs: Vec<Duration>,
/// Arithmetic mean of `runs`; zero for a failed query.
pub mean: Duration,
/// Median of `runs`; zero for a failed query.
pub median: Duration,
/// Fastest run; zero for a failed query.
pub min: Duration,
/// Slowest run; zero for a failed query.
pub max: Duration,
/// Standard deviation of `runs` (computed over all runs, dividing by the run count).
pub std_dev: Duration,
/// Set on a failed query when one of its runs hit the timeout.
pub timeout: bool,
/// Error message for a query that failed for a reason other than a timeout.
pub error: Option<String>,
}
/// Aggregated outcome of running a whole benchmark suite.
#[derive(Debug)]
pub struct BenchmarkResults {
/// Suite that produced these results.
pub suite: BenchmarkSuite,
/// Number of queries that were part of the run.
pub total_queries: usize,
/// Number of queries with at least one successful timed run.
pub successful_queries: usize,
/// Number of queries for which no timed run succeeded.
pub failed_queries: usize,
/// Total duration of the suite execution.
/// NOTE(review): `run` measures and prints its own elapsed time rather than
/// reading this field; confirm the producer fills it in before relying on it.
pub total_duration: Duration,
/// Per-query results, in the order the queries were executed.
pub query_results: Vec<QueryBenchmarkResult>,
}
pub async fn run(config: BenchmarkConfig) -> ToolResult {
let mut stats = ToolStats::new();
println!("OxiRS SPARQL Benchmark Suite");
println!("============================\n");
println!("Suite: {}", config.suite.name());
println!("Description: {}", config.suite.description());
println!("Warmup runs: {}", config.warmup_runs);
println!("Benchmark runs: {}", config.benchmark_runs);
println!("Timeout: {:?}\n", config.timeout);
let queries = load_benchmark_queries(&config)?;
println!("Loaded {} benchmark queries\n", queries.len());
let start = Instant::now();
let results = execute_benchmarks(&queries, &config).await?;
let total_duration = start.elapsed();
display_results(&results, &config.output_format)?;
println!("\n=== Summary ===");
println!("Total queries: {}", results.total_queries);
println!("Successful: {}", results.successful_queries);
println!("Failed: {}", results.failed_queries);
println!("Total time: {:.2}s", total_duration.as_secs_f64());
stats.items_processed = results.total_queries;
stats.finish();
stats.print_summary("Benchmark");
Ok(())
}
/// Loads the queries for the suite selected in `config`.
///
/// Built-in suites use their hard-coded query sets; the custom suite reads
/// its queries from `config.queries_path`.
///
/// # Errors
///
/// Returns an error when the custom suite is selected without a queries
/// path, or when the suite-specific loader fails.
fn load_benchmark_queries(config: &BenchmarkConfig) -> ToolResult<Vec<BenchmarkQuery>> {
    match config.suite {
        BenchmarkSuite::SP2Bench => load_sp2bench_queries(),
        BenchmarkSuite::WatDiv => load_watdiv_queries(),
        BenchmarkSuite::LDBC => load_ldbc_queries(),
        BenchmarkSuite::Custom => match &config.queries_path {
            Some(path) => load_custom_queries(path),
            None => Err("Custom benchmark requires --queries-path".into()),
        },
    }
}
/// Returns the built-in subset of SP2Bench queries.
///
/// The queries are stored as a static table and converted into owned
/// [`BenchmarkQuery`] values on every call. This loader never fails.
fn load_sp2bench_queries() -> ToolResult<Vec<BenchmarkQuery>> {
    // Columns: (id, name, category, query text).
    const SP2BENCH: [(&str, &str, &str, &str); 5] = [
        (
            "SP2B-Q1",
            "Simple Triple Pattern",
            "Simple",
            "SELECT ?yr WHERE { ?journal rdf:type bench:Journal . ?journal dc:title \"Journal 1 (1940)\"^^xsd:string . ?journal dcterms:issued ?yr }",
        ),
        (
            "SP2B-Q2",
            "Triple Patterns with FILTER",
            "Filter",
            "SELECT ?inproc ?author ?booktitle ?title ?proc ?ee ?page ?url ?yr ?abstract WHERE { ?inproc rdf:type bench:Inproceedings . ?inproc dc:creator ?author . ?inproc bench:booktitle ?booktitle . ?inproc dc:title ?title . ?inproc dcterms:partOf ?proc . ?inproc rdfs:seeAlso ?ee . ?inproc swrc:pages ?page . ?inproc foaf:homepage ?url . ?inproc dcterms:issued ?yr FILTER(?yr > 1970) }",
        ),
        (
            "SP2B-Q3a",
            "Join of Multiple Patterns",
            "Join",
            "SELECT ?article WHERE { ?article rdf:type bench:Article . ?article ?property ?value }",
        ),
        (
            "SP2B-Q4",
            "OPTIONAL Patterns",
            "Optional",
            "SELECT ?name ?nameTitle WHERE { ?article rdf:type bench:Article . ?article dc:creator ?author . ?author foaf:name ?name . OPTIONAL { ?author dc:title ?nameTitle } }",
        ),
        (
            "SP2B-Q5a",
            "UNION Query",
            "Union",
            "SELECT DISTINCT ?person ?name WHERE { ?article rdf:type bench:Article . ?article dc:creator ?person . ?inproc rdf:type bench:Inproceedings . ?inproc dc:creator ?person2 . ?person foaf:name ?name . ?person2 foaf:name ?name2 FILTER(?name = ?name2) }",
        ),
    ];

    let queries: Vec<BenchmarkQuery> = SP2BENCH
        .iter()
        .map(|&(id, name, category, text)| BenchmarkQuery {
            id: id.to_owned(),
            name: name.to_owned(),
            query: text.to_owned(),
            category: category.to_owned(),
        })
        .collect();
    Ok(queries)
}
/// Returns the built-in subset of WatDiv queries: one linear, one star, one
/// snowflake and one complex query. This loader never fails.
fn load_watdiv_queries() -> ToolResult<Vec<BenchmarkQuery>> {
    /// Builds an owned query record from static parts.
    fn entry(id: &str, name: &str, category: &str, sparql: &str) -> BenchmarkQuery {
        BenchmarkQuery {
            id: id.to_string(),
            name: name.to_string(),
            query: sparql.to_string(),
            category: category.to_string(),
        }
    }

    Ok(vec![
        entry(
            "WatDiv-L1",
            "Linear Query 1",
            "Linear",
            "SELECT ?v0 ?v1 ?v2 ?v3 WHERE { ?v0 wsdbm:follows ?v1 . ?v1 wsdbm:follows ?v2 . ?v2 wsdbm:likes ?v3 }",
        ),
        entry(
            "WatDiv-S1",
            "Star Query 1",
            "Star",
            "SELECT ?v0 ?v1 ?v2 ?v3 ?v4 WHERE { ?v0 wsdbm:follows ?v1 . ?v0 wsdbm:likes ?v2 . ?v0 wsdbm:friendOf ?v3 . ?v0 dc:Location ?v4 }",
        ),
        entry(
            "WatDiv-F1",
            "Snowflake Query 1",
            "Snowflake",
            "SELECT ?v0 ?v1 ?v2 ?v3 ?v4 ?v5 WHERE { ?v0 wsdbm:follows ?v1 . ?v1 wsdbm:likes ?v2 . ?v0 wsdbm:friendOf ?v3 . ?v3 wsdbm:likes ?v4 . ?v0 dc:Location ?v5 }",
        ),
        entry(
            "WatDiv-C1",
            "Complex Query 1",
            "Complex",
            "SELECT ?v0 ?v1 ?v2 ?v3 WHERE { ?v0 wsdbm:follows ?v1 . ?v1 wsdbm:likes ?v2 . ?v1 wsdbm:friendOf ?v3 . ?v3 wsdbm:likes ?v2 }",
        ),
    ])
}
/// Returns the built-in subset of LDBC queries: one "interactive complex"
/// and one "interactive short" query. This loader never fails.
fn load_ldbc_queries() -> ToolResult<Vec<BenchmarkQuery>> {
    let interactive_complex = BenchmarkQuery {
        id: String::from("LDBC-IC1"),
        name: String::from("Interactive Complex 1"),
        query: String::from(
            "SELECT ?personId ?firstName ?lastName ?birthday ?locationIP ?browserUsed ?cityId WHERE { ?person rdf:type snvoc:Person . ?person snvoc:id ?personId . ?person snvoc:firstName ?firstName . ?person snvoc:lastName ?lastName . ?person snvoc:birthday ?birthday . ?person snvoc:locationIP ?locationIP . ?person snvoc:browserUsed ?browserUsed . ?person snvoc:isLocatedIn ?city . ?city snvoc:id ?cityId }",
        ),
        category: String::from("Complex"),
    };
    let interactive_short = BenchmarkQuery {
        id: String::from("LDBC-IS1"),
        name: String::from("Interactive Short 1"),
        query: String::from(
            "SELECT ?firstName ?lastName ?birthday WHERE { ?person rdf:type snvoc:Person . ?person snvoc:id ?personId . ?person snvoc:firstName ?firstName . ?person snvoc:lastName ?lastName . ?person snvoc:birthday ?birthday }",
        ),
        category: String::from("Short"),
    };
    Ok(vec![interactive_complex, interactive_short])
}
/// Loads user-defined benchmark queries from the file at `path`.
///
/// The file content is split on the literal separator `---`. Each segment is
/// trimmed and segments that end up empty (for example around a leading,
/// trailing or doubled separator) are skipped. The remaining queries are
/// numbered consecutively from 1 in file order, so the generated ids
/// (`Custom-Q1`, `Custom-Q2`, ...) never contain gaps.
///
/// A file without any non-empty segment yields an empty list.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read.
fn load_custom_queries(path: &std::path::Path) -> ToolResult<Vec<BenchmarkQuery>> {
    let content = std::fs::read_to_string(path)?;

    let queries: Vec<BenchmarkQuery> = content
        .split("---")
        .map(str::trim)
        .filter(|text| !text.is_empty())
        // Enumerate after filtering so numbering ignores empty segments.
        .enumerate()
        .map(|(index, text)| {
            let number = index + 1;
            BenchmarkQuery {
                id: format!("Custom-Q{}", number),
                name: format!("Custom Query {}", number),
                query: text.to_string(),
                category: "Custom".to_string(),
            }
        })
        .collect();

    Ok(queries)
}
async fn execute_benchmarks(
queries: &[BenchmarkQuery],
config: &BenchmarkConfig,
) -> ToolResult<BenchmarkResults> {
let mut query_results = Vec::new();
let mut successful = 0;
let mut failed = 0;
for (i, query) in queries.iter().enumerate() {
println!(
"Running benchmark {}/{}: {}",
i + 1,
queries.len(),
query.name
);
if config.warmup_runs > 0 {
print!(" Warmup ({} runs)...", config.warmup_runs);
for _ in 0..config.warmup_runs {
let _ = execute_query(&query.query, config.timeout).await;
}
println!(" done");
}
let mut run_times = Vec::new();
let mut error = None;
let mut timeout_occurred = false;
print!(" Benchmarking ({} runs): ", config.benchmark_runs);
for run in 0..config.benchmark_runs {
match execute_query(&query.query, config.timeout).await {
Ok(duration) => {
run_times.push(duration);
print!(".");
}
Err(e) => {
if e.to_string().contains("timeout") {
timeout_occurred = true;
print!("T");
} else {
error = Some(e.to_string());
print!("E");
}
}
}
std::io::Write::flush(&mut std::io::stdout()).unwrap_or(());
if run == 0 && (timeout_occurred || error.is_some()) {
println!(" (skipping remaining runs)");
break;
}
}
if !run_times.is_empty() {
println!(" done");
let result = calculate_query_statistics(&query.id, &query.name, &run_times);
println!(
" Mean: {:?}, Median: {:?}, Min: {:?}, Max: {:?}",
result.mean, result.median, result.min, result.max
);
successful += 1;
query_results.push(result);
} else {
println!();
println!(
" Failed: {:?}",
error.clone().or(Some("timeout".to_string()))
);
failed += 1;
query_results.push(QueryBenchmarkResult {
query_id: query.id.clone(),
query_name: query.name.clone(),
runs: vec![],
mean: Duration::ZERO,
median: Duration::ZERO,
min: Duration::ZERO,
max: Duration::ZERO,
std_dev: Duration::ZERO,
timeout: timeout_occurred,
error,
});
}
println!();
}
Ok(BenchmarkResults {
suite: config.suite,
total_queries: queries.len(),
successful_queries: successful,
failed_queries: failed,
total_duration: Duration::ZERO, query_results,
})
}
/// Simulates the execution of `query` and returns how long it took.
///
/// No query engine is involved: the function sleeps for one millisecond per
/// 100 bytes of query text, capped at 50 ms, and reports the elapsed time.
///
/// NOTE(review): the sleep is a blocking `std::thread::sleep` inside an
/// `async fn`, so it blocks the executing thread for its whole duration.
///
/// # Errors
///
/// Returns a `"Query timeout"` error when the measured time exceeds
/// `timeout`. The check happens after the simulated work has finished, so
/// the work itself is never interrupted.
async fn execute_query(query: &str, timeout: Duration) -> ToolResult<Duration> {
    const BYTES_PER_SIMULATED_MS: u64 = 100;
    const MAX_SIMULATED_MS: u64 = 50;

    let started = Instant::now();
    let simulated_ms = (query.len() as u64 / BYTES_PER_SIMULATED_MS).min(MAX_SIMULATED_MS);
    std::thread::sleep(Duration::from_millis(simulated_ms));
    let elapsed = started.elapsed();

    if elapsed <= timeout {
        Ok(elapsed)
    } else {
        Err("Query timeout".into())
    }
}
/// Computes summary statistics over the timed runs of one query.
///
/// * `query_id` / `query_name` - copied into the result unchanged.
/// * `run_times` - durations of the successful runs, in execution order.
///
/// The result holds the runs in their original order plus mean, median (the
/// average of the two middle values for an even count), minimum, maximum and
/// the population standard deviation (variance divided by the run count).
/// `timeout` is `false` and `error` is `None`.
///
/// An empty `run_times` slice yields a result whose statistics are all
/// `Duration::ZERO` instead of panicking.
fn calculate_query_statistics(
    query_id: &str,
    query_name: &str,
    run_times: &[Duration],
) -> QueryBenchmarkResult {
    const NANOS_PER_SEC: u128 = 1_000_000_000;

    let mut sorted_times = run_times.to_vec();
    // Equal durations are indistinguishable, so stability is irrelevant.
    sorted_times.sort_unstable();

    let (min, max) = match (sorted_times.first(), sorted_times.last()) {
        (Some(&fastest), Some(&slowest)) => (fastest, slowest),
        // No samples: there is nothing to average, report all-zero statistics.
        _ => {
            return QueryBenchmarkResult {
                query_id: query_id.to_string(),
                query_name: query_name.to_string(),
                runs: Vec::new(),
                mean: Duration::ZERO,
                median: Duration::ZERO,
                min: Duration::ZERO,
                max: Duration::ZERO,
                std_dev: Duration::ZERO,
                timeout: false,
                error: None,
            };
        }
    };
    let count = sorted_times.len();

    // Average in integer nanoseconds: avoids narrowing the length to `u32`
    // and cannot overflow while summing. The mean never exceeds `max`, so the
    // seconds part always fits into `u64`.
    let total_nanos: u128 = sorted_times.iter().map(Duration::as_nanos).sum();
    let mean_nanos = total_nanos / count as u128;
    let mean = Duration::new(
        (mean_nanos / NANOS_PER_SEC) as u64,
        (mean_nanos % NANOS_PER_SEC) as u32,
    );

    let mid = count / 2;
    let median = if count % 2 == 0 {
        (sorted_times[mid - 1] + sorted_times[mid]) / 2
    } else {
        sorted_times[mid]
    };

    let mean_secs = mean.as_secs_f64();
    let variance: f64 = sorted_times
        .iter()
        .map(|t| {
            let diff = t.as_secs_f64() - mean_secs;
            diff * diff
        })
        .sum::<f64>()
        / count as f64;
    let std_dev = Duration::from_secs_f64(variance.sqrt());

    QueryBenchmarkResult {
        query_id: query_id.to_string(),
        query_name: query_name.to_string(),
        runs: run_times.to_vec(),
        mean,
        median,
        min,
        max,
        std_dev,
        timeout: false,
        error: None,
    }
}
fn display_results(results: &BenchmarkResults, format: &str) -> ToolResult<()> {
println!("\n=== Benchmark Results ===\n");
match format {
"table" => display_table_results(results),
"json" => display_json_results(results),
"csv" => display_csv_results(results),
_ => display_table_results(results),
}
}
/// Prints the results as a fixed-width text table on standard output.
///
/// Queries that timed out are shown as `TIMEOUT`, queries with another
/// error as `ERROR` (a timeout takes precedence); all other rows list the
/// timing statistics. Always returns `Ok`.
fn display_table_results(results: &BenchmarkResults) -> ToolResult<()> {
    println!(
        "{:<20} {:>12} {:>12} {:>12} {:>12} {:>10}",
        "Query", "Mean", "Median", "Min", "Max", "StdDev"
    );
    println!("{}", "-".repeat(80));

    for row in results.query_results.iter() {
        let failure_label = if row.timeout {
            Some("TIMEOUT")
        } else if row.error.is_some() {
            Some("ERROR")
        } else {
            None
        };

        match failure_label {
            Some(label) => println!("{:<20} {:>12}", row.query_id, label),
            None => println!(
                "{:<20} {:>10.2?} {:>10.2?} {:>10.2?} {:>10.2?} {:>8.2?}",
                row.query_id, row.mean, row.median, row.min, row.max, row.std_dev,
            ),
        }
    }
    Ok(())
}
/// Prints the results as a JSON document on standard output.
///
/// The document has the suite name, the query counters and a `queries` array
/// with one object per query. Durations are reported in whole milliseconds.
/// Each query object also carries a `timeout` flag and an `error` value (a
/// string or `null`), so failed queries can be told apart from queries that
/// simply took 0 ms. All string values are JSON-escaped, so names or error
/// messages that contain quotes, backslashes or control characters still
/// yield valid JSON. Always returns `Ok`.
fn display_json_results(results: &BenchmarkResults) -> ToolResult<()> {
    /// Renders `raw` as a quoted JSON string literal.
    fn json_string(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len() + 2);
        out.push('"');
        for ch in raw.chars() {
            match ch {
                '"' => out.push_str("\\\""),
                '\\' => out.push_str("\\\\"),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                '\t' => out.push_str("\\t"),
                // Remaining control characters must be written as \uXXXX.
                c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
                c => out.push(c),
            }
        }
        out.push('"');
        out
    }

    let query_count = results.query_results.len();

    println!("{{");
    println!(" \"suite\": {},", json_string(results.suite.name()));
    println!(" \"total_queries\": {},", results.total_queries);
    println!(" \"successful\": {},", results.successful_queries);
    println!(" \"failed\": {},", results.failed_queries);
    println!(" \"queries\": [");
    for (i, result) in results.query_results.iter().enumerate() {
        let error_json = match &result.error {
            Some(message) => json_string(message),
            None => "null".to_string(),
        };

        println!(" {{");
        println!(" \"id\": {},", json_string(&result.query_id));
        println!(" \"name\": {},", json_string(&result.query_name));
        println!(" \"mean_ms\": {},", result.mean.as_millis());
        println!(" \"median_ms\": {},", result.median.as_millis());
        println!(" \"min_ms\": {},", result.min.as_millis());
        println!(" \"max_ms\": {},", result.max.as_millis());
        println!(" \"std_dev_ms\": {},", result.std_dev.as_millis());
        println!(" \"timeout\": {},", result.timeout);
        println!(" \"error\": {}", error_json);
        print!(" }}");
        // Every element except the last is followed by a comma.
        if i + 1 < query_count {
            println!(",");
        } else {
            println!();
        }
    }
    println!(" ]");
    println!("}}");
    Ok(())
}
/// Prints the results as comma-separated values on standard output.
///
/// The first line is the column header; each following line holds one query
/// with its id, name and timing statistics in whole milliseconds. Fields are
/// written verbatim, without quoting. Always returns `Ok`.
fn display_csv_results(results: &BenchmarkResults) -> ToolResult<()> {
    println!("query_id,query_name,mean_ms,median_ms,min_ms,max_ms,std_dev_ms");

    for entry in results.query_results.iter() {
        let row = [
            entry.query_id.clone(),
            entry.query_name.clone(),
            entry.mean.as_millis().to_string(),
            entry.median.as_millis().to_string(),
            entry.min.as_millis().to_string(),
            entry.max.as_millis().to_string(),
            entry.std_dev.as_millis().to_string(),
        ]
        .join(",");
        println!("{}", row);
    }
    Ok(())
}