use super::CommandResult;
use scirs2_core::random::{Random, SeedableRng};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use std::time::{Duration, Instant};
/// Placeholder handle for an opened dataset store; it carries no state.
struct Store;

impl Store {
    /// Opens the store located at `_path`.
    ///
    /// The path is currently ignored and the call always succeeds.
    fn open(_path: &std::path::Path) -> Result<Self, Box<dyn std::error::Error>> {
        let handle = Self;
        Ok(handle)
    }
}
/// Runs a benchmark suite against a dataset directory and reports the results.
///
/// # Arguments
/// * `dataset` - Path of the dataset directory.
/// * `suite` - Benchmark suite name; must be accepted by `is_supported_benchmark_suite`.
/// * `iterations` - Number of measured executions per query.
/// * `output` - Optional file that receives the results as JSON.
/// * `detailed` - Print per-query progress and the full timing breakdown.
/// * `warmup` - Number of unmeasured warmup executions; `0` skips the warmup phase.
///
/// # Errors
/// Fails when the suite is unsupported, the dataset directory does not exist, or
/// any of the warmup, benchmark or save steps fails.
pub async fn run(
    dataset: String,
    suite: String,
    iterations: usize,
    output: Option<PathBuf>,
    detailed: bool,
    warmup: usize,
) -> CommandResult {
    println!("Running '{suite}' benchmark suite on dataset '{dataset}'");
    println!("Iterations: {iterations}, Warmup: {warmup}, Detailed: {detailed}");

    if !is_supported_benchmark_suite(&suite) {
        return Err(format!(
            "Unsupported benchmark suite '{suite}'. Supported suites: sp2bench, watdiv, ldbc, bsbm, custom"
        )
        .into());
    }

    let dataset_path = if PathBuf::from(&dataset).join("oxirs.toml").exists() {
        load_dataset_from_config(&dataset)?
    } else {
        PathBuf::from(&dataset)
    };

    let store = if dataset_path.is_dir() {
        Store::open(&dataset_path)?
    } else {
        return Err(format!(
            "Dataset '{dataset}' not found. Use 'oxirs init' to create a dataset."
        )
        .into());
    };
    println!("Dataset loaded successfully\n");

    if warmup > 0 {
        println!("Running {warmup} warmup iterations...");
        run_warmup_iterations(&store, &suite, warmup)?;
        println!("Warmup complete\n");
    }

    let mut benchmark_results = run_benchmark_suite(&store, &suite, iterations, detailed)?;
    // The suite runner never sees the warmup phase and stores 0; record the
    // real count here so saved result files describe how the run was made.
    benchmark_results.warmup_iterations = warmup;

    display_benchmark_results(&benchmark_results, detailed);

    if let Some(output_path) = output {
        save_benchmark_results(&benchmark_results, &output_path)?;
        println!("\nResults saved to: {}", output_path.display());
    }
    Ok(())
}
/// Generates a synthetic benchmark dataset and writes it to `output`.
///
/// The triple count is `triples` when given, otherwise it is derived from the
/// named `size` (tiny, small, medium, large, xlarge). `dataset_type` selects
/// the generator (rdf, graph, semantic). The generator RNG is seeded with
/// `seed`, or with 42 when no seed is given. `schema` is only echoed to stdout.
///
/// # Errors
/// Fails on an unknown size or dataset type, or when the file cannot be written.
pub async fn generate(
    output: PathBuf,
    size: String,
    dataset_type: String,
    seed: Option<u64>,
    triples: Option<usize>,
    schema: Option<PathBuf>,
) -> CommandResult {
    println!("Generating synthetic benchmark dataset");
    println!("Output: {}", output.display());
    println!("Size: {size}, Type: {dataset_type}");
    if let Some(s) = seed {
        println!("Random seed: {s}");
    }

    // An explicit triple count wins over the named size.
    let triple_count = match (triples, size.as_str()) {
        (Some(explicit), _) => explicit,
        (None, "tiny") => 1_000,
        (None, "small") => 10_000,
        (None, "medium") => 100_000,
        (None, "large") => 1_000_000,
        (None, "xlarge") => 10_000_000,
        (None, _) => {
            return Err(format!(
                "Invalid size '{size}'. Valid sizes: tiny, small, medium, large, xlarge"
            )
            .into())
        }
    };
    println!("Generating {} triples...\n", triple_count);

    let mut rng = Random::seed_from_u64(seed.unwrap_or(42));

    if let Some(schema_path) = schema {
        println!("Using schema: {}", schema_path.display());
    }

    let kind = dataset_type.as_str();
    let dataset = if kind == "rdf" {
        generate_rdf_dataset(triple_count, &mut rng)?
    } else if kind == "graph" {
        generate_graph_dataset(triple_count, &mut rng)?
    } else if kind == "semantic" {
        generate_semantic_dataset(triple_count, &mut rng)?
    } else {
        return Err(format!(
            "Invalid dataset type '{dataset_type}'. Valid types: rdf, graph, semantic"
        )
        .into());
    };

    if let Some(parent) = output.parent() {
        fs::create_dir_all(parent)?;
    }
    fs::write(&output, dataset)?;

    println!("✓ Dataset generated successfully");
    println!(" Total triples: {}", triple_count);
    println!(" Output file: {}", output.display());
    let size_mb = fs::metadata(&output)?.len() as f64 / 1_048_576.0;
    println!(" File size: {:.2} MB", size_mb);
    Ok(())
}
/// Analyzes a query log file and emits a workload report.
///
/// The log at `input` is parsed, summarized (with pattern detection when
/// `patterns` is set) and rendered in `format`. The report goes to `output`
/// when given, otherwise to stdout. `suggestions` is forwarded to the report
/// generator.
///
/// # Errors
/// Fails when the log cannot be read or the report cannot be built or written.
pub async fn analyze(
    input: PathBuf,
    output: Option<PathBuf>,
    format: String,
    suggestions: bool,
    patterns: bool,
) -> CommandResult {
    println!("Analyzing query workload");
    println!("Input: {}", input.display());
    println!("Format: {format}\n");

    let raw_log = fs::read_to_string(&input)?;
    let entries = parse_query_log(&raw_log)?;
    println!("Parsed {} queries from log\n", entries.len());

    let summary = analyze_query_workload(&entries, patterns)?;
    let rendered = generate_workload_report(&summary, suggestions, &format)?;

    match output {
        Some(output_path) => {
            fs::write(&output_path, &rendered)?;
            println!("Analysis report saved to: {}", output_path.display());
        }
        None => println!("{}", rendered),
    }
    Ok(())
}
/// Compares two saved benchmark result files and reports performance changes.
///
/// # Arguments
/// * `baseline` - JSON results file used as the reference.
/// * `current` - JSON results file being evaluated.
/// * `output` - Optional file for the report; stdout is used when absent.
/// * `threshold` - Slowdown in percent above which a query counts as a regression.
/// * `format` - Report format passed to `generate_comparison_report`.
///
/// # Errors
/// Fails when either file cannot be loaded, the report cannot be written, or
/// at least one regression was detected (so callers can gate on the result).
pub async fn compare(
    baseline: PathBuf,
    current: PathBuf,
    output: Option<PathBuf>,
    threshold: f64,
    format: String,
) -> CommandResult {
    println!("Comparing benchmark results");
    println!("Baseline: {}", baseline.display());
    println!("Current: {}", current.display());
    println!("Regression threshold: {:.1}%\n", threshold);

    let baseline_results: BenchmarkResults = load_benchmark_results(&baseline)?;
    let current_results: BenchmarkResults = load_benchmark_results(&current)?;
    let comparison = compare_benchmark_results(&baseline_results, &current_results, threshold)?;
    let report = generate_comparison_report(&comparison, format.as_str())?;

    if let Some(output_path) = output {
        fs::write(&output_path, &report)?;
        println!("Comparison report saved to: {}", output_path.display());
    } else {
        println!("{}", report);
    }

    // The report is emitted first so it is available even when the command fails.
    if comparison.has_regressions {
        return Err("Performance regressions detected!".into());
    }
    println!("\n✓ No performance regressions detected");
    Ok(())
}
/// Returns `true` when `suite` names one of the benchmark suites this command can run.
fn is_supported_benchmark_suite(suite: &str) -> bool {
    const SUPPORTED: [&str; 5] = ["sp2bench", "watdiv", "ldbc", "bsbm", "custom"];
    SUPPORTED.contains(&suite)
}
/// Resolves the dataset directory for a dataset that ships an `oxirs.toml`.
///
/// The configuration file is only checked for existence; its contents are not
/// read. On success the dataset directory itself is returned.
///
/// # Errors
/// Fails when `<dataset>/oxirs.toml` does not exist.
fn load_dataset_from_config(dataset: &str) -> Result<PathBuf, Box<dyn std::error::Error>> {
    let root = PathBuf::from(dataset);
    let config_path = root.join("oxirs.toml");
    if config_path.exists() {
        Ok(root)
    } else {
        Err(format!("Configuration file '{}' not found", config_path.display()).into())
    }
}
/// Complete outcome of one benchmark run.
///
/// This is the structure written to and read back from JSON result files.
#[derive(Debug, Serialize, Deserialize)]
struct BenchmarkResults {
/// Name of the benchmark suite that was executed.
suite: String,
/// Number of distinct queries in the suite.
total_queries: usize,
/// Measured executions per query.
iterations: usize,
/// Number of warmup iterations; `run_benchmark_suite` itself stores 0 here.
warmup_iterations: usize,
/// Sum of all measured query execution times.
total_duration: DurationSerde,
/// Per-query timing summaries, in suite order.
query_results: Vec<QueryBenchmarkResult>,
/// Aggregates over every measured execution.
statistics: BenchmarkStatistics,
/// RFC 3339 UTC timestamp taken when the results were assembled.
timestamp: String,
}
/// Timing summary for a single benchmark query across all its iterations.
#[derive(Debug, Serialize, Deserialize)]
struct QueryBenchmarkResult {
/// Display name of the query within its suite.
query_name: String,
/// Mean execution time.
avg_time: DurationSerde,
/// Fastest observed execution time.
min_time: DurationSerde,
/// Slowest observed execution time.
max_time: DurationSerde,
/// Median execution time.
median_time: DurationSerde,
/// 95th-percentile execution time.
p95_time: DurationSerde,
/// 99th-percentile execution time.
p99_time: DurationSerde,
/// Fraction of successful executions, in the range 0.0..=1.0.
success_rate: f64,
/// Standard deviation of the execution times, in milliseconds.
stddev: f64,
}
/// Aggregate statistics over every measured execution of a benchmark run.
#[derive(Debug, Serialize, Deserialize)]
struct BenchmarkStatistics {
/// Total number of measured query executions (queries × iterations).
total_queries_executed: usize,
/// Mean execution time over all measured executions.
avg_query_time: DurationSerde,
/// Executions divided by the summed execution time in seconds.
queries_per_second: f64,
/// Fraction of successful executions, in the range 0.0..=1.0.
success_rate: f64,
/// Number of executions that reported failure.
total_errors: usize,
}
/// Serializable stand-in for `std::time::Duration`, split into seconds and nanoseconds.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
struct DurationSerde {
/// Whole seconds.
secs: u64,
/// Sub-second part in nanoseconds.
nanos: u32,
}
impl From<Duration> for DurationSerde {
fn from(d: Duration) -> Self {
DurationSerde {
secs: d.as_secs(),
nanos: d.subsec_nanos(),
}
}
}
impl From<DurationSerde> for Duration {
fn from(d: DurationSerde) -> Self {
Duration::new(d.secs, d.nanos)
}
}
impl DurationSerde {
    /// Returns the stored duration as fractional seconds.
    fn as_secs_f64(&self) -> f64 {
        let whole = self.secs as f64;
        let fraction = f64::from(self.nanos) / 1_000_000_000.0;
        whole + fraction
    }
}
/// Executes unmeasured warmup runs before the real benchmark.
///
/// Only the first three queries of the suite are warmed up, each `warmup`
/// times. One dot is printed per warmed-up query as progress feedback.
///
/// # Errors
/// Fails when the suite's query list cannot be obtained.
fn run_warmup_iterations(
    _store: &Store,
    suite: &str,
    warmup: usize,
) -> Result<(), Box<dyn std::error::Error>> {
    use std::io::Write;

    let queries = get_benchmark_queries(suite)?;
    queries.iter().take(3).for_each(|_| {
        (0..warmup).for_each(|_| {
            simulate_query_execution();
        });
        print!(".");
        // Flush so the dot shows up immediately; a failed flush is not fatal.
        std::io::stdout().flush().ok();
    });
    println!();
    Ok(())
}
/// Runs every query of `suite` `iterations` times and collects timing statistics.
///
/// For each query the execution times are sorted and summarized into average,
/// min, max, median, p95, p99, success rate and standard deviation. Overall
/// throughput and success figures are computed across all executions.
///
/// # Arguments
/// * `_store` - Dataset store handle (currently unused by the simulated execution).
/// * `suite` - Name of the benchmark suite to run.
/// * `iterations` - Measured executions per query; must be at least 1.
/// * `detailed` - Print per-query and per-iteration progress instead of a single
///   progress line.
///
/// # Errors
/// Fails when `iterations` is 0 or the suite's query list cannot be obtained.
fn run_benchmark_suite(
    _store: &Store,
    suite: &str,
    iterations: usize,
    detailed: bool,
) -> Result<BenchmarkResults, Box<dyn std::error::Error>> {
    use std::io::Write;

    // Every statistic below divides by `iterations` or indexes into a vector of
    // that length, so zero iterations would panic instead of reporting an error.
    if iterations == 0 {
        return Err("Benchmark requires at least one iteration".into());
    }

    let queries = get_benchmark_queries(suite)?;
    let mut query_results = Vec::with_capacity(queries.len());
    let mut total_duration = Duration::ZERO;
    let mut total_queries_executed = 0usize;
    let mut successful_queries = 0usize;
    let mut total_errors = 0usize;

    for (i, (query_name, _query)) in queries.iter().enumerate() {
        if detailed {
            println!("Running query {}/{}: {}", i + 1, queries.len(), query_name);
        } else {
            print!("\rProgress: {}/{} queries", i + 1, queries.len());
            std::io::stdout().flush().ok();
        }

        let mut execution_times = Vec::new();
        let mut successes = 0usize;
        for iteration in 1..=iterations {
            if detailed && (iteration % 10 == 0 || iteration == 1) {
                print!(" Iteration {iteration}/{iterations}\r");
                // No newline is written, so flush to make the progress visible.
                std::io::stdout().flush().ok();
            }
            let start = Instant::now();
            let success = simulate_query_execution();
            let duration = start.elapsed();

            execution_times.push(duration);
            total_duration += duration;
            total_queries_executed += 1;
            if success {
                successes += 1;
                successful_queries += 1;
            } else {
                total_errors += 1;
            }
        }
        if detailed {
            println!(" Completed {iterations} iterations");
        }

        // Sorted ascending so min/max/percentiles can be read by position.
        execution_times.sort_unstable();
        let total_nanos: u128 = execution_times.iter().map(|d| d.as_nanos()).sum();
        let avg_time = Duration::from_nanos((total_nanos / iterations as u128) as u64);
        let min_time = execution_times[0];
        let max_time = execution_times[iterations - 1];
        let median_time = execution_times[iterations / 2];
        // 0.95 * n and 0.99 * n are always below n, so these indices are in bounds.
        let p95_time = execution_times[(iterations as f64 * 0.95) as usize];
        let p99_time = execution_times[(iterations as f64 * 0.99) as usize];
        let success_rate = successes as f64 / iterations as f64;

        // Population standard deviation in nanoseconds, reported in milliseconds.
        let mean_nanos = avg_time.as_nanos() as f64;
        let variance: f64 = execution_times
            .iter()
            .map(|d| {
                let diff = d.as_nanos() as f64 - mean_nanos;
                diff * diff
            })
            .sum::<f64>()
            / iterations as f64;
        let stddev = variance.sqrt() / 1_000_000.0;

        query_results.push(QueryBenchmarkResult {
            query_name: query_name.clone(),
            avg_time: avg_time.into(),
            min_time: min_time.into(),
            max_time: max_time.into(),
            median_time: median_time.into(),
            p95_time: p95_time.into(),
            p99_time: p99_time.into(),
            success_rate,
            stddev,
        });
    }
    if !detailed {
        // Terminate the `\r`-rewritten progress line.
        println!();
    }

    let avg_query_time =
        Duration::from_nanos((total_duration.as_nanos() / total_queries_executed as u128) as u64);
    let queries_per_second = total_queries_executed as f64 / total_duration.as_secs_f64();
    let success_rate = successful_queries as f64 / total_queries_executed as f64;
    let statistics = BenchmarkStatistics {
        total_queries_executed,
        avg_query_time: avg_query_time.into(),
        queries_per_second,
        success_rate,
        total_errors,
    };

    Ok(BenchmarkResults {
        suite: suite.to_string(),
        total_queries: queries.len(),
        iterations,
        warmup_iterations: 0,
        total_duration: total_duration.into(),
        query_results,
        statistics,
        timestamp: chrono::Utc::now().to_rfc3339(),
    })
}
/// Returns the `(name, SPARQL text)` pairs that make up a benchmark suite.
///
/// # Errors
/// Fails with `Unknown benchmark suite: <suite>` for any name without a catalogue.
fn get_benchmark_queries(suite: &str) -> Result<Vec<(String, String)>, Box<dyn std::error::Error>> {
    let catalogue: &[(&str, &str)] = match suite {
        "sp2bench" => &[
            ("Q1-Simple", "SELECT * WHERE { ?s ?p ?o } LIMIT 10"),
            ("Q2-FOAF", "SELECT ?name WHERE { ?person <http://xmlns.com/foaf/0.1/name> ?name }"),
            ("Q3-Creator", "SELECT ?article WHERE { ?article <http://purl.org/dc/elements/1.1/creator> ?author }"),
            ("Q4-Filter", "SELECT ?name WHERE { ?person <http://xmlns.com/foaf/0.1/name> ?name FILTER(REGEX(?name, 'Smith')) }"),
            ("Q5-Optional", "SELECT ?name ?email WHERE { ?person <http://xmlns.com/foaf/0.1/name> ?name OPTIONAL { ?person <http://xmlns.com/foaf/0.1/mbox> ?email } }"),
        ],
        "watdiv" => &[
            ("C1-Caption", "SELECT ?v0 WHERE { ?v0 <http://schema.org/caption> ?v1 }"),
            ("C2-Follows", "SELECT ?v0 ?v1 WHERE { ?v0 <http://schema.org/follows> ?v1 }"),
            ("F1-Complex", "SELECT ?v0 ?v2 WHERE { ?v0 <http://schema.org/likes> ?v1 . ?v1 <http://schema.org/friendOf> ?v2 }"),
        ],
        "ldbc" => &[
            ("Q1-FirstName", "SELECT ?name WHERE { ?person <http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/firstName> ?name }"),
            ("Q2-Friends", "SELECT ?p1 ?p2 WHERE { ?p1 <http://www.ldbc.eu/ldbc_socialnet/1.0/vocabulary/knows> ?p2 }"),
        ],
        "bsbm" => &[
            ("Q1-Product", "SELECT ?product ?label WHERE { ?product <http://www.w3.org/2000/01/rdf-schema#label> ?label }"),
            ("Q2-Features", "SELECT ?product ?feature WHERE { ?product <http://www4.wiwiss.fu-berlin.de/bizer/bsbm/v01/vocabulary/productFeature> ?feature }"),
        ],
        "custom" => &[("simple", "SELECT * WHERE { ?s ?p ?o } LIMIT 1")],
        _ => return Err(format!("Unknown benchmark suite: {suite}").into()),
    };

    Ok(catalogue
        .iter()
        .map(|&(name, sparql)| (name.to_owned(), sparql.to_owned()))
        .collect())
}
/// Stands in for a real query execution.
///
/// Sleeps for a randomly drawn 1–15 ms and then reports success when a second
/// random draw falls below 0.95.
fn simulate_query_execution() -> bool {
    use scirs2_core::random::Random;

    let delay_ms = {
        let mut latency_rng = Random::default();
        latency_rng.random_range(0..15_u64) + 1
    };
    std::thread::sleep(Duration::from_millis(delay_ms));

    let mut outcome_rng = Random::default();
    let roll = outcome_rng.random_f64();
    roll < 0.95
}
fn display_benchmark_results(results: &BenchmarkResults, detailed: bool) {
println!("\n==================== Benchmark Results ====================");
println!("Suite: {}", results.suite);
println!("Timestamp: {}", results.timestamp);
println!("Total queries: {}", results.total_queries);
println!("Iterations per query: {}", results.iterations);
println!(
"Total duration: {:.2}s",
results.total_duration.as_secs_f64()
);
println!();
println!("Overall Statistics:");
println!(
" Total queries executed: {}",
results.statistics.total_queries_executed
);
println!(
" Average query time: {:.3}ms",
results.statistics.avg_query_time.as_secs_f64() * 1000.0
);
println!(
" Queries per second: {:.2}",
results.statistics.queries_per_second
);
println!(
" Success rate: {:.1}%",
results.statistics.success_rate * 100.0
);
if results.statistics.total_errors > 0 {
println!(" Total errors: {}", results.statistics.total_errors);
}
println!();
println!("Query Details:");
for query_result in &results.query_results {
println!(" {}:", query_result.query_name);
println!(
" Average: {:.3}ms (±{:.2}ms)",
query_result.avg_time.as_secs_f64() * 1000.0,
query_result.stddev
);
if detailed {
println!(
" Min: {:.3}ms",
query_result.min_time.as_secs_f64() * 1000.0
);
println!(
" Max: {:.3}ms",
query_result.max_time.as_secs_f64() * 1000.0
);
println!(
" Median: {:.3}ms",
query_result.median_time.as_secs_f64() * 1000.0
);
println!(
" P95: {:.3}ms",
query_result.p95_time.as_secs_f64() * 1000.0
);
println!(
" P99: {:.3}ms",
query_result.p99_time.as_secs_f64() * 1000.0
);
}
println!(
" Success rate: {:.1}%",
query_result.success_rate * 100.0
);
}
println!("==========================================================");
}
/// Writes `results` to `output_path` as pretty-printed JSON.
///
/// # Errors
/// Fails when serialization or the file write fails.
fn save_benchmark_results(
    results: &BenchmarkResults,
    output_path: &PathBuf,
) -> Result<(), Box<dyn std::error::Error>> {
    let serialized = serde_json::to_string_pretty(results)?;
    fs::write(output_path, serialized).map_err(Into::into)
}
/// Reads a JSON results file and deserializes it into `BenchmarkResults`.
///
/// # Errors
/// Fails when the file cannot be read or its contents are not valid results JSON.
fn load_benchmark_results(path: &PathBuf) -> Result<BenchmarkResults, Box<dyn std::error::Error>> {
    let raw = fs::read_to_string(path)?;
    serde_json::from_str(&raw).map_err(Into::into)
}
/// Builds a simple RDF dataset with one triple per resource.
///
/// The output starts with a two-line comment header, followed by
/// `triple_count` triples. The predicate of triple `i` is chosen
/// deterministically from a fixed list of five; `_rng` is not consulted.
fn generate_rdf_dataset(
    triple_count: usize,
    _rng: &mut Random<scirs2_core::rngs::StdRng>,
) -> Result<String, Box<dyn std::error::Error>> {
    const PREDICATES: [&str; 5] = [
        "<http://www.w3.org/2000/01/rdf-schema#label>",
        "<http://purl.org/dc/terms/title>",
        "<http://xmlns.com/foaf/0.1/name>",
        "<http://schema.org/name>",
        "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>",
    ];

    let mut output = format!("# Generated RDF Dataset\n# Triples: {}\n\n", triple_count);
    output.extend((0..triple_count).map(|i| {
        let predicate = PREDICATES[(i * 7 + 3) % PREDICATES.len()];
        format!("<http://example.org/resource/{i}> {predicate} \"Resource {i}\" .\n")
    }));
    Ok(output)
}
/// Builds a graph-shaped dataset of `triple_count` edges.
///
/// Edges connect nodes drawn from a pool of `floor(sqrt(triple_count))` nodes;
/// the endpoints of edge `i` are derived deterministically from `i`, and `_rng`
/// is not consulted.
fn generate_graph_dataset(
    triple_count: usize,
    _rng: &mut Random<scirs2_core::rngs::StdRng>,
) -> Result<String, Box<dyn std::error::Error>> {
    let node_count = (triple_count as f64).sqrt() as usize;

    let mut output = String::from("# Generated Graph Dataset\n\n");
    output.extend((0..triple_count).map(|i| {
        let from = (i * 13) % node_count;
        let to = (i * 17 + 7) % node_count;
        format!(
            "<http://example.org/node/{from}> <http://example.org/edge> <http://example.org/node/{to}> .\n"
        )
    }));
    Ok(output)
}
/// Builds a dataset of typed entities with schema.org-style properties.
///
/// Triple `i` pairs a class and a property picked deterministically from two
/// fixed five-element lists; `_rng` is not consulted.
fn generate_semantic_dataset(
    triple_count: usize,
    _rng: &mut Random<scirs2_core::rngs::StdRng>,
) -> Result<String, Box<dyn std::error::Error>> {
    const CLASSES: [&str; 5] = ["Person", "Organization", "Place", "Event", "Document"];
    const PROPERTIES: [&str; 5] = ["name", "description", "createdAt", "author", "location"];

    let mut output = String::from("# Generated Semantic Dataset\n\n");
    output.extend((0..triple_count).map(|i| {
        let class = CLASSES[(i * 11) % CLASSES.len()];
        let property = PROPERTIES[(i * 13) % PROPERTIES.len()];
        let class_path = class.to_lowercase();
        format!(
            "<http://example.org/{class_path}/{i}> <http://schema.org/{property}> \"{class}_{property}_value\" .\n"
        )
    }));
    Ok(output)
}
/// One entry of a parsed query log.
#[derive(Debug)]
struct QueryLog {
/// The log line holding the query text, stored as read.
query: String,
/// Timestamp string; `parse_query_log` synthesizes it from the line index.
_timestamp: String,
/// Query duration in milliseconds; `parse_query_log` synthesizes it from the line index.
duration_ms: f64,
}
/// Summary statistics computed over a parsed query log.
#[derive(Debug)]
struct WorkloadAnalysis {
/// Number of log entries analyzed.
total_queries: usize,
/// Number of distinct query strings.
unique_queries: usize,
/// Occurrence count per distinct query string.
_query_frequencies: HashMap<String, usize>,
/// Mean of the entries' `duration_ms` values.
avg_duration_ms: f64,
/// Detected query patterns; left empty when pattern analysis was not requested.
query_patterns: Vec<QueryPattern>,
}
/// Share of a workload that falls into one query category.
#[derive(Debug)]
struct QueryPattern {
/// Category label: SELECT, ASK, CONSTRUCT, DESCRIBE or OTHER.
pattern_type: String,
/// Number of log entries in this category.
count: usize,
/// `count` as a percentage (0–100) of all analyzed entries.
percentage: f64,
}
/// Parses a query log with one query per line.
///
/// Blank lines and comment lines (first non-whitespace character `#`) are
/// skipped. The comment check is done on the trimmed line so that indented
/// comments are not counted as queries. The query text itself is stored
/// exactly as it appears in the log.
///
/// The timestamp and duration of each entry are placeholders derived from the
/// line index; the log format carries no such data.
fn parse_query_log(content: &str) -> Result<Vec<QueryLog>, Box<dyn std::error::Error>> {
    let queries = content
        .lines()
        .enumerate()
        .filter(|(_, line)| {
            let trimmed = line.trim();
            !trimmed.is_empty() && !trimmed.starts_with('#')
        })
        .map(|(i, line)| QueryLog {
            query: line.to_string(),
            _timestamp: format!("2025-11-09T{:02}:00:00Z", i % 24),
            duration_ms: (i as f64 % 100.0) + 5.0,
        })
        .collect();
    Ok(queries)
}
/// Computes frequency and duration statistics for a parsed query log.
///
/// # Arguments
/// * `queries` - Parsed log entries; may be empty.
/// * `analyze_patterns` - When set, entries are also classified via
///   `detect_query_patterns`; otherwise the pattern list stays empty.
///
/// An empty log yields an average duration of `0.0` rather than NaN, so the
/// reports never show `NaN` or a JSON `null` for it.
fn analyze_query_workload(
    queries: &[QueryLog],
    analyze_patterns: bool,
) -> Result<WorkloadAnalysis, Box<dyn std::error::Error>> {
    let mut query_frequencies: HashMap<String, usize> = HashMap::new();
    let mut total_duration = 0.0;
    for log in queries {
        *query_frequencies.entry(log.query.clone()).or_insert(0) += 1;
        total_duration += log.duration_ms;
    }

    let unique_queries = query_frequencies.len();
    // 0.0 / 0.0 would be NaN; an empty workload has no meaningful average.
    let avg_duration_ms = if queries.is_empty() {
        0.0
    } else {
        total_duration / queries.len() as f64
    };

    let query_patterns = if analyze_patterns {
        detect_query_patterns(queries)?
    } else {
        Vec::new()
    };

    Ok(WorkloadAnalysis {
        total_queries: queries.len(),
        unique_queries,
        _query_frequencies: query_frequencies,
        avg_duration_ms,
        query_patterns,
    })
}
/// Classifies each log entry by SPARQL query form and returns the distribution.
///
/// An entry is labelled with the first of SELECT, ASK, CONSTRUCT, DESCRIBE
/// (checked in that order) that occurs anywhere in its text, or OTHER when none
/// does. Matching ignores ASCII case because SPARQL keywords are
/// case-insensitive. This is a substring heuristic, not a parser.
///
/// The result is sorted by descending count, ties broken by label, so reports
/// are identical from run to run. An empty input yields an empty list.
fn detect_query_patterns(
    queries: &[QueryLog],
) -> Result<Vec<QueryPattern>, Box<dyn std::error::Error>> {
    // Checked in this order; the first hit decides the label.
    const QUERY_FORMS: [&str; 4] = ["SELECT", "ASK", "CONSTRUCT", "DESCRIBE"];

    let mut counts: HashMap<&'static str, usize> = HashMap::new();
    for log in queries {
        let upper = log.query.to_ascii_uppercase();
        let form = QUERY_FORMS
            .iter()
            .copied()
            .find(|keyword| upper.contains(*keyword))
            .unwrap_or("OTHER");
        *counts.entry(form).or_insert(0) += 1;
    }

    let total = queries.len() as f64;
    let mut patterns: Vec<QueryPattern> = counts
        .into_iter()
        .map(|(form, count)| QueryPattern {
            pattern_type: form.to_string(),
            count,
            percentage: (count as f64 / total) * 100.0,
        })
        .collect();

    // HashMap iteration order is arbitrary; impose a stable, useful ordering.
    patterns.sort_by(|a, b| {
        b.count
            .cmp(&a.count)
            .then_with(|| a.pattern_type.cmp(&b.pattern_type))
    });
    Ok(patterns)
}
/// Renders a workload analysis as JSON, HTML or plain text.
///
/// `format` selects the renderer: `"json"`, `"html"`, or anything else for
/// plain text. Query patterns appear in the JSON and text reports;
/// `include_suggestions` only affects the text report.
///
/// # Errors
/// Fails when JSON serialization fails.
fn generate_workload_report(
    analysis: &WorkloadAnalysis,
    include_suggestions: bool,
    format: &str,
) -> Result<String, Box<dyn std::error::Error>> {
    match format {
        "json" => {
            let patterns: Vec<_> = analysis
                .query_patterns
                .iter()
                .map(|p| {
                    serde_json::json!({
                        "type": p.pattern_type,
                        "count": p.count,
                        "percentage": p.percentage
                    })
                })
                .collect();
            let document = serde_json::json!({
                "total_queries": analysis.total_queries,
                "unique_queries": analysis.unique_queries,
                "avg_duration_ms": analysis.avg_duration_ms,
                "patterns": patterns
            });
            Ok(serde_json::to_string_pretty(&document)?)
        }
        "html" => {
            let page = format!(
                r#"<html><body>
<h1>Query Workload Analysis</h1>
<p>Total Queries: {}</p>
<p>Unique Queries: {}</p>
<p>Average Duration: {:.2}ms</p>
</body></html>"#,
                analysis.total_queries, analysis.unique_queries, analysis.avg_duration_ms
            );
            Ok(page)
        }
        _ => {
            let mut report = String::from("===== Query Workload Analysis =====\n\n");
            report += &format!("Total queries: {}\n", analysis.total_queries);
            report += &format!("Unique queries: {}\n", analysis.unique_queries);
            report += &format!("Average duration: {:.2}ms\n\n", analysis.avg_duration_ms);

            if !analysis.query_patterns.is_empty() {
                report += "Query Patterns:\n";
                for pattern in &analysis.query_patterns {
                    report += &format!(
                        " {}: {} ({:.1}%)\n",
                        pattern.pattern_type, pattern.count, pattern.percentage
                    );
                }
                report += "\n";
            }

            if include_suggestions {
                report += "Optimization Suggestions:\n";
                report += " • Consider caching frequently executed queries\n";
                report += " • Add indexes for commonly queried patterns\n";
                report += " • Review slow queries (>100ms) for optimization opportunities\n";
            }
            Ok(report)
        }
    }
}
/// Result of comparing a current benchmark run against a baseline run.
#[derive(Debug, Serialize)]
struct BenchmarkComparison {
/// Suite name recorded in the baseline results.
baseline_suite: String,
/// Suite name recorded in the current results.
current_suite: String,
/// Timestamp recorded in the baseline results.
baseline_timestamp: String,
/// Timestamp recorded in the current results.
current_timestamp: String,
/// Per-query comparison for every baseline query also present in the current run.
query_comparisons: Vec<QueryComparison>,
/// Percent change of the summed average times of the compared queries.
overall_change_percent: f64,
/// `true` when `regressions` is non-empty.
has_regressions: bool,
/// Human-readable line for each query that slowed down beyond the threshold.
regressions: Vec<String>,
/// Human-readable line for each query that became more than 5% faster.
improvements: Vec<String>,
}
/// Baseline-versus-current timing for a single query.
#[derive(Debug, Serialize)]
struct QueryComparison {
/// Name of the compared query.
query_name: String,
/// Average execution time in the baseline run, in milliseconds.
baseline_avg_ms: f64,
/// Average execution time in the current run, in milliseconds.
current_avg_ms: f64,
/// Relative change from baseline to current in percent; positive means slower.
change_percent: f64,
/// `true` when `change_percent` exceeds the regression threshold.
is_regression: bool,
}
/// Compares per-query average times of `current` against `baseline`.
///
/// Queries are matched by name; a baseline query with no counterpart in
/// `current` is skipped. A query is a regression when it slowed down by more
/// than `threshold` percent, and an improvement when it became more than 5%
/// faster.
///
/// Percent changes are well defined for a zero baseline: no change is reported
/// as `0.0` instead of NaN, which also covers the overall figure when the two
/// runs share no queries.
fn compare_benchmark_results(
    baseline: &BenchmarkResults,
    current: &BenchmarkResults,
    threshold: f64,
) -> Result<BenchmarkComparison, Box<dyn std::error::Error>> {
    /// Relative change from `baseline_ms` to `current_ms` in percent.
    fn percent_change(baseline_ms: f64, current_ms: f64) -> f64 {
        if baseline_ms > 0.0 {
            ((current_ms - baseline_ms) / baseline_ms) * 100.0
        } else if current_ms > 0.0 {
            // Any time at all is infinitely slower than a zero baseline.
            f64::INFINITY
        } else {
            // 0 → 0 is "no change"; plain division would give NaN here.
            0.0
        }
    }

    let mut query_comparisons = Vec::new();
    let mut regressions = Vec::new();
    let mut improvements = Vec::new();
    let mut total_baseline_time = 0.0;
    let mut total_current_time = 0.0;

    for baseline_query in &baseline.query_results {
        let Some(current_query) = current
            .query_results
            .iter()
            .find(|q| q.query_name == baseline_query.query_name)
        else {
            continue;
        };

        let baseline_ms = baseline_query.avg_time.as_secs_f64() * 1000.0;
        let current_ms = current_query.avg_time.as_secs_f64() * 1000.0;
        let change_percent = percent_change(baseline_ms, current_ms);
        total_baseline_time += baseline_ms;
        total_current_time += current_ms;

        let is_regression = change_percent > threshold;
        if is_regression {
            regressions.push(format!(
                "{}: {:.1}% slower ({:.2}ms → {:.2}ms)",
                baseline_query.query_name, change_percent, baseline_ms, current_ms
            ));
        } else if change_percent < -5.0 {
            improvements.push(format!(
                "{}: {:.1}% faster ({:.2}ms → {:.2}ms)",
                baseline_query.query_name,
                change_percent.abs(),
                baseline_ms,
                current_ms
            ));
        }

        query_comparisons.push(QueryComparison {
            query_name: baseline_query.query_name.clone(),
            baseline_avg_ms: baseline_ms,
            current_avg_ms: current_ms,
            change_percent,
            is_regression,
        });
    }

    let overall_change_percent = percent_change(total_baseline_time, total_current_time);
    let has_regressions = !regressions.is_empty();

    Ok(BenchmarkComparison {
        baseline_suite: baseline.suite.clone(),
        current_suite: current.suite.clone(),
        baseline_timestamp: baseline.timestamp.clone(),
        current_timestamp: current.timestamp.clone(),
        query_comparisons,
        overall_change_percent,
        has_regressions,
        regressions,
        improvements,
    })
}
/// Renders a benchmark comparison as JSON, HTML or plain text.
///
/// `format` selects the renderer: `"json"` serializes the whole comparison,
/// `"html"` emits a minimal page with the overall change, and anything else
/// produces the full plain-text report.
///
/// # Errors
/// Fails when JSON serialization fails.
fn generate_comparison_report(
    comparison: &BenchmarkComparison,
    format: &str,
) -> Result<String, Box<dyn std::error::Error>> {
    if format == "json" {
        return Ok(serde_json::to_string_pretty(comparison)?);
    }

    if format == "html" {
        let page = format!(
            "<html><body>\n<h1>Benchmark Comparison Report</h1>\n<p>Overall change: {:.1}%</p>\n</body></html>",
            comparison.overall_change_percent
        );
        return Ok(page);
    }

    let mut report = String::from("===== Benchmark Comparison Report =====\n\n");
    report += &format!(
        "Baseline: {} ({})\n",
        comparison.baseline_suite, comparison.baseline_timestamp
    );
    report += &format!(
        "Current: {} ({})\n\n",
        comparison.current_suite, comparison.current_timestamp
    );
    report += &format!(
        "Overall Performance Change: {:.1}%\n\n",
        comparison.overall_change_percent
    );

    if !comparison.regressions.is_empty() {
        report += "⚠️ REGRESSIONS DETECTED:\n";
        for regression in &comparison.regressions {
            report += &format!(" • {}\n", regression);
        }
        report += "\n";
    }

    if !comparison.improvements.is_empty() {
        report += "✓ Improvements:\n";
        for improvement in &comparison.improvements {
            report += &format!(" • {}\n", improvement);
        }
        report += "\n";
    }

    report += "Query-by-Query Comparison:\n";
    for comp in &comparison.query_comparisons {
        let status = match comp {
            c if c.is_regression => "⚠️ REGRESSION",
            c if c.change_percent < -5.0 => "✓ IMPROVED",
            _ => "≈ UNCHANGED",
        };
        report += &format!(" {} {}:\n", status, comp.query_name);
        report += &format!(
            " Baseline: {:.2}ms → Current: {:.2}ms ({:+.1}%)\n",
            comp.baseline_avg_ms, comp.current_avg_ms, comp.change_percent
        );
    }
    Ok(report)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test-local copy of the size → triple-count table.
    fn get_triple_count_for_size(size: &str) -> Option<usize> {
        let count = match size {
            "tiny" => 1_000,
            "small" => 10_000,
            "medium" => 100_000,
            "large" => 1_000_000,
            "xlarge" => 10_000_000,
            _ => return None,
        };
        Some(count)
    }

    /// Known suite names are accepted and an unknown one is rejected.
    #[test]
    fn test_benchmark_suite_validation() {
        for suite in ["sp2bench", "watdiv", "ldbc", "bsbm"] {
            assert!(is_supported_benchmark_suite(suite));
        }
        assert!(!is_supported_benchmark_suite("invalid"));
    }

    /// Each named size maps to its triple count; unknown names map to `None`.
    #[test]
    fn test_dataset_size_parsing() {
        let expectations = [
            ("tiny", Some(1_000)),
            ("small", Some(10_000)),
            ("medium", Some(100_000)),
            ("large", Some(1_000_000)),
            ("xlarge", Some(10_000_000)),
            ("invalid", None),
        ];
        for (size, expected) in expectations {
            assert_eq!(get_triple_count_for_size(size), expected);
        }
    }

    /// A `Duration` survives the round trip through `DurationSerde`.
    #[test]
    fn test_duration_serde() {
        let original = Duration::from_millis(1234);
        let stored = DurationSerde::from(original);
        let restored = Duration::from(stored);
        assert_eq!(original, restored);
    }
}