use super::{ToolResult, ToolStats};
use std::path::PathBuf;
use std::time::{Duration, Instant};
/// Command-line options for the SPARQL query profiler tool.
pub struct ProfilingConfig {
    /// Inline query text; takes precedence over `query_file` in `run`.
    pub query: Option<String>,
    /// Path to a file containing the query; read when `query` is `None`.
    pub query_file: Option<PathBuf>,
    /// Data source paths (currently unused by `profile_query_execution`).
    pub data: Vec<PathBuf>,
    /// Number of measured profiling iterations.
    pub iterations: usize,
    /// Number of untimed warmup iterations run before measurement.
    pub warmup: usize,
    /// When true, `run` also prints the memory-profile section.
    pub memory_profile: bool,
    /// Statistics output format: "json", or anything else for a table.
    pub output_format: String,
}
/// Timing and result-count measurements for a single profiled query run.
#[derive(Debug, Clone)]
pub struct QueryProfile {
    /// Wall-clock time for the whole run (all phases combined).
    pub total_time: Duration,
    /// Time spent in the parsing phase.
    pub parsing_time: Duration,
    /// Time spent in query-plan optimization.
    pub optimization_time: Duration,
    /// Time spent executing the query.
    pub execution_time: Duration,
    /// Time spent materializing the final result set.
    pub result_materialization_time: Duration,
    /// Memory used by the run; `profile_query_execution` currently
    /// always sets this to 0 (placeholder). Displayed as KB (bytes / 1024).
    pub memory_used: usize,
    /// Result count reported by the execution phase.
    pub intermediate_results: usize,
    /// Result count of the materialized final result set.
    pub final_results: usize,
    /// Per-phase breakdown, in pipeline order.
    pub phases: Vec<PhaseProfile>,
}
/// Measurements for one phase of the query-execution pipeline.
#[derive(Debug, Clone)]
pub struct PhaseProfile {
    /// Phase name (e.g. "Parsing", "Execution").
    pub name: String,
    /// Wall-clock duration of the phase.
    pub duration: Duration,
    /// Memory change during the phase; currently always 0 (placeholder).
    /// Displayed as KB (value / 1024) with a leading '+' when non-negative.
    pub memory_delta: isize,
    /// Number of results produced by the phase (0 for non-producing phases).
    pub results_count: usize,
}
/// Aggregate statistics computed over all measured (non-warmup) runs.
#[derive(Debug)]
pub struct ProfilingStats {
    /// The individual run profiles the statistics were derived from.
    pub runs: Vec<QueryProfile>,
    /// Arithmetic mean of the total run times.
    pub mean_time: Duration,
    /// Median total run time.
    pub median_time: Duration,
    /// Fastest total run time.
    pub min_time: Duration,
    /// Slowest total run time.
    pub max_time: Duration,
    /// Population standard deviation of the total run times.
    pub std_dev: Duration,
    /// Measured runs per second of cumulative wall-clock time.
    pub throughput: f64,
}
/// Entry point for the SPARQL query profiler tool.
///
/// Resolves the query text (inline `--query` wins over `--query-file`),
/// runs the configured warmup and measured iterations, then prints the
/// statistics, optional memory profile, bottleneck analysis, and
/// recommendations.
pub async fn run(config: ProfilingConfig) -> ToolResult {
    let mut tool_stats = ToolStats::new();
    println!("SPARQL Query Profiler");
    println!("====================\n");

    // Resolve the query source; exactly one of the two options must be set.
    let query_string = match (config.query, config.query_file.as_ref()) {
        (Some(inline), _) => inline,
        (None, Some(path)) => std::fs::read_to_string(path)?,
        (None, None) => return Err("Must specify either --query or --query-file".into()),
    };

    println!("Query:");
    println!("---");
    println!("{query_string}");
    println!("---\n");

    // Warmup runs are timed and printed but excluded from the statistics.
    if config.warmup > 0 {
        println!("Warmup: {} iteration(s)...", config.warmup);
        for run_no in 1..=config.warmup {
            let warm = profile_query_execution(&query_string, &config.data)?;
            println!(" Warmup {}: {:?}", run_no, warm.total_time);
        }
        println!();
    }

    println!("Profiling: {} iteration(s)...", config.iterations);
    let mut profiles = Vec::with_capacity(config.iterations);
    for run_no in 1..=config.iterations {
        let profile = profile_query_execution(&query_string, &config.data)?;
        println!(
            " Run {}: {:?} ({} results)",
            run_no, profile.total_time, profile.final_results
        );
        profiles.push(profile);
    }

    let profiling_stats = calculate_statistics(&profiles);
    println!("\n=== Profiling Results ===\n");
    display_statistics(&profiling_stats, &config.output_format)?;
    if config.memory_profile {
        display_memory_profile(&profiles)?;
    }
    display_bottlenecks(&profiles)?;
    display_recommendations(&profiling_stats)?;

    tool_stats.items_processed = config.iterations;
    tool_stats.finish();
    tool_stats.print_summary("Profiler");
    Ok(())
}
fn profile_query_execution(query: &str, _data_sources: &[PathBuf]) -> ToolResult<QueryProfile> {
let start = Instant::now();
let parse_start = Instant::now();
let _parsed_query = parse_query(query)?;
let parsing_time = parse_start.elapsed();
let opt_start = Instant::now();
let _optimized_query = optimize_query_plan(query)?;
let optimization_time = opt_start.elapsed();
let exec_start = Instant::now();
let intermediate_count = execute_query(query)?;
let execution_time = exec_start.elapsed();
let mat_start = Instant::now();
let final_count = materialize_results()?;
let result_materialization_time = mat_start.elapsed();
let total_time = start.elapsed();
let phases = vec![
PhaseProfile {
name: "Parsing".to_string(),
duration: parsing_time,
memory_delta: 0,
results_count: 0,
},
PhaseProfile {
name: "Optimization".to_string(),
duration: optimization_time,
memory_delta: 0,
results_count: 0,
},
PhaseProfile {
name: "Execution".to_string(),
duration: execution_time,
memory_delta: 0,
results_count: intermediate_count,
},
PhaseProfile {
name: "Materialization".to_string(),
duration: result_materialization_time,
memory_delta: 0,
results_count: final_count,
},
];
Ok(QueryProfile {
total_time,
parsing_time,
optimization_time,
execution_time,
result_materialization_time,
memory_used: 0, intermediate_results: intermediate_count,
final_results: final_count,
phases,
})
}
/// Simulated parse phase: fixed 100 µs cost, ignores the query text.
fn parse_query(_query: &str) -> ToolResult<String> {
    std::thread::sleep(Duration::from_micros(100));
    Ok("parsed".to_owned())
}
/// Simulated plan-optimization phase: fixed 200 µs cost.
fn optimize_query_plan(_query: &str) -> ToolResult<String> {
    std::thread::sleep(Duration::from_micros(200));
    Ok("optimized".to_owned())
}
/// Simulated execution phase: 5 ms of work, reports 100 intermediate rows.
fn execute_query(_query: &str) -> ToolResult<usize> {
    std::thread::sleep(Duration::from_millis(5));
    Ok(100)
}
/// Simulated materialization phase: 500 µs of work, reports 10 final rows.
fn materialize_results() -> ToolResult<usize> {
    std::thread::sleep(Duration::from_micros(500));
    Ok(10)
}
/// Aggregates per-run profiles into summary statistics (mean, median,
/// min/max, population std-dev, and throughput in runs/sec).
///
/// Fix: the original divided by `times.len()` and called
/// `.first()/.last().expect(...)` unconditionally, so it panicked when
/// given zero profiles (e.g. `--iterations 0`). An empty slice now
/// yields zeroed statistics instead of a panic.
fn calculate_statistics(profiles: &[QueryProfile]) -> ProfilingStats {
    if profiles.is_empty() {
        return ProfilingStats {
            runs: Vec::new(),
            mean_time: Duration::ZERO,
            median_time: Duration::ZERO,
            min_time: Duration::ZERO,
            max_time: Duration::ZERO,
            std_dev: Duration::ZERO,
            throughput: 0.0,
        };
    }

    let mut times: Vec<Duration> = profiles.iter().map(|p| p.total_time).collect();
    // sort_unstable: faster, non-allocating; stability is irrelevant here.
    times.sort_unstable();

    let total_time: Duration = times.iter().sum();
    let mean_time = total_time / times.len() as u32;
    let median_time = if times.len() % 2 == 0 {
        let mid = times.len() / 2;
        (times[mid - 1] + times[mid]) / 2
    } else {
        times[times.len() / 2]
    };
    // Non-empty is guaranteed by the guard above.
    let min_time = times[0];
    let max_time = times[times.len() - 1];

    // Population variance of run times, computed in seconds as f64.
    let variance: f64 = times
        .iter()
        .map(|&t| {
            let diff = t.as_secs_f64() - mean_time.as_secs_f64();
            diff * diff
        })
        .sum::<f64>()
        / times.len() as f64;
    let std_dev = Duration::from_secs_f64(variance.sqrt());
    // Runs per second of cumulative measured wall-clock time.
    let throughput = times.len() as f64 / total_time.as_secs_f64();

    ProfilingStats {
        runs: profiles.to_vec(),
        mean_time,
        median_time,
        min_time,
        max_time,
        std_dev,
        throughput,
    }
}
/// Renders the statistics in the requested output format.
/// Any format other than "json" falls back to the table view.
fn display_statistics(stats: &ProfilingStats, format: &str) -> ToolResult<()> {
    if format == "json" {
        display_stats_json(stats)
    } else {
        display_stats_table(stats)
    }
}
/// Prints the aggregate statistics as a plain-text table, plus a phase
/// breakdown taken from the first run when one exists.
fn display_stats_table(stats: &ProfilingStats) -> ToolResult<()> {
    println!("Execution Time Statistics:");
    println!(" Runs: {}", stats.runs.len());
    println!(" Mean: {:?}", stats.mean_time);
    println!(" Median: {:?}", stats.median_time);
    println!(" Min: {:?}", stats.min_time);
    println!(" Max: {:?}", stats.max_time);
    println!(" Std Dev: {:?}", stats.std_dev);
    println!(" Throughput: {:.2} queries/sec", stats.throughput);

    // Phase timings come from the first run only.
    if let Some(first) = stats.runs.first() {
        println!("\nPhase Breakdown (first run):");
        println!(" Parsing: {:?}", first.parsing_time);
        println!(" Optimization: {:?}", first.optimization_time);
        println!(" Execution: {:?}", first.execution_time);
        println!(" Materialization: {:?}", first.result_materialization_time);
    }
    Ok(())
}
/// Prints the aggregate statistics as a JSON document on stdout.
///
/// Fix: the original always emitted `"throughput_qps": N` without a
/// trailing comma, then appended a `"phases"` object whenever runs were
/// present — producing invalid JSON in the common (non-empty) case. The
/// comma is now emitted only when the phases object follows.
fn display_stats_json(stats: &ProfilingStats) -> ToolResult<()> {
    let has_phases = !stats.runs.is_empty();
    println!("{{");
    println!(" \"runs\": {},", stats.runs.len());
    println!(" \"mean_ms\": {},", stats.mean_time.as_millis());
    println!(" \"median_ms\": {},", stats.median_time.as_millis());
    println!(" \"min_ms\": {},", stats.min_time.as_millis());
    println!(" \"max_ms\": {},", stats.max_time.as_millis());
    println!(" \"std_dev_ms\": {},", stats.std_dev.as_millis());
    if has_phases {
        // Comma required: the "phases" member follows.
        println!(" \"throughput_qps\": {:.2},", stats.throughput);
        let first = &stats.runs[0];
        println!(" \"phases\": {{");
        println!(" \"parsing_ms\": {},", first.parsing_time.as_millis());
        println!(
            " \"optimization_ms\": {},",
            first.optimization_time.as_millis()
        );
        println!(
            " \"execution_ms\": {},",
            first.execution_time.as_millis()
        );
        println!(
            " \"materialization_ms\": {}",
            first.result_materialization_time.as_millis()
        );
        println!(" }}");
    } else {
        // Last member: no trailing comma.
        println!(" \"throughput_qps\": {:.2}", stats.throughput);
    }
    println!("}}");
    Ok(())
}
/// Prints average/maximum memory usage across runs and the per-phase
/// memory deltas of the first run. Values are shown in KB.
fn display_memory_profile(profiles: &[QueryProfile]) -> ToolResult<()> {
    println!("\n=== Memory Profile ===\n");
    let first = match profiles.first() {
        Some(profile) => profile,
        None => {
            println!("No profiling data available");
            return Ok(());
        }
    };

    let total_bytes: usize = profiles.iter().map(|p| p.memory_used).sum();
    let avg_memory = total_bytes / profiles.len();
    let max_memory = profiles.iter().map(|p| p.memory_used).max().unwrap_or(0);
    println!("Memory Usage:");
    println!(" Average: {} KB", avg_memory / 1024);
    println!(" Maximum: {} KB", max_memory / 1024);

    println!("\nMemory by Phase:");
    for phase in &first.phases {
        let delta_kb = phase.memory_delta / 1024;
        // Non-negative deltas get an explicit '+'; negatives carry their sign.
        let delta_str = if phase.memory_delta < 0 {
            format!("{} KB", delta_kb)
        } else {
            format!("+{} KB", delta_kb)
        };
        println!(" {}: {}", phase.name, delta_str);
    }
    Ok(())
}
/// Prints a per-phase time-distribution bar chart for the first run and
/// flags any phase consuming more than 30% of total execution time.
fn display_bottlenecks(profiles: &[QueryProfile]) -> ToolResult<()> {
    println!("\n=== Performance Bottlenecks ===\n");
    let profile = match profiles.first() {
        Some(p) => p,
        None => {
            println!("No profiling data available");
            return Ok(());
        }
    };

    let total_micros = profile.total_time.as_micros();
    let mut shares: Vec<(String, f64)> = profile
        .phases
        .iter()
        .map(|ph| {
            let pct = (ph.duration.as_micros() as f64 / total_micros as f64) * 100.0;
            (ph.name.clone(), pct)
        })
        .collect();
    // Largest share first; the Equal fallback keeps the sort total even
    // in the presence of non-comparable floats.
    shares.sort_by(|lhs, rhs| rhs.1.partial_cmp(&lhs.1).unwrap_or(std::cmp::Ordering::Equal));

    println!("Time Distribution:");
    for (name, percent) in &shares {
        // Bar length equals the percentage, capped at 50 glyphs.
        let bar = "█".repeat((*percent as usize).min(50));
        println!(" {:<20} {:>6.2}% {}", name, percent, bar);
    }

    println!("\nIdentified Bottlenecks:");
    let heavy: Vec<&(String, f64)> = shares.iter().filter(|(_, pct)| *pct > 30.0).collect();
    if heavy.is_empty() {
        println!(" ✓ No major bottlenecks detected");
    } else {
        for (name, percent) in heavy {
            println!(" ⚠ {}: {:.1}% of execution time", name, percent);
        }
    }
    Ok(())
}
/// Prints tuning suggestions derived from the first run's phase timings
/// and the aggregate statistics (variability, throughput).
fn display_recommendations(stats: &ProfilingStats) -> ToolResult<()> {
    println!("\n=== Recommendations ===\n");
    let first = match stats.runs.first() {
        Some(run) => run,
        None => {
            println!("No profiling data for recommendations");
            return Ok(());
        }
    };

    let mut advice: Vec<&str> = Vec::new();
    if first.execution_time > Duration::from_millis(100) {
        advice
            .push("Execution phase is slow - consider adding indexes or optimizing query patterns");
    }
    if first.parsing_time > Duration::from_millis(10) {
        advice.push("Query parsing is slow - consider simplifying query syntax");
    }
    if first.intermediate_results > 10000 {
        advice.push(
            "Large number of intermediate results - add FILTER or LIMIT clauses earlier in query",
        );
    }
    // Coefficient of variation: std-dev relative to the mean run time.
    let variability = stats.std_dev.as_secs_f64() / stats.mean_time.as_secs_f64();
    if variability > 0.2 {
        advice.push("High execution time variability - query performance may be unstable");
    }
    if stats.throughput < 1.0 {
        advice.push("Low throughput - consider query optimization or hardware upgrades");
    }

    if advice.is_empty() {
        println!("✓ Query performance looks good - no recommendations");
    } else {
        for (idx, rec) in advice.iter().enumerate() {
            println!("{}. {}", idx + 1, rec);
        }
    }
    Ok(())
}