use anyhow::{anyhow, Result};
use colored::Colorize;
use scirs2_core::ndarray_ext::{Array1, Array2};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, VecDeque};
use std::time::{Duration, Instant};
/// Upper bound on the number of measurements kept in one profile's history.
/// `QueryProfile::record` drops the entry at the back of the history (the
/// oldest one, since new measurements are pushed to the front) once this many
/// are stored.
const MAX_HISTORY_PER_QUERY: usize = 256;
/// Computes the profile key for a query: the normalized query text is hashed
/// with the standard library's `DefaultHasher` and rendered as a 16-digit,
/// zero-padded lowercase hexadecimal string.
///
/// Queries that normalize to the same text (see `normalize_sparql`) share a
/// fingerprint.
fn fingerprint_query(query: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let canonical = normalize_sparql(query);
    let mut hasher = DefaultHasher::new();
    canonical.hash(&mut hasher);
    let digest = hasher.finish();
    format!("{:016x}", digest)
}
/// Canonicalizes query text for fingerprinting.
///
/// Every run of whitespace becomes a single space, leading and trailing
/// whitespace is removed, and the whole text is upper-cased. Note that the
/// upper-casing applies to the entire text, so variable names and string
/// literals that differ only in letter case normalize to the same result.
fn normalize_sparql(query: &str) -> String {
    // Splitting on whitespace and re-joining with one space collapses runs and
    // trims both ends in a single pass.
    let tokens: Vec<&str> = query.split_whitespace().collect();
    tokens.join(" ").to_uppercase()
}
/// Numeric feature vector describing a SPARQL query's text.
///
/// All values are produced by `QueryProfileFeatures::extract` using plain
/// substring/character counting on the query text (no SPARQL parsing), so they
/// are heuristics rather than exact structural counts.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryProfileFeatures {
/// Number of `.` and `;` characters in the query, but at least 1 when the
/// text contains `WHERE`.
pub triple_pattern_count: f64,
/// Occurrences of `OPTIONAL` (case-insensitive).
pub optional_count: f64,
/// Occurrences of `UNION` (case-insensitive).
pub union_count: f64,
/// Occurrences of `FILTER` (case-insensitive).
pub filter_count: f64,
/// Occurrences of `ORDER BY` (case-insensitive).
pub order_by_count: f64,
/// Occurrences of `GROUP BY` (case-insensitive).
pub group_by_count: f64,
/// Occurrences of `SELECT` beyond the first one.
pub subquery_count: f64,
/// Total occurrences of `COUNT(`, `SUM(`, `AVG(`, `MAX(`, `MIN(` and
/// `GROUP_CONCAT(`.
pub aggregation_count: f64,
/// 1.0 when the text contains `LIMIT`, otherwise 0.0.
pub has_limit: f64,
/// 1.0 when the text contains `DISTINCT`, otherwise 0.0.
pub has_distinct: f64,
/// Fixed score (0.2, 0.5, 0.6 or 0.9) chosen from whether the text contains
/// an `http://`/`https://` URI and whether it contains `FILTER`.
pub selectivity: f64,
/// Count of `/` and `+` characters plus twice the count of `*` characters.
pub path_complexity: f64,
/// Occurrences of `BIND(` (case-insensitive).
pub bind_count: f64,
/// Occurrences of `SERVICE` (case-insensitive).
pub service_count: f64,
/// Occurrences of `GRAPH` (case-insensitive).
pub named_graph_count: f64,
}
impl QueryProfileFeatures {
    /// Length of the vector returned by [`Self::to_array`] and of the list
    /// returned by [`Self::feature_names`].
    pub const FEATURE_DIM: usize = 15;

    /// Derives the feature vector from raw query text.
    ///
    /// Everything here is text matching on the query (keywords are matched on
    /// an upper-cased copy, punctuation on the original), not a SPARQL parse,
    /// so characters inside IRIs or literals are counted as well.
    pub fn extract(query: &str) -> Self {
        let upper = query.to_uppercase();
        let keyword_hits = |needle: &str| upper.matches(needle).count();
        let indicator = |present: bool| if present { 1.0 } else { 0.0 };

        // Statement separators approximate the number of triple patterns; a
        // query with a WHERE clause counts as having at least one.
        let has_where = upper.contains("WHERE");
        let separators = query.matches('.').count() + query.matches(';').count();
        let triple_pattern_count = separators.max(usize::from(has_where)) as f64;

        const AGGREGATES: [&str; 6] = ["COUNT(", "SUM(", "AVG(", "MAX(", "MIN(", "GROUP_CONCAT("];
        let mut aggregate_hits = 0usize;
        for &name in AGGREGATES.iter() {
            aggregate_hits += keyword_hits(name);
        }

        let mentions_uri = query.contains("http://") || query.contains("https://");
        let mentions_filter = upper.contains("FILTER");
        let selectivity = if mentions_uri {
            if mentions_filter {
                0.9
            } else {
                0.6
            }
        } else if mentions_filter {
            0.5
        } else {
            0.2
        };

        // `*` is weighted double relative to `/` and `+`.
        let slashes = query.matches('/').count();
        let pluses = query.matches('+').count();
        let stars = query.matches('*').count();
        let path_complexity = (slashes + pluses + stars * 2) as f64;

        Self {
            triple_pattern_count,
            optional_count: keyword_hits("OPTIONAL") as f64,
            union_count: keyword_hits("UNION") as f64,
            filter_count: keyword_hits("FILTER") as f64,
            order_by_count: keyword_hits("ORDER BY") as f64,
            group_by_count: keyword_hits("GROUP BY") as f64,
            // The first SELECT is the query itself; any further ones are
            // counted as subqueries.
            subquery_count: keyword_hits("SELECT").saturating_sub(1) as f64,
            aggregation_count: aggregate_hits as f64,
            has_limit: indicator(upper.contains("LIMIT")),
            has_distinct: indicator(upper.contains("DISTINCT")),
            selectivity,
            path_complexity,
            bind_count: keyword_hits("BIND(") as f64,
            service_count: keyword_hits("SERVICE") as f64,
            named_graph_count: keyword_hits("GRAPH") as f64,
        }
    }

    /// Returns the features as a vector of [`Self::FEATURE_DIM`] values, in the
    /// same order as [`Self::feature_names`].
    ///
    /// `path_complexity` is divided by 10 here; every other field is copied
    /// unchanged.
    pub fn to_array(&self) -> Array1<f64> {
        let values: [f64; Self::FEATURE_DIM] = [
            self.triple_pattern_count,
            self.optional_count,
            self.union_count,
            self.filter_count,
            self.order_by_count,
            self.group_by_count,
            self.subquery_count,
            self.aggregation_count,
            self.has_limit,
            self.has_distinct,
            self.selectivity,
            self.path_complexity / 10.0,
            self.bind_count,
            self.service_count,
            self.named_graph_count,
        ];
        Array1::from(values.to_vec())
    }

    /// Human-readable labels for the entries of [`Self::to_array`], in order.
    pub fn feature_names() -> Vec<&'static str> {
        let labels: [&'static str; Self::FEATURE_DIM] = [
            "Triple Patterns",
            "OPTIONAL Clauses",
            "UNION Clauses",
            "FILTER Expressions",
            "ORDER BY",
            "GROUP BY",
            "Subqueries",
            "Aggregations",
            "Has LIMIT",
            "Has DISTINCT",
            "Selectivity",
            "Path Complexity",
            "BIND Clauses",
            "SERVICE Calls",
            "Named Graphs",
        ];
        labels.to_vec()
    }

    /// Weighted sum of selected features, capped at 100.
    ///
    /// SERVICE calls carry the largest weight (15), followed by subqueries
    /// (10) and OPTIONAL/UNION clauses (5 each).
    pub fn complexity_score(&self) -> f64 {
        // Accumulated term by term in a fixed order so the floating-point
        // result does not depend on summation order.
        let mut raw = self.triple_pattern_count * 2.0;
        raw += self.optional_count * 5.0;
        raw += self.union_count * 5.0;
        raw += self.filter_count * 3.0;
        raw += self.subquery_count * 10.0;
        raw += self.aggregation_count * 3.0;
        raw += self.service_count * 15.0;
        raw += self.path_complexity * 2.0;
        raw += self.named_graph_count * 2.0;
        raw.min(100.0)
    }
}
/// One recorded execution of a query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionMeasurement {
/// Execution time in microseconds.
pub duration_us: u64,
/// Number of results reported for this execution.
pub result_count: usize,
/// Seconds since the Unix epoch at the time the measurement was created
/// (set by `ExecutionMeasurement::new`).
pub timestamp_secs: u64,
}
impl ExecutionMeasurement {
    /// Creates a measurement stamped with the current wall-clock time.
    ///
    /// * `duration` - how long the execution took; stored in microseconds and
    ///   saturated at `u64::MAX` if it does not fit.
    /// * `result_count` - number of results the execution produced.
    ///
    /// If the system clock reports a time before the Unix epoch the timestamp
    /// falls back to 0.
    pub fn new(duration: Duration, result_count: usize) -> Self {
        let timestamp_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();
        // `as_micros` yields a u128; clamp before narrowing so that an
        // oversized duration saturates instead of silently wrapping around.
        let duration_us = duration.as_micros().min(u128::from(u64::MAX)) as u64;
        Self {
            duration_us,
            result_count,
            timestamp_secs,
        }
    }
}
/// Accumulated execution history and statistics for one query fingerprint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryProfile {
/// Key identifying the query (supplied by the caller of `QueryProfile::new`).
pub fingerprint: String,
/// Query text the profile was created with.
pub query_text: String,
/// Features extracted from `query_text` when the profile was created.
pub features: QueryProfileFeatures,
/// Retained measurements, newest at the front.
pub history: VecDeque<ExecutionMeasurement>,
/// Total number of measurements ever recorded, including ones that have
/// since been dropped from `history`.
pub execution_count: u64,
/// Statistics over the measurements currently in `history`; refreshed on
/// every `record` call.
pub stats: ProfileStats,
}
/// Summary statistics over a profile's retained measurements.
/// Durations are in microseconds. The `Default` value (all zeros) is used
/// when there are no measurements.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ProfileStats {
/// Arithmetic mean of the durations.
pub mean_us: f64,
/// Sample standard deviation (n - 1 denominator); 0 for a single sample.
pub std_dev_us: f64,
/// Smallest duration.
pub min_us: u64,
/// Largest duration.
pub max_us: u64,
/// Duration at index `floor(n * 0.50)` of the sorted durations.
pub p50_us: u64,
/// Duration at index `floor(n * 0.95)` of the sorted durations, clamped to the last index.
pub p95_us: u64,
/// Duration at index `floor(n * 0.99)` of the sorted durations, clamped to the last index.
pub p99_us: u64,
/// Coefficient of variation (`std_dev_us / mean_us`), or 0 when the mean is 0.
pub cv: f64,
/// Mean of the recorded result counts.
pub avg_results: f64,
/// Number of measurements the statistics were computed from.
pub sample_count: usize,
}
impl QueryProfile {
    /// Creates an empty profile for `query_text`, extracting its features
    /// immediately. `fingerprint` is stored as given.
    pub fn new(fingerprint: String, query_text: String) -> Self {
        let features = QueryProfileFeatures::extract(&query_text);
        Self {
            fingerprint,
            query_text,
            features,
            history: VecDeque::with_capacity(MAX_HISTORY_PER_QUERY),
            execution_count: 0,
            stats: ProfileStats::default(),
        }
    }

    /// Adds a measurement to the front of the history and refreshes `stats`.
    ///
    /// The history is capped at `MAX_HISTORY_PER_QUERY` entries; the oldest
    /// entries (at the back) are discarded to make room.
    pub fn record(&mut self, measurement: ExecutionMeasurement) {
        // Evict until there is room rather than popping exactly once:
        // `history` is public and deserializable, so it may arrive longer than
        // the cap and would otherwise never shrink back to it. The
        // `pop_back().is_some()` guard ends the loop once the deque is empty.
        while self.history.len() >= MAX_HISTORY_PER_QUERY && self.history.pop_back().is_some() {}
        self.history.push_front(measurement);
        self.execution_count += 1;
        self.recompute_stats();
    }

    /// Recomputes `stats` from the measurements currently in `history`.
    /// Resets to the default (all-zero) statistics when the history is empty.
    fn recompute_stats(&mut self) {
        let n = self.history.len();
        if n == 0 {
            self.stats = ProfileStats::default();
            return;
        }

        let mut durations: Vec<u64> = self.history.iter().map(|m| m.duration_us).collect();
        durations.sort_unstable();

        // Accumulate in u128: a u64 sum of very large durations would overflow
        // (a panic in debug builds, wrap-around in release builds).
        let total: u128 = durations.iter().map(|&d| u128::from(d)).sum();
        let mean = total as f64 / n as f64;

        // Sample variance (n - 1 denominator); undefined for one sample, so 0.
        let variance = if n > 1 {
            let sq_sum: f64 = durations.iter().map(|&d| (d as f64 - mean).powi(2)).sum();
            sq_sum / (n - 1) as f64
        } else {
            0.0
        };
        let std_dev = variance.sqrt();

        // Percentile = element at floor(n * q) of the sorted samples, clamped
        // to the last index.
        let percentile = |q: f64| durations[((n as f64 * q) as usize).min(n - 1)];

        let avg_results = self
            .history
            .iter()
            .map(|m| m.result_count as f64)
            .sum::<f64>()
            / n as f64;
        let cv = if mean > 0.0 { std_dev / mean } else { 0.0 };

        self.stats = ProfileStats {
            mean_us: mean,
            std_dev_us: std_dev,
            min_us: durations[0],
            max_us: durations[n - 1],
            p50_us: percentile(0.50),
            p95_us: percentile(0.95),
            p99_us: percentile(0.99),
            cv,
            avg_results,
            sample_count: n,
        };
    }
}
/// A single piece of optimization advice produced by `generate_suggestions`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptimizationSuggestion {
/// How important the suggestion is; results are sorted by this.
pub severity: SuggestionSeverity,
/// Short headline.
pub title: String,
/// Explanation of the problem and what to do about it.
pub description: String,
/// Optional illustrative query fragment or rewrite hint.
pub example: Option<String>,
/// Rough expected improvement in percent; a fixed value per rule.
pub estimated_improvement_pct: f64,
}
/// Severity levels for optimization suggestions, listed from most to least
/// severe (the order `generate_suggestions` sorts its output in).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SuggestionSeverity {
Critical,
High,
Medium,
Low,
Info,
}
impl SuggestionSeverity {
pub fn label(&self) -> &'static str {
match self {
Self::Critical => "CRITICAL",
Self::High => "HIGH",
Self::Medium => "MEDIUM",
Self::Low => "LOW",
Self::Info => "INFO",
}
}
}
/// Applies a fixed set of heuristic rules to a query's features and returns
/// the matching suggestions, most severe first.
///
/// * `features` - features previously extracted from the query.
/// * `query` - the raw query text; only used for the `SELECT *` rule.
///
/// Suggestions of equal severity keep the order in which their rules are
/// checked (the sort is stable).
pub fn generate_suggestions(
    features: &QueryProfileFeatures,
    query: &str,
) -> Vec<OptimizationSuggestion> {
    /// Assembles one suggestion from borrowed pieces.
    fn make(
        severity: SuggestionSeverity,
        title: &str,
        description: String,
        example: Option<&str>,
        estimated_improvement_pct: f64,
    ) -> OptimizationSuggestion {
        OptimizationSuggestion {
            severity,
            title: title.to_string(),
            description,
            example: example.map(|e| e.to_string()),
            estimated_improvement_pct,
        }
    }

    /// Sort key: lower rank means more severe.
    fn rank(severity: &SuggestionSeverity) -> u8 {
        match severity {
            SuggestionSeverity::Critical => 0,
            SuggestionSeverity::High => 1,
            SuggestionSeverity::Medium => 2,
            SuggestionSeverity::Low => 3,
            SuggestionSeverity::Info => 4,
        }
    }

    let mut found = Vec::new();
    // Several rules only apply when the query has no LIMIT.
    let unbounded = features.has_limit < 0.5;

    if features.service_count > 0.0 {
        found.push(make(
            SuggestionSeverity::Critical,
            "Federation SERVICE calls detected",
            format!(
                "{} SERVICE call(s) found. Federated queries add significant network latency. \
                 Consider materializing remote data locally or using query federation sparingly.",
                features.service_count as usize
            ),
            Some("Cache remote data: INSERT { ?s ?p ?o } WHERE { SERVICE <...> { ?s ?p ?o } }"),
            70.0,
        ));
    }

    if unbounded && features.triple_pattern_count >= 2.0 {
        found.push(make(
            SuggestionSeverity::High,
            "Missing LIMIT clause",
            "Query returns potentially unbounded result set. Add LIMIT to avoid \
             loading millions of results into memory."
                .to_string(),
            Some("SELECT * WHERE { ... } LIMIT 1000"),
            50.0,
        ));
    }

    if features.optional_count >= 3.0 {
        found.push(make(
            SuggestionSeverity::High,
            "Excessive OPTIONAL clauses",
            format!(
                "{} OPTIONAL clauses create a left-outer-join chain that grows exponentially. \
                 Consider restructuring with EXISTS/NOT EXISTS or separate queries.",
                features.optional_count as usize
            ),
            Some("Replace: OPTIONAL { ... } OPTIONAL { ... }\nWith: FILTER EXISTS { ... }"),
            40.0,
        ));
    }

    if features.union_count >= 2.0 {
        found.push(make(
            SuggestionSeverity::Medium,
            "Multiple UNION branches",
            format!(
                "{} UNION clauses each execute independently. Consider using VALUES or property paths.",
                features.union_count as usize
            ),
            Some("Replace: { ?s a :A } UNION { ?s a :B }\nWith: VALUES ?type { :A :B } ?s a ?type"),
            30.0,
        ));
    }

    if features.triple_pattern_count > 5.0 && features.filter_count < 1.0 {
        found.push(make(
            SuggestionSeverity::High,
            "Potential Cartesian product",
            format!(
                "{} triple patterns with no FILTER may produce a Cartesian product. \
                 Ensure all variables are properly joined.",
                features.triple_pattern_count as usize
            ),
            None,
            60.0,
        ));
    }

    if features.subquery_count >= 2.0 {
        found.push(make(
            SuggestionSeverity::Medium,
            "Nested subqueries",
            format!(
                "{} subquery level(s) detected. Deeply nested subqueries may be hard for the \
                 optimizer to push down efficiently.",
                features.subquery_count as usize
            ),
            Some("Consider flattening subqueries where GROUP BY is not needed."),
            20.0,
        ));
    }

    if features.path_complexity > 5.0 {
        found.push(make(
            SuggestionSeverity::Medium,
            "Complex property paths",
            format!(
                "Property path complexity score: {:.0}. Kleene star (*) and plus (+) operators on \
                 dense graphs can cause exponential traversal.",
                features.path_complexity
            ),
            Some("Bound path length: ?a :rel{1,3} ?b instead of ?a :rel* ?b"),
            35.0,
        ));
    }

    if features.order_by_count > 0.0 && unbounded {
        found.push(make(
            SuggestionSeverity::Medium,
            "ORDER BY without LIMIT",
            "Sorting an unbounded result set requires materializing all results in memory."
                .to_string(),
            Some("SELECT ... ORDER BY ?x LIMIT 100"),
            25.0,
        ));
    }

    if features.has_distinct > 0.5 && unbounded {
        found.push(make(
            SuggestionSeverity::Low,
            "DISTINCT without LIMIT",
            "DISTINCT forces full result materialization for deduplication. Add LIMIT or investigate if DISTINCT is necessary.".to_string(),
            None,
            15.0,
        ));
    }

    if features.named_graph_count > 5.0 {
        found.push(make(
            SuggestionSeverity::Low,
            "Many named graph references",
            format!(
                "{} named graph GRAPH clauses. Consider using the default graph or dataset-level filtering.",
                features.named_graph_count as usize
            ),
            None,
            10.0,
        ));
    }

    let selects_everything = query.to_uppercase().contains("SELECT *");
    if selects_everything && features.triple_pattern_count > 3.0 {
        found.push(make(
            SuggestionSeverity::Low,
            "SELECT * with complex pattern",
            "SELECT * projects all variables which may include unwanted bindings. \
             Explicit variable selection reduces memory usage."
                .to_string(),
            Some("SELECT ?subject ?label WHERE { ... }"),
            10.0,
        ));
    }

    // Stable sort: rule order is preserved within each severity level.
    found.sort_by_key(|s| rank(&s.severity));
    found
}
/// Linear model (`bias + weights . features`) that estimates a query's
/// execution time from its `QueryProfileFeatures`.
pub struct QueryTimePredictor {
// One coefficient per entry of `QueryProfileFeatures::to_array`.
weights: Array1<f64>,
// Constant term added to the weighted sum.
bias: f64,
// Set to true once `train` has completed successfully.
trained: bool,
// Number of rows used by the most recent successful `train` call.
sample_count: usize,
}
impl QueryTimePredictor {
    /// Creates an untrained predictor with hand-picked starting weights (one
    /// per feature, in `QueryProfileFeatures::to_array` order) and a bias of
    /// 2.0.
    pub fn new() -> Self {
        let weights = Array1::from(vec![
            5.0, 15.0, 12.0, 3.0, 4.0, 4.0, 25.0, 5.0, -8.0, 4.0, -5.0, 8.0, 2.0, 100.0, 3.0,
        ]);
        Self {
            weights,
            bias: 2.0,
            trained: false,
            sample_count: 0,
        }
    }

    /// Predicts the execution time for `features` as `bias + weights . x`,
    /// never returning less than 0.5.
    ///
    /// The callers in this file treat the value as milliseconds.
    pub fn predict(&self, features: &QueryProfileFeatures) -> f64 {
        let x = features.to_array();
        let dot: f64 = x.iter().zip(self.weights.iter()).map(|(a, b)| a * b).sum();
        (self.bias + dot).max(0.5)
    }

    /// Refines the model with stochastic gradient descent (learning rate
    /// 0.001, 50 epochs, squared-error loss), starting from the current
    /// weights.
    ///
    /// * `feature_matrix` - one row per sample, `FEATURE_DIM` columns.
    /// * `targets` - expected value for each row.
    ///
    /// # Errors
    ///
    /// Returns an error, leaving the predictor unchanged, when
    /// * fewer than 2 samples are supplied,
    /// * the column count is not `QueryProfileFeatures::FEATURE_DIM`,
    /// * `targets` has a different length than the number of rows,
    /// * any input value is NaN or infinite, or
    /// * the optimisation diverges to non-finite parameters.
    pub fn train(&mut self, feature_matrix: &Array2<f64>, targets: &Array1<f64>) -> Result<()> {
        let n = feature_matrix.nrows();
        let d = feature_matrix.ncols();
        if n < 2 {
            return Err(anyhow!("Need at least 2 samples to train"));
        }
        if d != QueryProfileFeatures::FEATURE_DIM {
            return Err(anyhow!(
                "Feature matrix has {} columns, expected {}",
                d,
                QueryProfileFeatures::FEATURE_DIM
            ));
        }
        if targets.len() != n {
            return Err(anyhow!(
                "Target vector length {} != sample count {}",
                targets.len(),
                n
            ));
        }
        // A single NaN/inf would propagate into every weight and make
        // `predict` fall back to its 0.5 floor for all inputs.
        let inputs_finite = feature_matrix.iter().all(|v| v.is_finite())
            && targets.iter().all(|t| t.is_finite());
        if !inputs_finite {
            return Err(anyhow!("Training data contains NaN or infinite values"));
        }

        let lr = 0.001;
        let n_epochs = 50;

        // Train on a scratch copy so that a failed run cannot leave the model
        // half-updated.
        let mut weights: Vec<f64> = self.weights.iter().copied().collect();
        let mut bias = self.bias;
        for _ in 0..n_epochs {
            for i in 0..n {
                let row = feature_matrix.row(i);
                let pred: f64 = bias
                    + row
                        .iter()
                        .zip(weights.iter())
                        .map(|(a, b)| a * b)
                        .sum::<f64>();
                let err = pred - targets[i];
                for (w, x) in weights.iter_mut().zip(row.iter()) {
                    *w -= lr * err * *x;
                }
                bias -= lr * err;
            }
        }

        // With unscaled features the fixed learning rate can overshoot; refuse
        // to commit parameters that blew up.
        if !bias.is_finite() || weights.iter().any(|w| !w.is_finite()) {
            return Err(anyhow!(
                "Training diverged: model parameters became non-finite"
            ));
        }

        for (j, w) in weights.into_iter().enumerate() {
            self.weights[j] = w;
        }
        self.bias = bias;
        self.trained = true;
        self.sample_count = n;
        Ok(())
    }

    /// Whether `train` has completed successfully at least once.
    pub fn is_trained(&self) -> bool {
        self.trained
    }

    /// Number of samples used by the most recent successful `train` call
    /// (0 before any training).
    pub fn sample_count(&self) -> usize {
        self.sample_count
    }
}
impl Default for QueryTimePredictor {
fn default() -> Self {
Self::new()
}
}
/// In-memory collection of query profiles, one per query fingerprint.
#[derive(Default)]
pub struct QueryProfileStore {
// Profiles keyed by the fingerprint of their query text.
profiles: HashMap<String, QueryProfile>,
}
impl QueryProfileStore {
pub fn new() -> Self {
Self {
profiles: HashMap::new(),
}
}
pub fn record(&mut self, query: &str, duration: Duration, result_count: usize) {
let fp = fingerprint_query(query);
let profile = self
.profiles
.entry(fp.clone())
.or_insert_with(|| QueryProfile::new(fp, query.to_string()));
profile.record(ExecutionMeasurement::new(duration, result_count));
}
pub fn get_by_fingerprint(&self, fp: &str) -> Option<&QueryProfile> {
self.profiles.get(fp)
}
pub fn get(&self, query: &str) -> Option<&QueryProfile> {
let fp = fingerprint_query(query);
self.profiles.get(&fp)
}
pub fn slowest_queries(&self, top_n: usize) -> Vec<&QueryProfile> {
let mut profiles: Vec<&QueryProfile> = self.profiles.values().collect();
profiles.sort_by(|a, b| {
b.stats
.mean_us
.partial_cmp(&a.stats.mean_us)
.unwrap_or(std::cmp::Ordering::Equal)
});
profiles.truncate(top_n);
profiles
}
pub fn profile_count(&self) -> usize {
self.profiles.len()
}
pub fn total_measurements(&self) -> u64 {
self.profiles.values().map(|p| p.execution_count).sum()
}
pub fn build_feature_matrix(&self) -> (Array2<f64>, Array1<f64>) {
let profiles: Vec<&QueryProfile> = self.profiles.values().collect();
let n = profiles.len();
let d = QueryProfileFeatures::FEATURE_DIM;
let mut matrix_data = vec![0.0f64; n * d];
let mut targets = vec![0.0f64; n];
for (i, profile) in profiles.iter().enumerate() {
let row = profile.features.to_array();
for j in 0..d {
matrix_data[i * d + j] = row[j];
}
targets[i] = profile.stats.mean_us / 1000.0; }
let matrix =
Array2::from_shape_vec((n, d), matrix_data).unwrap_or_else(|_| Array2::zeros((n, d)));
let target_vec = Array1::from(targets);
(matrix, target_vec)
}
}
/// CLI entry point: profiles a SPARQL query and prints a report.
///
/// * `dataset` - dataset name; only used in the informational message.
/// * `query` - the query text, or a path to a file containing it when
///   `is_file` is true.
/// * `iterations` - number of profiling iterations, clamped to `1..=100`.
/// * `show_suggestions` - whether to append optimization suggestions.
///
/// NOTE(review): no query is actually executed here. Each recorded duration is
/// the (near-zero) elapsed time of an empty interval plus a synthetic cost
/// derived from the query's complexity score, and the result count is always 0.
///
/// # Errors
///
/// Fails when the query file cannot be read or the profile is missing after
/// recording.
pub async fn run_profile_command(
    dataset: String,
    query: String,
    is_file: bool,
    iterations: usize,
    show_suggestions: bool,
) -> Result<()> {
    let ctx = crate::cli::CliContext::new();
    ctx.info(&format!("Profiling query on dataset '{}'", dataset));

    let sparql_query = if !is_file {
        query
    } else {
        match std::fs::read_to_string(&query) {
            Ok(text) => text,
            Err(e) => return Err(anyhow!("Failed to read query file: {}", e)),
        }
    };
    let features = QueryProfileFeatures::extract(&sparql_query);
    let mut store = QueryProfileStore::new();

    let iters = iterations.clamp(1, 100);
    let bar_style = indicatif::ProgressStyle::default_bar()
        .template("{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} iterations {elapsed_precise}")
        .unwrap_or_else(|_| indicatif::ProgressStyle::default_bar());
    let pb = indicatif::ProgressBar::new(iters as u64);
    pb.set_style(bar_style);

    // The synthetic per-iteration cost depends only on the query, so compute
    // it once up front.
    let synthetic_cost = Duration::from_micros((features.complexity_score() * 100.0) as u64 + 1000);
    for _ in 0..iters {
        let start = Instant::now();
        let elapsed = start.elapsed() + synthetic_cost;
        store.record(&sparql_query, elapsed, 0);
        pb.inc(1);
    }
    pb.finish_and_clear();

    let profile = match store.get(&sparql_query) {
        Some(found) => found,
        None => return Err(anyhow!("Profile not found after recording")),
    };
    let stats = &profile.stats;
    // Statistics are stored in microseconds; the report shows milliseconds.
    let to_ms = |us: f64| us / 1000.0;

    println!();
    println!("{}", "Query Profile Report".cyan().bold());
    println!("{}", "=".repeat(60));
    println!();
    println!(" Fingerprint: {}", profile.fingerprint.dimmed());
    println!(" Complexity: {:.1}/100", features.complexity_score());
    println!();

    println!("{}", "Execution Time Statistics:".bold());
    println!(" Mean: {:.2}ms", to_ms(stats.mean_us));
    println!(" Std Dev: {:.2}ms", to_ms(stats.std_dev_us));
    println!(" Min: {:.2}ms", to_ms(stats.min_us as f64));
    println!(" Max: {:.2}ms", to_ms(stats.max_us as f64));
    println!(" p50: {:.2}ms", to_ms(stats.p50_us as f64));
    println!(" p95: {:.2}ms", to_ms(stats.p95_us as f64));
    println!(" p99: {:.2}ms", to_ms(stats.p99_us as f64));
    println!(" CV: {:.3}", stats.cv);
    println!(" Samples: {}", stats.sample_count);
    println!();

    // Only features with a positive value are listed.
    println!("{}", "Query Features:".bold());
    let names = QueryProfileFeatures::feature_names();
    let arr = features.to_array();
    for (name, val) in names.iter().zip(arr.iter()).filter(|(_, v)| **v > 0.0) {
        println!(" {:25} {:.2}", name, val);
    }
    println!();

    // Uses the untrained predictor, i.e. its built-in starting weights.
    let predicted_ms = QueryTimePredictor::new().predict(&features);
    println!("{}", "ML Time Prediction:".bold());
    println!(" Predicted: {:.2}ms", predicted_ms);
    println!(" Actual mean: {:.2}ms", to_ms(stats.mean_us));
    println!();

    if !show_suggestions {
        return Ok(());
    }

    let suggestions = generate_suggestions(&features, &sparql_query);
    if suggestions.is_empty() {
        println!("{}", "No optimization suggestions found.".green());
        return Ok(());
    }

    println!("{}", "Optimization Suggestions:".bold());
    for (i, s) in suggestions.iter().enumerate() {
        let label = s.severity.label();
        let severity_str = match s.severity {
            SuggestionSeverity::Critical => label.red().bold(),
            SuggestionSeverity::High => label.yellow().bold(),
            SuggestionSeverity::Medium => label.cyan(),
            SuggestionSeverity::Low => label.normal(),
            SuggestionSeverity::Info => label.dimmed(),
        };
        println!();
        println!(" {}. [{}] {}", i + 1, severity_str, s.title.bold());
        println!(" {}", s.description);
        if let Some(ref ex) = s.example {
            println!(" Example: {}", ex.dimmed());
        }
        println!(
            " Estimated improvement: {:.0}%",
            s.estimated_improvement_pct
        );
    }
    Ok(())
}
// Unit tests for feature extraction, profile statistics, the profile store,
// suggestion generation and the time predictor.
#[cfg(test)]
mod tests {
use super::*;
// Minimal single-pattern query with a LIMIT.
fn simple_query() -> &'static str {
"SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100"
}
// Query exercising SERVICE, three OPTIONALs, FILTER, GRAPH, an aggregate,
// DISTINCT, GROUP BY and ORDER BY, with no LIMIT.
fn complex_query() -> &'static str {
"SELECT DISTINCT ?name (COUNT(?order) AS ?total) \
WHERE { \
SERVICE <http://remote.org/sparql> { ?person foaf:name ?name } \
OPTIONAL { ?person :order ?order } \
OPTIONAL { ?order :total ?amount } \
OPTIONAL { ?order :status ?status } \
FILTER(?amount > 100) \
GRAPH <http://data.org/graph1> { ?person a :Customer } \
} \
GROUP BY ?name \
ORDER BY DESC(?total)"
}
#[test]
fn test_feature_extraction_simple() {
let f = QueryProfileFeatures::extract(simple_query());
assert!(f.has_limit > 0.5);
assert_eq!(f.optional_count, 0.0);
assert_eq!(f.service_count, 0.0);
}
#[test]
fn test_feature_extraction_complex() {
let f = QueryProfileFeatures::extract(complex_query());
assert!(f.service_count > 0.0);
assert!(f.optional_count >= 3.0);
assert!(f.aggregation_count > 0.0);
assert!(f.group_by_count > 0.0);
assert!(f.has_distinct > 0.5);
assert!(f.filter_count > 0.0);
assert!(f.named_graph_count > 0.0);
}
#[test]
fn test_complexity_score_simple() {
let f = QueryProfileFeatures::extract(simple_query());
let score = f.complexity_score();
assert!((0.0..=100.0).contains(&score));
let f_complex = QueryProfileFeatures::extract(complex_query());
assert!(f_complex.complexity_score() > score);
}
#[test]
fn test_feature_to_array_dim() {
let f = QueryProfileFeatures::extract(simple_query());
let arr = f.to_array();
assert_eq!(arr.len(), QueryProfileFeatures::FEATURE_DIM);
}
#[test]
fn test_feature_names_count() {
assert_eq!(
QueryProfileFeatures::feature_names().len(),
QueryProfileFeatures::FEATURE_DIM
);
}
#[test]
fn test_query_profile_record_and_stats() {
let fp = fingerprint_query(simple_query());
let mut profile = QueryProfile::new(fp, simple_query().to_string());
profile.record(ExecutionMeasurement::new(Duration::from_millis(10), 5));
profile.record(ExecutionMeasurement::new(Duration::from_millis(20), 10));
profile.record(ExecutionMeasurement::new(Duration::from_millis(30), 15));
assert_eq!(profile.execution_count, 3);
assert_eq!(profile.stats.sample_count, 3);
assert!((profile.stats.mean_us - 20_000.0).abs() < 1.0);
assert_eq!(profile.stats.min_us, 10_000);
assert_eq!(profile.stats.max_us, 30_000);
}
#[test]
fn test_profile_stats_p50() {
let fp = fingerprint_query(simple_query());
let mut profile = QueryProfile::new(fp, simple_query().to_string());
for ms in [10, 20, 30, 40, 50] {
profile.record(ExecutionMeasurement::new(Duration::from_millis(ms), 0));
}
// With five samples the p50 index is floor(5 * 0.5) = 2, the middle value.
assert_eq!(profile.stats.p50_us, 30_000);
}
#[test]
fn test_profile_cv_stable_query() {
let fp = fingerprint_query(simple_query());
let mut profile = QueryProfile::new(fp, simple_query().to_string());
for _ in 0..10 {
profile.record(ExecutionMeasurement::new(Duration::from_millis(50), 0));
}
assert!(profile.stats.cv < 0.001);
}
#[test]
fn test_profile_store_record_and_get() {
let mut store = QueryProfileStore::new();
let query = simple_query();
store.record(query, Duration::from_millis(10), 5);
store.record(query, Duration::from_millis(20), 10);
let profile = store.get(query);
assert!(profile.is_some());
assert_eq!(profile.unwrap().execution_count, 2);
}
#[test]
fn test_profile_store_slowest_queries() {
let mut store = QueryProfileStore::new();
let q1 = "SELECT ?s WHERE { ?s a :Fast } LIMIT 10";
let q2 = "SELECT ?s ?p ?o WHERE { ?s ?p ?o }";
store.record(q1, Duration::from_millis(5), 0);
store.record(q2, Duration::from_millis(100), 0);
store.record(q2, Duration::from_millis(200), 0);
let slowest = store.slowest_queries(2);
assert_eq!(slowest.len(), 2);
assert!(slowest[0].stats.mean_us >= slowest[1].stats.mean_us);
}
#[test]
fn test_profile_store_fingerprint_dedup() {
// Queries differing only in surrounding whitespace must share one profile.
let mut store = QueryProfileStore::new();
let q = "SELECT * WHERE { ?s ?p ?o }";
let q_whitespace = " SELECT * WHERE { ?s ?p ?o } ";
store.record(q, Duration::from_millis(10), 0);
store.record(q_whitespace, Duration::from_millis(20), 0);
assert_eq!(fingerprint_query(q), fingerprint_query(q_whitespace));
assert_eq!(store.profile_count(), 1);
}
#[test]
fn test_profile_store_count_and_measurements() {
let mut store = QueryProfileStore::new();
store.record(
"SELECT * WHERE { ?s ?p ?o } LIMIT 5",
Duration::from_millis(10),
0,
);
store.record(
"SELECT ?s WHERE { ?s a :Person }",
Duration::from_millis(20),
0,
);
assert_eq!(store.profile_count(), 2);
assert_eq!(store.total_measurements(), 2);
}
#[test]
fn test_generate_suggestions_no_limit() {
let q = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . ?s a :Person . ?p a :Property }";
let features = QueryProfileFeatures::extract(q);
let suggestions = generate_suggestions(&features, q);
let has_limit_suggestion = suggestions.iter().any(|s| s.title.contains("LIMIT"));
assert!(has_limit_suggestion);
}
#[test]
fn test_generate_suggestions_service_call() {
let q = "SELECT ?s WHERE { SERVICE <http://remote.org/sparql> { ?s ?p ?o } }";
let features = QueryProfileFeatures::extract(q);
let suggestions = generate_suggestions(&features, q);
assert!(!suggestions.is_empty());
// Critical suggestions sort first.
assert_eq!(suggestions[0].severity, SuggestionSeverity::Critical);
assert!(suggestions[0].title.contains("SERVICE"));
}
#[test]
fn test_generate_suggestions_optional_chain() {
let q = "SELECT * WHERE { ?s ?p ?o OPTIONAL { ?a ?b ?c } OPTIONAL { ?d ?e ?f } OPTIONAL { ?g ?h ?i } }";
let features = QueryProfileFeatures::extract(q);
let suggestions = generate_suggestions(&features, q);
let has_optional = suggestions.iter().any(|s| s.title.contains("OPTIONAL"));
assert!(has_optional);
}
#[test]
fn test_generate_suggestions_good_query() {
let q = "SELECT ?s WHERE { ?s a <http://example.org/Person> . ?s <http://example.org/name> ?n FILTER(?n = \"Alice\") } LIMIT 10";
let features = QueryProfileFeatures::extract(q);
let suggestions = generate_suggestions(&features, q);
let critical_count = suggestions
.iter()
.filter(|s| s.severity == SuggestionSeverity::Critical)
.count();
assert_eq!(critical_count, 0);
}
#[test]
fn test_predictor_default_prediction() {
let predictor = QueryTimePredictor::new();
let features = QueryProfileFeatures::extract(simple_query());
let pred = predictor.predict(&features);
assert!(pred > 0.0);
assert!(pred < 10000.0); }
#[test]
fn test_predictor_service_adds_latency() {
let predictor = QueryTimePredictor::new();
let f_simple = QueryProfileFeatures::extract("SELECT * WHERE { ?s ?p ?o } LIMIT 10");
let f_service = QueryProfileFeatures::extract(
"SELECT * WHERE { SERVICE <http://r.org/s> { ?s ?p ?o } } LIMIT 10",
);
let pred_simple = predictor.predict(&f_simple);
let pred_service = predictor.predict(&f_service);
assert!(pred_service > pred_simple);
}
#[test]
fn test_predictor_train() {
let mut predictor = QueryTimePredictor::new();
let queries = [
"SELECT * WHERE { ?s ?p ?o } LIMIT 10",
"SELECT * WHERE { ?s ?p ?o . ?s a :Person }",
];
let n = queries.len();
let d = QueryProfileFeatures::FEATURE_DIM;
// Row-major feature matrix, one row per query.
let mut matrix_data = vec![0.0f64; n * d];
let targets = vec![5.0f64, 20.0f64];
for (i, q) in queries.iter().enumerate() {
let features = QueryProfileFeatures::extract(q);
let arr = features.to_array();
for j in 0..d {
matrix_data[i * d + j] = arr[j];
}
}
let matrix = Array2::from_shape_vec((n, d), matrix_data).unwrap();
let target_vec = Array1::from(targets);
predictor.train(&matrix, &target_vec).unwrap();
assert!(predictor.is_trained());
assert_eq!(predictor.sample_count(), n);
}
#[test]
fn test_predictor_train_insufficient_data() {
// Training requires at least two samples.
let mut predictor = QueryTimePredictor::new();
let n = 1;
let d = QueryProfileFeatures::FEATURE_DIM;
let matrix = Array2::zeros((n, d));
let targets = Array1::from(vec![5.0]);
let result = predictor.train(&matrix, &targets);
assert!(result.is_err());
}
#[test]
fn test_build_feature_matrix() {
let mut store = QueryProfileStore::new();
store.record(simple_query(), Duration::from_millis(10), 5);
store.record(complex_query(), Duration::from_millis(500), 100);
let (matrix, targets) = store.build_feature_matrix();
assert_eq!(matrix.nrows(), 2);
assert_eq!(matrix.ncols(), QueryProfileFeatures::FEATURE_DIM);
assert_eq!(targets.len(), 2);
}
#[test]
fn test_normalize_sparql() {
let q1 = " SELECT ?s WHERE { ?s ?p ?o } ";
let q2 = "SELECT ?s WHERE { ?s ?p ?o }";
assert_eq!(normalize_sparql(q1), normalize_sparql(q2));
}
#[test]
fn test_suggestion_severity_ordering() {
let features = QueryProfileFeatures::extract(complex_query());
let suggestions = generate_suggestions(&features, complex_query());
// Only the first two suggestions are compared.
if suggestions.len() >= 2 {
let first_order = match suggestions[0].severity {
SuggestionSeverity::Critical => 0,
SuggestionSeverity::High => 1,
SuggestionSeverity::Medium => 2,
SuggestionSeverity::Low => 3,
SuggestionSeverity::Info => 4,
};
let second_order = match suggestions[1].severity {
SuggestionSeverity::Critical => 0,
SuggestionSeverity::High => 1,
SuggestionSeverity::Medium => 2,
SuggestionSeverity::Low => 3,
SuggestionSeverity::Info => 4,
};
assert!(first_order <= second_order);
}
}
#[test]
fn test_execution_measurement_new() {
let m = ExecutionMeasurement::new(Duration::from_millis(42), 7);
assert_eq!(m.duration_us, 42_000);
assert_eq!(m.result_count, 7);
assert!(m.timestamp_secs > 0);
}
#[test]
fn test_profile_history_ring_buffer() {
// Recording more than MAX_HISTORY_PER_QUERY measurements must not grow the
// history past the cap.
let fp = fingerprint_query(simple_query());
let mut profile = QueryProfile::new(fp, simple_query().to_string());
for i in 0..=(MAX_HISTORY_PER_QUERY + 10) {
profile.record(ExecutionMeasurement::new(
Duration::from_millis(i as u64 + 1),
0,
));
}
assert!(profile.history.len() <= MAX_HISTORY_PER_QUERY);
}
}