use std::collections::HashMap;
#[derive(Debug, Clone, PartialEq)]
pub struct PatternStats {
pub pattern: String,
pub cardinality_estimate: usize,
pub actual_cardinality: usize,
pub evaluation_time_ms: u64,
pub cache_hit: bool,
}
impl PatternStats {
pub fn new(
pattern: impl Into<String>,
cardinality_estimate: usize,
actual_cardinality: usize,
evaluation_time_ms: u64,
cache_hit: bool,
) -> Self {
Self {
pattern: pattern.into(),
cardinality_estimate,
actual_cardinality,
evaluation_time_ms,
cache_hit,
}
}
pub fn cardinality_ratio(&self) -> f64 {
self.actual_cardinality as f64 / self.cardinality_estimate.max(1) as f64
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct QueryExecutionStats {
pub query_id: String,
pub total_time_ms: u64,
pub pattern_stats: Vec<PatternStats>,
pub result_count: usize,
pub join_count: usize,
pub filter_count: usize,
pub cache_hits: usize,
pub cache_misses: usize,
}
impl QueryExecutionStats {
#[allow(clippy::too_many_arguments)]
pub fn new(
query_id: impl Into<String>,
total_time_ms: u64,
pattern_stats: Vec<PatternStats>,
result_count: usize,
join_count: usize,
filter_count: usize,
cache_hits: usize,
cache_misses: usize,
) -> Self {
Self {
query_id: query_id.into(),
total_time_ms,
pattern_stats,
result_count,
join_count,
filter_count,
cache_hits,
cache_misses,
}
}
pub fn cache_hit_rate(&self) -> f64 {
let total = self.cache_hits + self.cache_misses;
if total == 0 {
0.0
} else {
self.cache_hits as f64 / total as f64
}
}
}
const EMA_ALPHA: f64 = 0.3;
pub struct RuntimeStatsCollector {
history: Vec<QueryExecutionStats>,
max_history: usize,
pattern_selectivity: HashMap<String, f64>,
}
impl RuntimeStatsCollector {
pub fn new(max_history: usize) -> Self {
Self {
history: Vec::new(),
max_history: max_history.max(1),
pattern_selectivity: HashMap::new(),
}
}
pub fn record(&mut self, stats: QueryExecutionStats) {
for ps in &stats.pattern_stats {
self.update_selectivity(
&ps.pattern.clone(),
ps.actual_cardinality,
ps.cardinality_estimate,
);
}
if self.history.len() >= self.max_history {
self.history.remove(0);
}
self.history.push(stats);
}
pub fn update_selectivity(&mut self, pattern: &str, actual: usize, estimated: usize) {
let observation = actual as f64 / estimated.max(1) as f64;
let entry = self
.pattern_selectivity
.entry(pattern.to_string())
.or_insert(observation);
*entry = EMA_ALPHA * observation + (1.0 - EMA_ALPHA) * *entry;
}
pub fn get_selectivity(&self, pattern: &str) -> f64 {
self.pattern_selectivity
.get(pattern)
.copied()
.unwrap_or(1.0)
}
pub fn avg_query_time(&self) -> f64 {
if self.history.is_empty() {
return 0.0;
}
let sum: u64 = self.history.iter().map(|q| q.total_time_ms).sum();
sum as f64 / self.history.len() as f64
}
pub fn slowest_queries(&self, n: usize) -> Vec<&QueryExecutionStats> {
let mut refs: Vec<&QueryExecutionStats> = self.history.iter().collect();
refs.sort_by_key(|b| std::cmp::Reverse(b.total_time_ms));
refs.into_iter().take(n).collect()
}
pub fn pattern_hit_rate(&self, pattern: &str) -> f64 {
let (hits, total) = self
.history
.iter()
.flat_map(|q| &q.pattern_stats)
.filter(|ps| ps.pattern == pattern)
.fold((0usize, 0usize), |(h, t), ps| {
(if ps.cache_hit { h + 1 } else { h }, t + 1)
});
if total == 0 {
0.0
} else {
hits as f64 / total as f64
}
}
pub fn total_queries(&self) -> usize {
self.history.len()
}
pub fn reset(&mut self) {
self.history.clear();
self.pattern_selectivity.clear();
}
pub fn history(&self) -> &[QueryExecutionStats] {
&self.history
}
pub fn tracked_pattern_count(&self) -> usize {
self.pattern_selectivity.len()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_pattern_stats(
pattern: &str,
estimated: usize,
actual: usize,
ms: u64,
cache_hit: bool,
) -> PatternStats {
PatternStats::new(pattern, estimated, actual, ms, cache_hit)
}
fn make_query_stats(id: &str, ms: u64, patterns: Vec<PatternStats>) -> QueryExecutionStats {
let cache_hits = patterns.iter().filter(|p| p.cache_hit).count();
let cache_misses = patterns.len() - cache_hits;
QueryExecutionStats::new(id, ms, patterns, 10, 2, 1, cache_hits, cache_misses)
}
#[test]
fn test_pattern_stats_construction() {
let ps = make_pattern_stats("?s rdf:type ?t", 100, 50, 5, false);
assert_eq!(ps.pattern, "?s rdf:type ?t");
assert_eq!(ps.cardinality_estimate, 100);
assert_eq!(ps.actual_cardinality, 50);
assert_eq!(ps.evaluation_time_ms, 5);
assert!(!ps.cache_hit);
}
#[test]
fn test_pattern_stats_cardinality_ratio() {
let ps = make_pattern_stats("?s ?p ?o", 200, 100, 10, false);
let ratio = ps.cardinality_ratio();
assert!((ratio - 0.5).abs() < 1e-10);
}
#[test]
fn test_pattern_stats_cardinality_ratio_zero_estimate() {
let ps = make_pattern_stats("?s ?p ?o", 0, 5, 1, false);
let ratio = ps.cardinality_ratio();
assert!((ratio - 5.0).abs() < 1e-10);
}
#[test]
fn test_pattern_stats_cache_hit_flag() {
let ps = make_pattern_stats("?x owl:sameAs ?y", 50, 50, 0, true);
assert!(ps.cache_hit);
}
#[test]
fn test_query_stats_cache_hit_rate() {
let patterns = vec![
make_pattern_stats("p1", 10, 10, 1, true),
make_pattern_stats("p2", 10, 10, 1, false),
make_pattern_stats("p3", 10, 10, 1, true),
make_pattern_stats("p4", 10, 10, 1, false),
];
let qs = make_query_stats("q1", 100, patterns);
let rate = qs.cache_hit_rate();
assert!((rate - 0.5).abs() < 1e-10);
}
#[test]
fn test_query_stats_zero_cache_rate_when_no_patterns() {
let qs = make_query_stats("q_empty", 10, vec![]);
assert_eq!(qs.cache_hit_rate(), 0.0);
}
#[test]
fn test_collector_starts_empty() {
let collector = RuntimeStatsCollector::new(50);
assert_eq!(collector.total_queries(), 0);
assert_eq!(collector.avg_query_time(), 0.0);
assert_eq!(collector.tracked_pattern_count(), 0);
}
#[test]
fn test_collector_records_single_query() {
let mut collector = RuntimeStatsCollector::new(50);
let qs = make_query_stats("q1", 120, vec![make_pattern_stats("p1", 50, 30, 10, false)]);
collector.record(qs);
assert_eq!(collector.total_queries(), 1);
assert_eq!(collector.avg_query_time(), 120.0);
}
#[test]
fn test_collector_avg_time_multiple_queries() {
let mut collector = RuntimeStatsCollector::new(50);
collector.record(make_query_stats("q1", 100, vec![]));
collector.record(make_query_stats("q2", 200, vec![]));
collector.record(make_query_stats("q3", 300, vec![]));
let avg = collector.avg_query_time();
assert!((avg - 200.0).abs() < 1e-10);
}
#[test]
fn test_collector_history_bounded() {
let max = 3;
let mut collector = RuntimeStatsCollector::new(max);
for i in 0..6u64 {
collector.record(make_query_stats(&format!("q{i}"), i * 10, vec![]));
}
assert_eq!(collector.total_queries(), max);
let ids: Vec<&str> = collector
.history()
.iter()
.map(|q| q.query_id.as_str())
.collect();
assert_eq!(ids, vec!["q3", "q4", "q5"]);
}
#[test]
fn test_collector_reset_clears_everything() {
let mut collector = RuntimeStatsCollector::new(10);
collector.record(make_query_stats("q1", 100, vec![]));
collector.update_selectivity("p1", 10, 20);
collector.reset();
assert_eq!(collector.total_queries(), 0);
assert_eq!(collector.tracked_pattern_count(), 0);
assert_eq!(collector.avg_query_time(), 0.0);
}
#[test]
fn test_get_selectivity_unknown_pattern_returns_one() {
let collector = RuntimeStatsCollector::new(10);
assert_eq!(collector.get_selectivity("unknown"), 1.0);
}
#[test]
fn test_update_selectivity_sets_initial_value() {
let mut collector = RuntimeStatsCollector::new(10);
collector.update_selectivity("p1", 10, 100);
let sel = collector.get_selectivity("p1");
assert!((sel - 0.1).abs() < 1e-9);
}
#[test]
fn test_selectivity_ema_converges() {
let mut collector = RuntimeStatsCollector::new(100);
for _ in 0..200 {
collector.update_selectivity("p_conv", 50, 100);
}
let sel = collector.get_selectivity("p_conv");
assert!((sel - 0.5).abs() < 1e-6);
}
#[test]
fn test_selectivity_updated_on_record() {
let mut collector = RuntimeStatsCollector::new(50);
let ps = make_pattern_stats("?x :p ?y", 100, 10, 5, false);
collector.record(make_query_stats("q1", 50, vec![ps]));
assert_ne!(collector.get_selectivity("?x :p ?y"), 1.0);
}
#[test]
fn test_slowest_queries_returns_descending_order() {
let mut collector = RuntimeStatsCollector::new(20);
collector.record(make_query_stats("fast", 10, vec![]));
collector.record(make_query_stats("slow", 500, vec![]));
collector.record(make_query_stats("medium", 150, vec![]));
let top2 = collector.slowest_queries(2);
assert_eq!(top2.len(), 2);
assert_eq!(top2[0].query_id, "slow");
assert_eq!(top2[1].query_id, "medium");
}
#[test]
fn test_slowest_queries_n_exceeds_history() {
let mut collector = RuntimeStatsCollector::new(10);
collector.record(make_query_stats("q1", 100, vec![]));
let result = collector.slowest_queries(50);
assert_eq!(result.len(), 1);
}
#[test]
fn test_slowest_queries_empty_history() {
let collector = RuntimeStatsCollector::new(10);
let result = collector.slowest_queries(5);
assert!(result.is_empty());
}
#[test]
fn test_pattern_hit_rate_all_hits() {
let mut collector = RuntimeStatsCollector::new(20);
for _ in 0..4 {
collector.record(make_query_stats(
"q",
10,
vec![make_pattern_stats("hot_pattern", 10, 10, 0, true)],
));
}
let rate = collector.pattern_hit_rate("hot_pattern");
assert!((rate - 1.0).abs() < 1e-10);
}
#[test]
fn test_pattern_hit_rate_no_hits() {
let mut collector = RuntimeStatsCollector::new(20);
for _ in 0..3 {
collector.record(make_query_stats(
"q",
10,
vec![make_pattern_stats("cold_pattern", 10, 10, 5, false)],
));
}
assert_eq!(collector.pattern_hit_rate("cold_pattern"), 0.0);
}
#[test]
fn test_pattern_hit_rate_mixed() {
let mut collector = RuntimeStatsCollector::new(20);
collector.record(make_query_stats(
"q1",
10,
vec![make_pattern_stats("mp", 10, 10, 1, true)],
));
collector.record(make_query_stats(
"q2",
10,
vec![make_pattern_stats("mp", 10, 10, 1, false)],
));
collector.record(make_query_stats(
"q3",
10,
vec![make_pattern_stats("mp", 10, 10, 1, true)],
));
collector.record(make_query_stats(
"q4",
10,
vec![make_pattern_stats("mp", 10, 10, 1, false)],
));
let rate = collector.pattern_hit_rate("mp");
assert!((rate - 0.5).abs() < 1e-10);
}
#[test]
fn test_pattern_hit_rate_unknown_pattern() {
let collector = RuntimeStatsCollector::new(10);
assert_eq!(collector.pattern_hit_rate("never_seen"), 0.0);
}
#[test]
fn test_tracked_pattern_count_grows() {
let mut collector = RuntimeStatsCollector::new(20);
collector.update_selectivity("p1", 5, 10);
collector.update_selectivity("p2", 5, 10);
assert_eq!(collector.tracked_pattern_count(), 2);
}
#[test]
fn test_tracked_pattern_count_no_duplicate() {
let mut collector = RuntimeStatsCollector::new(20);
collector.update_selectivity("p1", 5, 10);
collector.update_selectivity("p1", 6, 10);
assert_eq!(collector.tracked_pattern_count(), 1);
}
}