use crate::ast::ast::PathPattern;
use crate::plan::pattern_optimization::pattern_analysis::{
JoinStep, LinearPath, PatternPlanStrategy,
};
use std::collections::HashMap;
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct GraphStatistics {
pub node_counts: HashMap<String, u64>,
pub relationship_counts: HashMap<String, u64>,
pub avg_relationships_per_node: HashMap<String, f64>,
pub pattern_selectivity: HashMap<String, f64>,
}
impl Default for GraphStatistics {
fn default() -> Self {
Self {
node_counts: HashMap::new(),
relationship_counts: HashMap::new(),
avg_relationships_per_node: HashMap::new(),
pattern_selectivity: HashMap::new(),
}
}
}
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct ExecutionCost {
pub operations: f64,
pub memory_bytes: u64,
pub intermediate_results: u64,
pub confidence: f64,
}
impl ExecutionCost {
pub fn new(
operations: f64,
memory_bytes: u64,
intermediate_results: u64,
confidence: f64,
) -> Self {
Self {
operations,
memory_bytes,
intermediate_results,
confidence,
}
}
pub fn total_cost(&self) -> f64 {
self.operations
+ (self.memory_bytes as f64 * 0.001)
+ (self.intermediate_results as f64 * 0.01)
}
#[allow(dead_code)] pub fn is_better_than(&self, other: &ExecutionCost) -> bool {
let self_adjusted = self.total_cost() / self.confidence.max(0.1);
let other_adjusted = other.total_cost() / other.confidence.max(0.1);
self_adjusted < other_adjusted
}
}
#[allow(dead_code)]
pub struct PlanCostEstimator {
statistics: GraphStatistics,
cost_cache: HashMap<String, ExecutionCost>,
}
impl PlanCostEstimator {
pub fn new(statistics: GraphStatistics) -> Self {
Self {
statistics,
cost_cache: HashMap::new(),
}
}
pub fn estimate_cost(&mut self, strategy: &PatternPlanStrategy) -> ExecutionCost {
match strategy {
PatternPlanStrategy::PathTraversal(linear_path) => {
self.estimate_path_traversal_cost(linear_path)
}
PatternPlanStrategy::HashJoin {
patterns,
join_order,
..
} => self.estimate_hash_join_cost(patterns, join_order),
PatternPlanStrategy::NestedLoopJoin { patterns, .. } => {
self.estimate_nested_loop_join_cost(patterns)
}
PatternPlanStrategy::CartesianProduct { patterns, .. } => {
self.estimate_cartesian_product_cost(patterns)
}
}
}
fn estimate_path_traversal_cost(&self, linear_path: &LinearPath) -> ExecutionCost {
let mut total_operations = 0.0;
let mut total_memory = 0u64;
let mut current_cardinality = 1.0;
for step in &linear_path.steps {
let start_nodes = if total_operations == 0.0 {
1000.0
} else {
current_cardinality
};
let default_type = "DEFAULT".to_string();
let relationship_type = step.relationship.labels.first().unwrap_or(&default_type);
let avg_relationships = self.get_avg_relationships(relationship_type).unwrap_or(2.0);
let relationship_cost = start_nodes * avg_relationships;
total_operations += relationship_cost;
current_cardinality = relationship_cost;
total_memory += (current_cardinality as u64) * 1024;
}
ExecutionCost::new(
total_operations,
total_memory,
current_cardinality as u64,
0.8, )
}
fn estimate_hash_join_cost(
&self,
patterns: &[PathPattern],
join_order: &[JoinStep],
) -> ExecutionCost {
let mut total_operations = 0.0;
let mut total_memory = 0u64;
let mut intermediate_size = 0u64;
for pattern in patterns {
let pattern_cost = self.estimate_single_pattern_cost(pattern);
total_operations += pattern_cost.operations;
total_memory += pattern_cost.memory_bytes;
intermediate_size += pattern_cost.intermediate_results;
}
for _join_step in join_order {
let join_cost = intermediate_size as f64 * 1.5;
total_operations += join_cost;
total_memory += intermediate_size * 512; }
ExecutionCost::new(
total_operations,
total_memory,
intermediate_size,
0.7, )
}
fn estimate_nested_loop_join_cost(&self, patterns: &[PathPattern]) -> ExecutionCost {
if patterns.len() < 2 {
return self.estimate_single_pattern_cost(&patterns[0]);
}
let mut total_operations = 1.0;
let mut total_memory = 0u64;
for pattern in patterns {
let pattern_cost = self.estimate_single_pattern_cost(pattern);
total_operations *= pattern_cost.intermediate_results as f64;
total_memory += pattern_cost.memory_bytes;
}
ExecutionCost::new(
total_operations,
total_memory,
total_operations as u64,
0.9, )
}
fn estimate_cartesian_product_cost(&self, patterns: &[PathPattern]) -> ExecutionCost {
let mut total_operations = 1.0;
let mut total_memory = 0u64;
let mut result_cardinality = 1u64;
for pattern in patterns {
let pattern_cost = self.estimate_single_pattern_cost(pattern);
total_operations *= pattern_cost.intermediate_results as f64;
total_memory += pattern_cost.memory_bytes;
result_cardinality *= pattern_cost.intermediate_results;
}
ExecutionCost::new(
total_operations,
total_memory,
result_cardinality,
0.95, )
}
fn estimate_single_pattern_cost(&self, pattern: &PathPattern) -> ExecutionCost {
let base_operations = match pattern.elements.len() {
0 | 1 => 100.0,
2 => 1000.0,
3 => 10000.0,
_ => 100000.0,
};
let selectivity = self.estimate_pattern_selectivity(pattern);
let operations = base_operations * selectivity;
let results = (operations * 0.1) as u64;
ExecutionCost::new(
operations,
results * 1024, results,
0.6, )
}
fn estimate_pattern_selectivity(&self, _pattern: &PathPattern) -> f64 {
let base_selectivity = 0.1;
let constraint_factor = 1.0;
base_selectivity * constraint_factor
}
fn get_avg_relationships(&self, rel_type: &str) -> Option<f64> {
self.statistics
.avg_relationships_per_node
.get(rel_type)
.copied()
}
#[allow(dead_code)] pub fn update_statistics(&mut self, new_stats: GraphStatistics) {
self.statistics = new_stats;
self.cost_cache.clear(); }
#[allow(dead_code)] pub fn clear_cache(&mut self) {
self.cost_cache.clear();
}
}
#[allow(dead_code)]
#[derive(Debug)]
pub struct StatisticsManager {
statistics: GraphStatistics,
update_threshold: u64,
operations_since_update: u64,
}
impl StatisticsManager {
pub fn new() -> Self {
Self {
statistics: GraphStatistics::default(),
update_threshold: 1000,
operations_since_update: 0,
}
}
#[allow(dead_code)] pub fn get_statistics(&self) -> &GraphStatistics {
&self.statistics
}
#[allow(dead_code)] pub fn record_query_execution(&mut self, _query: &str, _result_count: u64) {
self.operations_since_update += 1;
if self.operations_since_update >= self.update_threshold {
self.refresh_statistics();
}
}
#[allow(dead_code)] fn refresh_statistics(&mut self) {
self.operations_since_update = 0;
}
#[allow(dead_code)] pub fn set_statistics(&mut self, stats: GraphStatistics) {
self.statistics = stats;
}
pub fn create_cost_estimator(&self) -> PlanCostEstimator {
PlanCostEstimator::new(self.statistics.clone())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_execution_cost_comparison() {
let cost1 = ExecutionCost::new(1000.0, 1024, 100, 0.8);
let cost2 = ExecutionCost::new(2000.0, 2048, 200, 0.9);
assert!(cost1.is_better_than(&cost2));
assert!(!cost2.is_better_than(&cost1));
}
#[test]
fn test_cost_estimator_basic() {
let mut stats = GraphStatistics::default();
stats.node_counts.insert("Person".to_string(), 1000);
stats.relationship_counts.insert("KNOWS".to_string(), 500);
stats
.avg_relationships_per_node
.insert("KNOWS".to_string(), 2.0);
let mut estimator = PlanCostEstimator::new(stats);
use crate::ast::ast::{Location, PathPattern};
let pattern = PathPattern {
assignment: None,
path_type: None,
elements: vec![],
location: Location::default(),
};
let strategy = PatternPlanStrategy::CartesianProduct {
patterns: vec![pattern],
estimated_cost: 0.0, };
let cost = estimator.estimate_cost(&strategy);
assert!(cost.operations > 0.0);
assert!(cost.confidence > 0.0);
}
#[test]
fn test_statistics_manager() {
let mut manager = StatisticsManager::new();
let stats = manager.get_statistics();
assert!(stats.node_counts.is_empty());
assert!(stats.relationship_counts.is_empty());
manager.record_query_execution("MATCH (n) RETURN n", 100);
let estimator = manager.create_cost_estimator();
assert_eq!(estimator.statistics.node_counts.len(), 0);
}
}