1use crate::query::advanced_statistics::AdvancedStatisticsCollector;
29use crate::query::algebra::{AlgebraTriplePattern, GraphPattern, TermPattern};
30use crate::query::query_plan_visualizer::QueryPlanNode;
31use crate::OxirsError;
32
33use std::collections::HashMap;
34use std::sync::atomic::{AtomicU64, Ordering};
35use std::sync::{Arc, RwLock};
36
37pub struct CostBasedOptimizer {
47 advanced_stats: Arc<AdvancedStatisticsCollector>,
49 stats: Arc<StatisticsCollector>,
51 cost_config: CostConfiguration,
53 query_count: AtomicU64,
55}
56
57impl CostBasedOptimizer {
58 pub fn new() -> Self {
64 Self {
65 advanced_stats: Arc::new(AdvancedStatisticsCollector::new()),
66 stats: Arc::new(StatisticsCollector::new()),
67 cost_config: CostConfiguration::default(),
68 query_count: AtomicU64::new(0),
69 }
70 }
71
72 pub fn with_config(config: CostConfiguration) -> Self {
74 Self {
75 advanced_stats: Arc::new(AdvancedStatisticsCollector::new()),
76 stats: Arc::new(StatisticsCollector::new()),
77 cost_config: config,
78 query_count: AtomicU64::new(0),
79 }
80 }
81
82 pub fn optimize_pattern(&self, pattern: &GraphPattern) -> Result<OptimizedPlan, OxirsError> {
84 self.query_count.fetch_add(1, Ordering::Relaxed);
85
86 match pattern {
87 GraphPattern::Bgp(patterns) => self.optimize_bgp(patterns),
88 GraphPattern::Join(left, right) => self.optimize_join(left, right),
89 GraphPattern::LeftJoin { left, right, .. } => self.optimize_left_join(left, right),
90 GraphPattern::Filter { expr: _, inner } => self.optimize_filter(inner),
91 GraphPattern::Union(left, right) => self.optimize_union(left, right),
92 GraphPattern::Extend { inner, .. } => self.optimize_pattern(inner),
93 GraphPattern::Minus(left, right) => self.optimize_minus(left, right),
94 GraphPattern::Service { inner, .. } => self.optimize_pattern(inner),
95 GraphPattern::Graph { inner, .. } => self.optimize_pattern(inner),
96 GraphPattern::OrderBy { inner, .. } => self.optimize_pattern(inner),
97 GraphPattern::Project { inner, .. } => self.optimize_pattern(inner),
98 GraphPattern::Distinct(inner) => self.optimize_pattern(inner),
99 GraphPattern::Reduced(inner) => self.optimize_pattern(inner),
100 GraphPattern::Slice { inner, .. } => self.optimize_pattern(inner),
101 GraphPattern::Group { inner, .. } => self.optimize_pattern(inner),
102 GraphPattern::Path {
103 subject,
104 path,
105 object,
106 ..
107 } => {
108 self.optimize_property_path(subject, path, object)
110 }
111 GraphPattern::Values { .. } => {
112 Ok(OptimizedPlan::empty())
114 }
115 }
116 }
117
118 fn optimize_bgp(&self, patterns: &[AlgebraTriplePattern]) -> Result<OptimizedPlan, OxirsError> {
120 if patterns.is_empty() {
121 return Ok(OptimizedPlan::empty());
122 }
123
124 let mut pattern_costs: Vec<(usize, PatternCost)> = patterns
126 .iter()
127 .enumerate()
128 .map(|(idx, pattern)| {
129 let cost = self.estimate_pattern_cost(pattern);
130 (idx, cost)
131 })
132 .collect();
133
134 pattern_costs.sort_by(|a, b| {
136 a.1.estimated_cardinality
137 .partial_cmp(&b.1.estimated_cardinality)
138 .unwrap_or(std::cmp::Ordering::Equal)
139 });
140
141 let optimal_order: Vec<usize> = pattern_costs.iter().map(|(idx, _)| *idx).collect();
143
144 let total_cost: f64 = pattern_costs.iter().map(|(_, cost)| cost.total_cost).sum();
146
147 Ok(OptimizedPlan {
148 join_order: optimal_order,
149 estimated_cost: total_cost,
150 estimated_cardinality: self.estimate_bgp_cardinality(patterns, &pattern_costs),
151 use_index: self.should_use_index(patterns),
152 parallel_execution: self.should_parallelize(patterns, total_cost),
153 optimizations: vec![Optimization::JoinReordering],
154 })
155 }
156
157 fn optimize_join(
159 &self,
160 left: &GraphPattern,
161 right: &GraphPattern,
162 ) -> Result<OptimizedPlan, OxirsError> {
163 let left_plan = self.optimize_pattern(left)?;
164 let right_plan = self.optimize_pattern(right)?;
165
166 let (first, second, swapped) =
168 if left_plan.estimated_cardinality < right_plan.estimated_cardinality {
169 (left_plan, right_plan, false)
170 } else {
171 (right_plan, left_plan, true)
172 };
173
174 let join_cost = self.estimate_join_cost(&first, &second);
176
177 Ok(OptimizedPlan {
178 join_order: if swapped { vec![1, 0] } else { vec![0, 1] },
179 estimated_cost: first.estimated_cost + second.estimated_cost + join_cost,
180 estimated_cardinality: self.estimate_join_cardinality(&first, &second),
181 use_index: true,
182 parallel_execution: join_cost > self.cost_config.parallel_threshold,
183 optimizations: vec![Optimization::JoinReordering, Optimization::HashJoin],
184 })
185 }
186
187 fn optimize_left_join(
189 &self,
190 left: &GraphPattern,
191 right: &GraphPattern,
192 ) -> Result<OptimizedPlan, OxirsError> {
193 let left_plan = self.optimize_pattern(left)?;
195 let right_plan = self.optimize_pattern(right)?;
196
197 let join_cost = self.estimate_join_cost(&left_plan, &right_plan);
198
199 Ok(OptimizedPlan {
200 join_order: vec![0, 1], estimated_cost: left_plan.estimated_cost + right_plan.estimated_cost + join_cost,
202 estimated_cardinality: left_plan.estimated_cardinality, use_index: true,
204 parallel_execution: false, optimizations: vec![Optimization::IndexNLJ],
206 })
207 }
208
209 fn optimize_filter(&self, inner: &GraphPattern) -> Result<OptimizedPlan, OxirsError> {
211 let mut inner_plan = self.optimize_pattern(inner)?;
212
213 inner_plan.estimated_cardinality = ((inner_plan.estimated_cardinality as f64)
215 * self.cost_config.filter_selectivity)
216 as usize;
217 inner_plan.optimizations.push(Optimization::FilterPushdown);
218
219 Ok(inner_plan)
220 }
221
222 fn optimize_union(
224 &self,
225 left: &GraphPattern,
226 right: &GraphPattern,
227 ) -> Result<OptimizedPlan, OxirsError> {
228 let left_plan = self.optimize_pattern(left)?;
229 let right_plan = self.optimize_pattern(right)?;
230
231 Ok(OptimizedPlan {
232 join_order: vec![0, 1],
233 estimated_cost: left_plan.estimated_cost + right_plan.estimated_cost,
234 estimated_cardinality: left_plan.estimated_cardinality
235 + right_plan.estimated_cardinality,
236 use_index: true,
237 parallel_execution: true, optimizations: vec![Optimization::ParallelUnion],
239 })
240 }
241
242 fn optimize_minus(
244 &self,
245 left: &GraphPattern,
246 right: &GraphPattern,
247 ) -> Result<OptimizedPlan, OxirsError> {
248 let left_plan = self.optimize_pattern(left)?;
249 let right_plan = self.optimize_pattern(right)?;
250
251 Ok(OptimizedPlan {
252 join_order: vec![0, 1],
253 estimated_cost: left_plan.estimated_cost + right_plan.estimated_cost,
254 estimated_cardinality: ((left_plan.estimated_cardinality as f64) * 0.7) as usize, use_index: true,
256 parallel_execution: false,
257 optimizations: vec![Optimization::HashAntiJoin],
258 })
259 }
260
261 fn optimize_property_path(
266 &self,
267 _subject: &TermPattern,
268 path: &crate::query::algebra::PropertyPath,
269 _object: &TermPattern,
270 ) -> Result<OptimizedPlan, OxirsError> {
271 use crate::query::algebra::PropertyPath;
272
273 let (complexity_factor, estimated_card) = self.estimate_path_complexity(path);
275
276 let base_cost = self.stats.total_triples() as f64 * self.cost_config.sequential_scan_cost;
278
279 let estimated_cost = base_cost * complexity_factor;
281
282 let parallel_execution = complexity_factor > 10.0 && estimated_card > 1000;
284
285 Ok(OptimizedPlan {
286 join_order: vec![],
287 estimated_cost,
288 estimated_cardinality: estimated_card,
289 use_index: matches!(path, PropertyPath::Predicate(_)), parallel_execution,
291 optimizations: vec![Optimization::PropertyPathEvaluation],
292 })
293 }
294
295 fn estimate_path_complexity(&self, path: &crate::query::algebra::PropertyPath) -> (f64, usize) {
301 use crate::query::algebra::PropertyPath;
302
303 match path {
304 PropertyPath::Predicate(_) => {
306 let card = (self.stats.total_triples() / 10).max(1); (1.0, card)
308 }
309
310 PropertyPath::Inverse(inner) => {
312 let (inner_complexity, inner_card) = self.estimate_path_complexity(inner);
313 (inner_complexity * 1.2, inner_card)
314 }
315
316 PropertyPath::Sequence(left, right) => {
318 let (left_complexity, left_card) = self.estimate_path_complexity(left);
319 let (right_complexity, _) = self.estimate_path_complexity(right);
320 let complexity = left_complexity * right_complexity;
321 let card = (left_card as f64 * 0.5) as usize;
323 (complexity, card.max(1))
324 }
325
326 PropertyPath::Alternative(left, right) => {
328 let (left_complexity, left_card) = self.estimate_path_complexity(left);
329 let (right_complexity, right_card) = self.estimate_path_complexity(right);
330 let complexity = (left_complexity + right_complexity) / 2.0;
331 let card = left_card + right_card;
333 (complexity, card)
334 }
335
336 PropertyPath::ZeroOrMore(inner) => {
338 let (inner_complexity, _) = self.estimate_path_complexity(inner);
339 let complexity = inner_complexity * 50.0;
341 let card = (self.stats.total_triples() as f64 * 0.3) as usize;
343 (complexity.min(100.0), card.max(1))
344 }
345
346 PropertyPath::OneOrMore(inner) => {
348 let (inner_complexity, _) = self.estimate_path_complexity(inner);
349 let complexity = inner_complexity * 30.0;
350 let card = (self.stats.total_triples() as f64 * 0.2) as usize;
351 (complexity.min(100.0), card.max(1))
352 }
353
354 PropertyPath::ZeroOrOne(inner) => {
356 let (inner_complexity, inner_card) = self.estimate_path_complexity(inner);
357 (inner_complexity * 1.5, (inner_card as f64 * 1.2) as usize)
359 }
360
361 PropertyPath::NegatedPropertySet(props) => {
363 let complexity = 2.0 + (props.len() as f64 * 0.5);
365 let card = (self.stats.total_triples() as f64 * 0.8) as usize;
367 (complexity, card.max(1))
368 }
369 }
370 }
371
372 fn estimate_pattern_cost(&self, pattern: &AlgebraTriplePattern) -> PatternCost {
375 let estimated_card =
377 if let Some(hist_card) = self.advanced_stats.estimate_cardinality(pattern) {
378 hist_card
380 } else {
381 let selectivity = self.calculate_selectivity(pattern);
383 (self.stats.total_triples() as f64 * selectivity) as usize
384 };
385
386 let io_cost = if self.can_use_index(pattern) {
388 estimated_card as f64 * self.cost_config.index_access_cost
389 } else {
390 self.stats.total_triples() as f64 * self.cost_config.sequential_scan_cost
391 };
392
393 let cpu_cost = estimated_card as f64 * self.cost_config.cpu_tuple_cost;
395
396 PatternCost {
397 estimated_cardinality: estimated_card,
398 io_cost,
399 cpu_cost,
400 total_cost: io_cost + cpu_cost,
401 }
402 }
403
404 fn calculate_selectivity(&self, pattern: &AlgebraTriplePattern) -> f64 {
405 let mut selectivity = 1.0;
406
407 match &pattern.subject {
409 TermPattern::Variable(_) => selectivity *= 0.5, _ => selectivity *= 0.01, }
412
413 match &pattern.predicate {
414 TermPattern::Variable(_) => selectivity *= 0.5,
415 _ => selectivity *= 0.1,
416 }
417
418 match &pattern.object {
419 TermPattern::Variable(_) => selectivity *= 0.5,
420 _ => selectivity *= 0.01,
421 }
422
423 selectivity
424 }
425
426 fn can_use_index(&self, pattern: &AlgebraTriplePattern) -> bool {
427 !matches!(pattern.subject, TermPattern::Variable(_))
429 || !matches!(pattern.predicate, TermPattern::Variable(_))
430 || !matches!(pattern.object, TermPattern::Variable(_))
431 }
432
433 fn estimate_bgp_cardinality(
434 &self,
435 _patterns: &[AlgebraTriplePattern],
436 pattern_costs: &[(usize, PatternCost)],
437 ) -> usize {
438 if pattern_costs.is_empty() {
439 return 0;
440 }
441
442 let mut cardinality = pattern_costs[0].1.estimated_cardinality as f64;
444
445 for (_, cost) in pattern_costs.iter().skip(1) {
446 let join_selectivity = 0.1; cardinality *= join_selectivity * (cost.estimated_cardinality as f64);
448 }
449
450 cardinality.max(1.0) as usize
451 }
452
453 fn should_use_index(&self, patterns: &[AlgebraTriplePattern]) -> bool {
454 patterns.iter().any(|p| self.can_use_index(p))
456 }
457
458 fn should_parallelize(&self, _patterns: &[AlgebraTriplePattern], total_cost: f64) -> bool {
459 total_cost > self.cost_config.parallel_threshold
460 }
461
462 fn estimate_join_cost(&self, left: &OptimizedPlan, right: &OptimizedPlan) -> f64 {
463 let build_cost = left.estimated_cardinality as f64 * self.cost_config.hash_build_cost;
465 let probe_cost = right.estimated_cardinality as f64 * self.cost_config.hash_probe_cost;
466
467 build_cost + probe_cost
468 }
469
470 fn estimate_join_cardinality(&self, left: &OptimizedPlan, right: &OptimizedPlan) -> usize {
471 let join_selectivity = self
474 .advanced_stats
475 .estimate_join_selectivity(left.estimated_cardinality, right.estimated_cardinality);
476 let product = left.estimated_cardinality as f64 * right.estimated_cardinality as f64;
477
478 (product * join_selectivity).max(1.0) as usize
479 }
480
481 pub fn stats(&self) -> OptimizerStats {
483 OptimizerStats {
484 queries_optimized: self.query_count.load(Ordering::Relaxed),
485 total_triples: self.stats.total_triples(),
486 }
487 }
488
489 pub fn advanced_stats(&self) -> crate::query::advanced_statistics::AdvancedStatistics {
494 self.advanced_stats.get_statistics()
495 }
496
497 pub fn get_pattern_history(
502 &self,
503 pattern: &AlgebraTriplePattern,
504 ) -> Vec<crate::query::advanced_statistics::PatternExecution> {
505 self.advanced_stats.get_pattern_history(pattern)
506 }
507
508 pub fn clear_statistics(&self) {
513 self.advanced_stats.clear();
514 self.query_count.store(0, Ordering::Relaxed);
515 }
516
517 pub fn update_stats(&self, pattern: &GraphPattern, actual_cardinality: usize) {
528 self.update_stats_with_time(pattern, actual_cardinality, 0);
529 }
530
531 pub fn update_stats_with_time(
536 &self,
537 pattern: &GraphPattern,
538 actual_cardinality: usize,
539 execution_time_ms: u64,
540 ) {
541 if let GraphPattern::Bgp(patterns) = pattern {
543 for triple_pattern in patterns {
544 self.advanced_stats.record_pattern_execution(
545 triple_pattern,
546 actual_cardinality,
547 execution_time_ms,
548 );
549 }
550 }
551
552 const ALPHA: f64 = 0.2;
554 let pattern_hash = self.compute_pattern_hash(pattern);
555 let mut pattern_stats = self
556 .stats
557 .pattern_stats
558 .write()
559 .expect("lock should not be poisoned");
560
561 let entry = pattern_stats
562 .entry(pattern_hash)
563 .or_insert_with(|| PatternStats {
564 execution_count: 0,
565 avg_cardinality: actual_cardinality as f64,
566 last_cardinality: actual_cardinality,
567 });
568
569 entry.avg_cardinality =
570 ALPHA * (actual_cardinality as f64) + (1.0 - ALPHA) * entry.avg_cardinality;
571 entry.last_cardinality = actual_cardinality;
572 entry.execution_count += 1;
573
574 if entry.execution_count > 5 {
576 let deviation =
577 (actual_cardinality as f64 - entry.avg_cardinality).abs() / entry.avg_cardinality;
578 if deviation > 0.5 {
579 tracing::debug!(
580 pattern_hash = pattern_hash,
581 actual = actual_cardinality,
582 avg = entry.avg_cardinality,
583 deviation_pct = deviation * 100.0,
584 "Significant cardinality deviation detected"
585 );
586 }
587 }
588 }
589
590 pub fn record_join_execution(
595 &self,
596 left_pattern: &GraphPattern,
597 right_pattern: &GraphPattern,
598 left_cardinality: usize,
599 right_cardinality: usize,
600 result_cardinality: usize,
601 ) {
602 self.advanced_stats.record_join_execution(
603 left_pattern,
604 right_pattern,
605 left_cardinality,
606 right_cardinality,
607 result_cardinality,
608 );
609 }
610
611 fn compute_pattern_hash(&self, pattern: &GraphPattern) -> u64 {
613 use std::collections::hash_map::DefaultHasher;
614 use std::hash::{Hash, Hasher};
615
616 let mut hasher = DefaultHasher::new();
617
618 match pattern {
620 GraphPattern::Bgp(patterns) => {
621 "bgp".hash(&mut hasher);
622 patterns.len().hash(&mut hasher);
623 }
624 GraphPattern::Join(_, _) => {
625 "join".hash(&mut hasher);
626 }
627 GraphPattern::LeftJoin { .. } => {
628 "leftjoin".hash(&mut hasher);
629 }
630 GraphPattern::Union(_, _) => {
631 "union".hash(&mut hasher);
632 }
633 GraphPattern::Filter { .. } => {
634 "filter".hash(&mut hasher);
635 }
636 GraphPattern::Extend { .. } => {
637 "extend".hash(&mut hasher);
638 }
639 GraphPattern::Group { .. } => {
640 "group".hash(&mut hasher);
641 }
642 GraphPattern::Service { .. } => {
643 "service".hash(&mut hasher);
644 }
645 GraphPattern::Minus(_, _) => {
646 "minus".hash(&mut hasher);
647 }
648 GraphPattern::Graph { .. } => {
649 "graph".hash(&mut hasher);
650 }
651 GraphPattern::OrderBy { .. } => {
652 "orderby".hash(&mut hasher);
653 }
654 GraphPattern::Project { .. } => {
655 "project".hash(&mut hasher);
656 }
657 GraphPattern::Distinct(_) => {
658 "distinct".hash(&mut hasher);
659 }
660 GraphPattern::Reduced(_) => {
661 "reduced".hash(&mut hasher);
662 }
663 GraphPattern::Slice { .. } => {
664 "slice".hash(&mut hasher);
665 }
666 GraphPattern::Path { .. } => {
667 "path".hash(&mut hasher);
668 }
669 GraphPattern::Values { .. } => {
670 "values".hash(&mut hasher);
671 }
672 }
673
674 hasher.finish()
675 }
676
677 pub fn get_learned_cardinality(&self, pattern: &GraphPattern) -> Option<f64> {
679 let pattern_hash = self.compute_pattern_hash(pattern);
680 let pattern_stats = self
681 .stats
682 .pattern_stats
683 .read()
684 .expect("lock should not be poisoned");
685 pattern_stats.get(&pattern_hash).map(|s| s.avg_cardinality)
686 }
687
688 pub fn to_visual_plan(&self, pattern: &GraphPattern, plan: &OptimizedPlan) -> QueryPlanNode {
711 self.pattern_to_visual_node(pattern, Some(plan))
712 }
713
714 fn pattern_to_visual_node(
716 &self,
717 pattern: &GraphPattern,
718 plan: Option<&OptimizedPlan>,
719 ) -> QueryPlanNode {
720 match pattern {
721 GraphPattern::Bgp(patterns) => {
722 let mut node =
723 QueryPlanNode::new("BGP", format!("{} triple patterns", patterns.len()))
724 .with_metadata("pattern_count", patterns.len().to_string());
725
726 if let Some(p) = plan {
727 node = node
728 .with_estimated_cardinality(p.estimated_cardinality)
729 .with_estimated_cost(p.estimated_cost);
730
731 if p.use_index {
732 node = node.with_index("SPO/POS/OSP");
733 }
734 }
735
736 for (i, idx) in plan
738 .map(|p| &p.join_order)
739 .unwrap_or(&vec![])
740 .iter()
741 .enumerate()
742 {
743 if let Some(triple_pattern) = patterns.get(*idx) {
744 let pattern_desc = format!(
745 "{} {} {}",
746 Self::term_pattern_to_string(&triple_pattern.subject),
747 Self::term_pattern_to_string(&triple_pattern.predicate),
748 Self::term_pattern_to_string(&triple_pattern.object)
749 );
750
751 let cost = self.estimate_pattern_cost(triple_pattern);
752 let mut child = QueryPlanNode::new("TriplePattern", pattern_desc)
753 .with_estimated_cardinality(cost.estimated_cardinality)
754 .with_estimated_cost(cost.total_cost)
755 .with_metadata("order", (i + 1).to_string());
756
757 if self.can_use_index(triple_pattern) {
758 child = child.with_index(self.suggest_index(triple_pattern));
759 }
760
761 node.add_child(child);
762 }
763 }
764
765 node
766 }
767 GraphPattern::Join(left, right) => {
768 let mut node = QueryPlanNode::new("Join", "Hash Join");
769
770 if let Some(p) = plan {
771 node = node
772 .with_estimated_cardinality(p.estimated_cardinality)
773 .with_estimated_cost(p.estimated_cost);
774
775 if p.parallel_execution {
776 node = node.with_metadata("execution", "parallel");
777 }
778 }
779
780 let left_plan = self.optimize_pattern(left).ok();
782 let right_plan = self.optimize_pattern(right).ok();
783
784 let swapped = plan.map(|p| p.join_order == vec![1, 0]).unwrap_or(false);
785
786 if swapped {
787 node.add_child(self.pattern_to_visual_node(right, right_plan.as_ref()));
788 node.add_child(self.pattern_to_visual_node(left, left_plan.as_ref()));
789 } else {
790 node.add_child(self.pattern_to_visual_node(left, left_plan.as_ref()));
791 node.add_child(self.pattern_to_visual_node(right, right_plan.as_ref()));
792 }
793
794 node
795 }
796 GraphPattern::LeftJoin { left, right, .. } => {
797 let mut node = QueryPlanNode::new("LeftJoin", "Optional Pattern");
798
799 if let Some(p) = plan {
800 node = node
801 .with_estimated_cardinality(p.estimated_cardinality)
802 .with_estimated_cost(p.estimated_cost);
803 }
804
805 let left_plan = self.optimize_pattern(left).ok();
806 let right_plan = self.optimize_pattern(right).ok();
807
808 node.add_child(self.pattern_to_visual_node(left, left_plan.as_ref()));
809 node.add_child(self.pattern_to_visual_node(right, right_plan.as_ref()));
810
811 node
812 }
813 GraphPattern::Filter { expr: _, inner } => {
814 let inner_plan = self.optimize_pattern(inner).ok();
815 let mut node = QueryPlanNode::new("Filter", "Filter expression");
816
817 if let Some(p) = plan {
818 node = node
819 .with_estimated_cardinality(p.estimated_cardinality)
820 .with_estimated_cost(p.estimated_cost)
821 .with_metadata(
822 "selectivity",
823 format!("{:.2}", self.cost_config.filter_selectivity),
824 );
825 }
826
827 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
828 node
829 }
830 GraphPattern::Union(left, right) => {
831 let mut node = QueryPlanNode::new("Union", "Parallel Union");
832
833 if let Some(p) = plan {
834 node = node
835 .with_estimated_cardinality(p.estimated_cardinality)
836 .with_estimated_cost(p.estimated_cost)
837 .with_metadata("execution", "parallel");
838 }
839
840 let left_plan = self.optimize_pattern(left).ok();
841 let right_plan = self.optimize_pattern(right).ok();
842
843 node.add_child(self.pattern_to_visual_node(left, left_plan.as_ref()));
844 node.add_child(self.pattern_to_visual_node(right, right_plan.as_ref()));
845
846 node
847 }
848 GraphPattern::Minus(left, right) => {
849 let mut node = QueryPlanNode::new("Minus", "Hash Anti-Join");
850
851 if let Some(p) = plan {
852 node = node
853 .with_estimated_cardinality(p.estimated_cardinality)
854 .with_estimated_cost(p.estimated_cost);
855 }
856
857 let left_plan = self.optimize_pattern(left).ok();
858 let right_plan = self.optimize_pattern(right).ok();
859
860 node.add_child(self.pattern_to_visual_node(left, left_plan.as_ref()));
861 node.add_child(self.pattern_to_visual_node(right, right_plan.as_ref()));
862
863 node
864 }
865 GraphPattern::Extend { inner, .. } => {
866 let inner_plan = self.optimize_pattern(inner).ok();
867 let mut node = QueryPlanNode::new("Extend", "Variable binding");
868
869 if let Some(p) = plan {
870 node = node.with_estimated_cardinality(p.estimated_cardinality);
871 }
872
873 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
874 node
875 }
876 GraphPattern::Service { .. } => QueryPlanNode::new("Service", "Federated query")
877 .with_metadata("type", "remote")
878 .with_metadata("note", "actual_cardinality_depends_on_remote_endpoint"),
879 GraphPattern::Graph { inner, .. } => {
880 let inner_plan = self.optimize_pattern(inner).ok();
881 let mut node = QueryPlanNode::new("Graph", "Named graph access");
882
883 if let Some(p) = plan {
884 node = node.with_estimated_cardinality(p.estimated_cardinality);
885 }
886
887 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
888 node
889 }
890 GraphPattern::OrderBy { inner, .. } => {
891 let inner_plan = self.optimize_pattern(inner).ok();
892 let mut node = QueryPlanNode::new("OrderBy", "Sort operation");
893
894 if let Some(p) = plan {
895 node = node.with_estimated_cardinality(p.estimated_cardinality);
896 }
897
898 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
899 node
900 }
901 GraphPattern::Project { inner, variables } => {
902 let inner_plan = self.optimize_pattern(inner).ok();
903 let mut node =
904 QueryPlanNode::new("Project", format!("{} variables", variables.len()));
905
906 if let Some(p) = plan {
907 node = node.with_estimated_cardinality(p.estimated_cardinality);
908 }
909
910 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
911 node
912 }
913 GraphPattern::Distinct(inner) => {
914 let inner_plan = self.optimize_pattern(inner).ok();
915 let mut node = QueryPlanNode::new("Distinct", "Remove duplicates");
916
917 if let Some(p) = plan {
918 node = node.with_estimated_cardinality(p.estimated_cardinality);
919 }
920
921 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
922 node
923 }
924 GraphPattern::Reduced(inner) => {
925 let inner_plan = self.optimize_pattern(inner).ok();
926 let mut node = QueryPlanNode::new("Reduced", "Best-effort deduplication");
927
928 if let Some(p) = plan {
929 node = node.with_estimated_cardinality(p.estimated_cardinality);
930 }
931
932 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
933 node
934 }
935 GraphPattern::Slice {
936 inner,
937 offset,
938 limit,
939 } => {
940 let inner_plan = self.optimize_pattern(inner).ok();
941 let limit_str = limit
942 .map(|l| l.to_string())
943 .unwrap_or_else(|| "∞".to_string());
944 let mut node =
945 QueryPlanNode::new("Slice", format!("LIMIT {} OFFSET {}", limit_str, offset));
946
947 if let Some(p) = plan {
948 let limited_card = if let Some(lim) = limit {
949 (*lim).min(p.estimated_cardinality.saturating_sub(*offset))
950 } else {
951 p.estimated_cardinality.saturating_sub(*offset)
952 };
953 node = node.with_estimated_cardinality(limited_card);
954 }
955
956 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
957 node
958 }
959 GraphPattern::Group { inner, .. } => {
960 let inner_plan = self.optimize_pattern(inner).ok();
961 let mut node = QueryPlanNode::new("Group", "GROUP BY aggregation");
962
963 if let Some(p) = plan {
964 node = node.with_estimated_cardinality(p.estimated_cardinality);
965 }
966
967 node.add_child(self.pattern_to_visual_node(inner, inner_plan.as_ref()));
968 node
969 }
970 GraphPattern::Path { .. } => {
971 QueryPlanNode::new("PropertyPath", "Property path traversal")
972 .with_metadata("note", "cardinality_depends_on_path_length")
973 }
974 GraphPattern::Values { bindings, .. } => {
975 QueryPlanNode::new("Values", format!("{} bindings", bindings.len()))
976 .with_estimated_cardinality(bindings.len())
977 .with_estimated_cost(0.0)
978 }
979 }
980 }
981
982 fn term_pattern_to_string(term: &TermPattern) -> String {
984 match term {
985 TermPattern::Variable(v) => format!("?{}", v),
986 TermPattern::NamedNode(n) => {
987 let uri = n.as_str();
989 if let Some(local) = uri.rsplit('/').next() {
990 local.to_string()
991 } else {
992 uri.to_string()
993 }
994 }
995 TermPattern::BlankNode(b) => format!("_:{}", b),
996 TermPattern::Literal(l) => {
997 let value = l.value();
998 if value.len() > 20 {
999 format!("\"{}...\"", &value[..17])
1000 } else {
1001 format!("\"{}\"", value)
1002 }
1003 }
1004 TermPattern::QuotedTriple(t) => format!(
1005 "<<{} {} {}>>",
1006 Self::term_pattern_to_string(&t.subject),
1007 Self::term_pattern_to_string(&t.predicate),
1008 Self::term_pattern_to_string(&t.object)
1009 ),
1010 }
1011 }
1012
1013 fn suggest_index(&self, pattern: &AlgebraTriplePattern) -> String {
1015 let s_bound = !matches!(pattern.subject, TermPattern::Variable(_));
1016 let p_bound = !matches!(pattern.predicate, TermPattern::Variable(_));
1017 let o_bound = !matches!(pattern.object, TermPattern::Variable(_));
1018
1019 match (s_bound, p_bound, o_bound) {
1020 (true, _, _) => "SPO",
1021 (false, true, _) => "POS",
1022 (false, false, true) => "OSP",
1023 _ => "FullScan",
1024 }
1025 .to_string()
1026 }
1027}
1028
1029impl Default for CostBasedOptimizer {
1030 fn default() -> Self {
1031 Self::new()
1032 }
1033}
1034
1035struct StatisticsCollector {
1037 total_triples: AtomicU64,
1039 #[allow(dead_code)]
1041 predicate_stats: HashMap<String, PredicateStats>,
1042 pattern_stats: Arc<RwLock<HashMap<u64, PatternStats>>>,
1044}
1045
1046impl StatisticsCollector {
1047 fn new() -> Self {
1048 Self {
1049 total_triples: AtomicU64::new(1_000_000), predicate_stats: HashMap::new(),
1051 pattern_stats: Arc::new(RwLock::new(HashMap::new())),
1052 }
1053 }
1054
1055 fn total_triples(&self) -> usize {
1056 self.total_triples.load(Ordering::Relaxed) as usize
1057 }
1058}
1059
1060#[derive(Debug, Clone)]
1062struct PatternStats {
1063 execution_count: usize,
1065 avg_cardinality: f64,
1067 last_cardinality: usize,
1069}
1070
1071#[derive(Debug, Clone)]
1073#[allow(dead_code)]
1074struct PredicateStats {
1075 count: usize,
1077 avg_objects_per_subject: f64,
1079 distinct_subjects: usize,
1081 distinct_objects: usize,
1083}
1084
1085#[derive(Debug, Clone)]
1087pub struct CostConfiguration {
1088 pub sequential_scan_cost: f64,
1090 pub index_access_cost: f64,
1092 pub cpu_tuple_cost: f64,
1094 pub hash_build_cost: f64,
1096 pub hash_probe_cost: f64,
1098 pub filter_selectivity: f64,
1100 pub parallel_threshold: f64,
1102}
1103
1104impl Default for CostConfiguration {
1105 fn default() -> Self {
1106 Self {
1107 sequential_scan_cost: 1.0,
1108 index_access_cost: 0.01,
1109 cpu_tuple_cost: 0.001,
1110 hash_build_cost: 0.005,
1111 hash_probe_cost: 0.002,
1112 filter_selectivity: 0.3,
1113 parallel_threshold: 10000.0,
1114 }
1115 }
1116}
1117
1118#[derive(Debug, Clone)]
1120struct PatternCost {
1121 estimated_cardinality: usize,
1123 #[allow(dead_code)]
1125 io_cost: f64,
1126 #[allow(dead_code)]
1128 cpu_cost: f64,
1129 total_cost: f64,
1131}
1132
1133#[derive(Debug, Clone)]
1135pub struct OptimizedPlan {
1136 pub join_order: Vec<usize>,
1138 pub estimated_cost: f64,
1140 pub estimated_cardinality: usize,
1142 pub use_index: bool,
1144 pub parallel_execution: bool,
1146 pub optimizations: Vec<Optimization>,
1148}
1149
1150impl OptimizedPlan {
1151 fn empty() -> Self {
1153 Self {
1154 join_order: vec![],
1155 estimated_cost: 0.0,
1156 estimated_cardinality: 0,
1157 use_index: false,
1158 parallel_execution: false,
1159 optimizations: vec![],
1160 }
1161 }
1162}
1163
1164#[derive(Debug, Clone, PartialEq, Eq)]
1166pub enum Optimization {
1167 JoinReordering,
1169 HashJoin,
1171 FilterPushdown,
1173 IndexAccess,
1175 IndexNLJ,
1177 ParallelUnion,
1179 HashAntiJoin,
1181 PropertyPathEvaluation,
1183}
1184
1185#[derive(Debug, Clone)]
1187pub struct OptimizerStats {
1188 pub queries_optimized: u64,
1190 pub total_triples: usize,
1192}
1193
1194#[cfg(test)]
1195mod tests {
1196 use super::*;
1197 use crate::model::{NamedNode, Variable};
1198 use crate::query::algebra::TermPattern;
1199
1200 fn create_test_pattern(
1201 subject: TermPattern,
1202 predicate: TermPattern,
1203 object: TermPattern,
1204 ) -> AlgebraTriplePattern {
1205 AlgebraTriplePattern {
1206 subject,
1207 predicate,
1208 object,
1209 }
1210 }
1211
1212 #[test]
1213 fn test_optimizer_creation() {
1214 let optimizer = CostBasedOptimizer::new();
1215 let stats = optimizer.stats();
1216
1217 assert_eq!(stats.queries_optimized, 0);
1218 assert!(stats.total_triples > 0);
1219 }
1220
1221 #[test]
1222 fn test_selectivity_calculation() {
1223 let optimizer = CostBasedOptimizer::new();
1224
1225 let pattern1 = create_test_pattern(
1227 TermPattern::Variable(Variable::new("s").expect("valid variable name")),
1228 TermPattern::Variable(Variable::new("p").expect("valid variable name")),
1229 TermPattern::Variable(Variable::new("o").expect("valid variable name")),
1230 );
1231
1232 let cost1 = optimizer.estimate_pattern_cost(&pattern1);
1233 assert!(cost1.estimated_cardinality > 0);
1234
1235 let pattern2 = create_test_pattern(
1237 TermPattern::NamedNode(NamedNode::new("http://example.org/s").expect("valid IRI")),
1238 TermPattern::NamedNode(NamedNode::new("http://example.org/p").expect("valid IRI")),
1239 TermPattern::NamedNode(NamedNode::new("http://example.org/o").expect("valid IRI")),
1240 );
1241
1242 let cost2 = optimizer.estimate_pattern_cost(&pattern2);
1243
1244 assert!(cost2.estimated_cardinality < cost1.estimated_cardinality);
1246 }
1247
1248 #[test]
1249 fn test_cost_configuration() {
1250 let config = CostConfiguration::default();
1251
1252 assert!(config.index_access_cost < config.sequential_scan_cost);
1253 assert!(config.cpu_tuple_cost < config.index_access_cost);
1254 }
1255
1256 #[test]
1257 fn test_can_use_index() {
1258 let optimizer = CostBasedOptimizer::new();
1259
1260 let pattern1 = create_test_pattern(
1262 TermPattern::NamedNode(NamedNode::new("http://example.org/s").expect("valid IRI")),
1263 TermPattern::Variable(Variable::new("p").expect("valid variable name")),
1264 TermPattern::Variable(Variable::new("o").expect("valid variable name")),
1265 );
1266
1267 assert!(optimizer.can_use_index(&pattern1));
1268
1269 let pattern2 = create_test_pattern(
1271 TermPattern::Variable(Variable::new("s").expect("valid variable name")),
1272 TermPattern::Variable(Variable::new("p").expect("valid variable name")),
1273 TermPattern::Variable(Variable::new("o").expect("valid variable name")),
1274 );
1275
1276 assert!(!optimizer.can_use_index(&pattern2));
1277 }
1278
1279 #[test]
1280 fn test_empty_plan() {
1281 let plan = OptimizedPlan::empty();
1282
1283 assert_eq!(plan.join_order.len(), 0);
1284 assert_eq!(plan.estimated_cost, 0.0);
1285 assert_eq!(plan.estimated_cardinality, 0);
1286 }
1287}