1#![allow(dead_code)]
7
8use crate::indexing::IndexStats as BaseIndexStats;
9use crate::model::pattern::{
10 ObjectPattern, PredicatePattern, SubjectPattern, TriplePattern as ModelTriplePattern,
11};
12use crate::model::*;
13use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
14use crate::store::IndexedGraph;
15use crate::OxirsError;
16use std::collections::{HashMap, HashSet};
17use std::sync::Arc;
18
19#[derive(Debug, Default)]
21pub struct IndexStats {
22 base_stats: Arc<BaseIndexStats>,
24 pub predicate_counts: std::sync::RwLock<HashMap<String, usize>>,
26 pub total_triples: std::sync::atomic::AtomicUsize,
28 pub subject_cardinality: std::sync::RwLock<HashMap<String, usize>>,
30 pub object_cardinality: std::sync::RwLock<HashMap<String, usize>>,
32 pub join_selectivity_cache: std::sync::RwLock<HashMap<String, f64>>,
34}
35
36impl IndexStats {
37 pub fn new() -> Self {
39 Self {
40 base_stats: Arc::new(BaseIndexStats::new()),
41 predicate_counts: std::sync::RwLock::new(HashMap::new()),
42 total_triples: std::sync::atomic::AtomicUsize::new(0),
43 subject_cardinality: std::sync::RwLock::new(HashMap::new()),
44 object_cardinality: std::sync::RwLock::new(HashMap::new()),
45 join_selectivity_cache: std::sync::RwLock::new(HashMap::new()),
46 }
47 }
48
49 pub fn update_predicate_count(&self, predicate: &str, count: usize) {
51 if let Ok(mut counts) = self.predicate_counts.write() {
52 counts.insert(predicate.to_string(), count);
53 }
54 }
55
56 pub fn set_total_triples(&self, count: usize) {
58 self.total_triples
59 .store(count, std::sync::atomic::Ordering::Relaxed);
60 }
61
62 pub fn update_subject_cardinality(&self, predicate: &str, cardinality: usize) {
64 if let Ok(mut card) = self.subject_cardinality.write() {
65 card.insert(predicate.to_string(), cardinality);
66 }
67 }
68
69 pub fn update_object_cardinality(&self, predicate: &str, cardinality: usize) {
71 if let Ok(mut card) = self.object_cardinality.write() {
72 card.insert(predicate.to_string(), cardinality);
73 }
74 }
75
76 pub fn cache_join_selectivity(&self, pattern_pair: &str, selectivity: f64) {
78 if let Ok(mut cache) = self.join_selectivity_cache.write() {
79 cache.insert(pattern_pair.to_string(), selectivity);
80 }
81 }
82
83 pub fn get_join_selectivity(&self, pattern_pair: &str) -> Option<f64> {
85 match self.join_selectivity_cache.read() {
86 Ok(cache) => cache.get(pattern_pair).copied(),
87 _ => None,
88 }
89 }
90}
91
92#[derive(Debug)]
94pub struct PatternOptimizer {
95 index_stats: Arc<IndexStats>,
97 available_indexes: Vec<IndexType>,
99}
100
101#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
103pub enum IndexType {
104 SPO,
106 POS,
108 OSP,
110 SOP,
112 PSO,
114 OPS,
116}
117
118#[derive(Debug, Clone, Copy, PartialEq, Eq)]
120enum VarPosition {
121 Subject,
122 Predicate,
123 Object,
124}
125
126#[derive(Debug, Clone)]
128pub enum FilterExpression {
129 Equals(Variable, Term),
131 LessThan(Variable, Term),
133 GreaterThan(Variable, Term),
135 Regex(Variable, String),
137 In(Variable, Vec<Term>),
139 And(Box<FilterExpression>, Box<FilterExpression>),
141 Or(Box<FilterExpression>, Box<FilterExpression>),
143 Not(Box<FilterExpression>),
145}
146
147#[derive(Debug, Clone)]
149pub struct PatternStrategy {
150 pub index_type: IndexType,
152 pub estimated_cost: f64,
154 pub selectivity: f64,
156 pub bound_vars: HashSet<Variable>,
158 pub pushdown_filters: Vec<FilterExpression>,
160}
161
162#[derive(Debug, Clone)]
164pub struct OptimizedPatternPlan {
165 pub patterns: Vec<(AlgebraTriplePattern, PatternStrategy)>,
167 pub total_cost: f64,
169 pub binding_order: Vec<HashSet<Variable>>,
171}
172
173impl PatternOptimizer {
174 pub fn new(index_stats: Arc<IndexStats>) -> Self {
176 Self {
177 index_stats,
178 available_indexes: vec![IndexType::SPO, IndexType::POS, IndexType::OSP],
179 }
180 }
181
182 pub fn optimize_patterns(
184 &self,
185 patterns: &[AlgebraTriplePattern],
186 ) -> Result<OptimizedPatternPlan, OxirsError> {
187 if patterns.is_empty() {
188 return Ok(OptimizedPatternPlan {
189 patterns: Vec::new(),
190 total_cost: 0.0,
191 binding_order: Vec::new(),
192 });
193 }
194
195 let mut pattern_strategies: Vec<(usize, Vec<PatternStrategy>)> = patterns
197 .iter()
198 .enumerate()
199 .map(|(idx, pattern)| {
200 let strategies = self.analyze_pattern(pattern);
201 (idx, strategies)
202 })
203 .collect();
204
205 pattern_strategies.sort_by(|a, b| {
207 let best_a =
208 a.1.iter()
209 .map(|s| s.selectivity)
210 .min_by(|x, y| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal))
211 .unwrap_or(1.0);
212 let best_b =
213 b.1.iter()
214 .map(|s| s.selectivity)
215 .min_by(|x, y| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal))
216 .unwrap_or(1.0);
217 best_a
218 .partial_cmp(&best_b)
219 .unwrap_or(std::cmp::Ordering::Equal)
220 });
221
222 let mut ordered_patterns = Vec::new();
224 let mut bound_vars = HashSet::new();
225 let mut binding_order = Vec::new();
226 let mut total_cost = 0.0;
227
228 let (first_idx, first_strategies) = &pattern_strategies[0];
230 let first_pattern = &patterns[*first_idx];
231 let first_strategy = self.select_best_strategy(first_strategies, &bound_vars)?;
232
233 ordered_patterns.push((first_pattern.clone(), first_strategy.clone()));
234 bound_vars.extend(first_strategy.bound_vars.clone());
235 binding_order.push(bound_vars.clone());
236 total_cost += first_strategy.estimated_cost;
237
238 let mut remaining: Vec<_> = pattern_strategies[1..].to_vec();
240
241 while !remaining.is_empty() {
242 let (best_pos, best_idx, best_strategy) = remaining
244 .iter()
245 .enumerate()
246 .map(|(pos, (idx, strategies))| {
247 let pattern = &patterns[*idx];
248 let strategy =
249 self.select_strategy_with_bindings(pattern, strategies, &bound_vars);
250 (pos, idx, strategy)
251 })
252 .min_by(|(_, _, a), (_, _, b)| {
253 a.estimated_cost
254 .partial_cmp(&b.estimated_cost)
255 .unwrap_or(std::cmp::Ordering::Equal)
256 })
257 .ok_or_else(|| OxirsError::Query("Failed to find next pattern".to_string()))?;
258
259 let pattern = &patterns[*best_idx];
260 ordered_patterns.push((pattern.clone(), best_strategy.clone()));
261 bound_vars.extend(best_strategy.bound_vars.clone());
262 binding_order.push(bound_vars.clone());
263 total_cost += best_strategy.estimated_cost;
264
265 remaining.remove(best_pos);
266 }
267
268 Ok(OptimizedPatternPlan {
269 patterns: ordered_patterns,
270 total_cost,
271 binding_order,
272 })
273 }
274
275 pub fn analyze_pattern(&self, pattern: &AlgebraTriplePattern) -> Vec<PatternStrategy> {
277 let mut strategies = Vec::new();
278
279 let s_bound = !matches!(pattern.subject, AlgebraTermPattern::Variable(_));
281 let p_bound = !matches!(pattern.predicate, AlgebraTermPattern::Variable(_));
282 let o_bound = !matches!(pattern.object, AlgebraTermPattern::Variable(_));
283
284 for &index_type in &self.available_indexes {
286 let (cost, selectivity) =
287 self.estimate_index_cost(index_type, s_bound, p_bound, o_bound, pattern);
288
289 let bound_vars = self.extract_bound_vars(pattern);
290
291 strategies.push(PatternStrategy {
292 index_type,
293 estimated_cost: cost,
294 selectivity,
295 bound_vars,
296 pushdown_filters: Vec::new(), });
298 }
299
300 strategies
301 }
302
303 fn estimate_index_cost(
305 &self,
306 index_type: IndexType,
307 s_bound: bool,
308 p_bound: bool,
309 o_bound: bool,
310 pattern: &AlgebraTriplePattern,
311 ) -> (f64, f64) {
312 let base_cost = match index_type {
314 IndexType::SPO => {
315 if s_bound && p_bound && o_bound {
316 1.0 } else if s_bound && p_bound {
318 10.0 } else if s_bound {
320 100.0 } else {
322 10000.0 }
324 }
325 IndexType::POS => {
326 if p_bound && o_bound && s_bound {
327 1.0
328 } else if p_bound && o_bound {
329 10.0
330 } else if p_bound {
331 100.0
332 } else {
333 10000.0
334 }
335 }
336 IndexType::OSP => {
337 if o_bound && s_bound && p_bound {
338 1.0
339 } else if o_bound && s_bound {
340 10.0
341 } else if o_bound {
342 100.0
343 } else {
344 10000.0
345 }
346 }
347 _ => 10000.0, };
349
350 let selectivity = self.estimate_selectivity(pattern);
352 let adjusted_cost = base_cost * selectivity;
353
354 (adjusted_cost, selectivity)
355 }
356
357 fn estimate_selectivity(&self, pattern: &AlgebraTriplePattern) -> f64 {
359 let mut selectivity: f64 = 1.0;
361 let total_triples = self
362 .index_stats
363 .total_triples
364 .load(std::sync::atomic::Ordering::Relaxed) as f64;
365
366 if total_triples == 0.0 {
367 return 0.001; }
369
370 match &pattern.subject {
372 AlgebraTermPattern::NamedNode(_) | AlgebraTermPattern::BlankNode(_) => {
373 selectivity *= 0.001;
375 }
376 AlgebraTermPattern::Variable(_) => {
377 if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
379 if let Ok(card) = self.index_stats.subject_cardinality.read() {
380 if let Some(subj_card) = card.get(pred.as_str()) {
381 selectivity *= (*subj_card as f64) / total_triples;
382 }
383 }
384 }
385 }
386 _ => {}
387 }
388
389 match &pattern.predicate {
391 AlgebraTermPattern::NamedNode(pred) => {
392 if let Ok(counts) = self.index_stats.predicate_counts.read() {
393 if let Some(pred_count) = counts.get(pred.as_str()) {
394 selectivity *= (*pred_count as f64) / total_triples;
395 } else {
396 selectivity *= 0.001;
398 }
399 }
400 }
401 AlgebraTermPattern::Variable(_) => {
402 selectivity *= 0.5;
404 }
405 _ => {}
406 }
407
408 match &pattern.object {
410 AlgebraTermPattern::Literal(_) => {
411 selectivity *= 0.01;
413 }
414 AlgebraTermPattern::NamedNode(_) => {
415 selectivity *= 0.1;
417 }
418 AlgebraTermPattern::BlankNode(_) => {
419 selectivity *= 0.1;
421 }
422 AlgebraTermPattern::Variable(_) => {
423 if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
425 if let Ok(card) = self.index_stats.object_cardinality.read() {
426 if let Some(obj_card) = card.get(pred.as_str()) {
427 selectivity *= (*obj_card as f64) / total_triples;
428 }
429 }
430 }
431 }
432 AlgebraTermPattern::QuotedTriple(_) => {
433 selectivity *= 0.1;
435 }
436 }
437
438 selectivity.clamp(0.00001, 1.0)
440 }
441
442 fn extract_bound_vars(&self, pattern: &AlgebraTriplePattern) -> HashSet<Variable> {
444 let mut vars = HashSet::new();
445
446 if let AlgebraTermPattern::Variable(v) = &pattern.subject {
447 vars.insert(v.clone());
448 }
449 if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
450 vars.insert(v.clone());
451 }
452 if let AlgebraTermPattern::Variable(v) = &pattern.object {
453 vars.insert(v.clone());
454 }
455
456 vars
457 }
458
459 fn select_best_strategy(
461 &self,
462 strategies: &[PatternStrategy],
463 _bound_vars: &HashSet<Variable>,
464 ) -> Result<PatternStrategy, OxirsError> {
465 strategies
466 .iter()
467 .min_by(|a, b| {
468 a.estimated_cost
469 .partial_cmp(&b.estimated_cost)
470 .unwrap_or(std::cmp::Ordering::Equal)
471 })
472 .cloned()
473 .ok_or_else(|| OxirsError::Query("No valid strategy found".to_string()))
474 }
475
476 fn select_strategy_with_bindings(
478 &self,
479 pattern: &AlgebraTriplePattern,
480 strategies: &[PatternStrategy],
481 bound_vars: &HashSet<Variable>,
482 ) -> PatternStrategy {
483 let s_bound = match &pattern.subject {
485 AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
486 _ => true,
487 };
488 let p_bound = match &pattern.predicate {
489 AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
490 _ => true,
491 };
492 let o_bound = match &pattern.object {
493 AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
494 _ => true,
495 };
496
497 let mut best_strategy = strategies[0].clone();
499 let mut best_cost = f64::MAX;
500
501 for strategy in strategies {
502 let (adjusted_cost, _) =
503 self.estimate_index_cost(strategy.index_type, s_bound, p_bound, o_bound, pattern);
504
505 if adjusted_cost < best_cost {
506 best_cost = adjusted_cost;
507 best_strategy = strategy.clone();
508 best_strategy.estimated_cost = adjusted_cost;
509 }
510 }
511
512 best_strategy
513 }
514
515 pub fn estimate_join_selectivity(
517 &self,
518 left: &AlgebraTriplePattern,
519 right: &AlgebraTriplePattern,
520 ) -> f64 {
521 let cache_key = format!("{left:?}|{right:?}");
523
524 if let Some(cached) = self.index_stats.get_join_selectivity(&cache_key) {
526 return cached;
527 }
528
529 let left_vars = self.extract_bound_vars(left);
531 let right_vars = self.extract_bound_vars(right);
532 let common_vars: HashSet<_> = left_vars.intersection(&right_vars).cloned().collect();
533
534 let selectivity = if common_vars.is_empty() {
535 1.0
537 } else {
538 let mut join_selectivity = 1.0;
540
541 for var in common_vars.iter() {
542 let var_selectivity = self.estimate_variable_join_selectivity(var, left, right);
544 join_selectivity *= var_selectivity;
545 }
546
547 if common_vars.len() > 1 {
549 join_selectivity *= 0.1_f64.powf(common_vars.len() as f64 - 1.0);
550 }
551
552 join_selectivity
553 };
554
555 self.index_stats
557 .cache_join_selectivity(&cache_key, selectivity);
558
559 selectivity.clamp(0.00001, 1.0)
560 }
561
562 fn estimate_variable_join_selectivity(
564 &self,
565 var: &Variable,
566 left: &AlgebraTriplePattern,
567 right: &AlgebraTriplePattern,
568 ) -> f64 {
569 let left_pos = self.find_variable_position(var, left);
571 let right_pos = self.find_variable_position(var, right);
572
573 match (left_pos, right_pos) {
574 (Some(pos1), Some(pos2)) => {
575 match (pos1, pos2) {
577 (VarPosition::Subject, VarPosition::Subject) => 0.01, (VarPosition::Subject, VarPosition::Object) => 0.1, (VarPosition::Object, VarPosition::Subject) => 0.1, (VarPosition::Object, VarPosition::Object) => 0.2, (VarPosition::Predicate, _) | (_, VarPosition::Predicate) => 0.05, }
583 }
584 _ => 1.0, }
586 }
587
588 fn find_variable_position(
590 &self,
591 var: &Variable,
592 pattern: &AlgebraTriplePattern,
593 ) -> Option<VarPosition> {
594 if let AlgebraTermPattern::Variable(v) = &pattern.subject {
595 if v == var {
596 return Some(VarPosition::Subject);
597 }
598 }
599 if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
600 if v == var {
601 return Some(VarPosition::Predicate);
602 }
603 }
604 if let AlgebraTermPattern::Variable(v) = &pattern.object {
605 if v == var {
606 return Some(VarPosition::Object);
607 }
608 }
609 None
610 }
611
612 pub fn optimize_filters(
614 &self,
615 patterns: &[AlgebraTriplePattern],
616 filters: &[FilterExpression],
617 ) -> Vec<(usize, Vec<FilterExpression>)> {
618 let mut pushdown_map = Vec::new();
619
620 for (pattern_idx, pattern) in patterns.iter().enumerate() {
621 let mut pattern_filters = Vec::new();
622
623 for filter in filters {
625 if self.can_pushdown_filter(filter, pattern) {
626 pattern_filters.push(filter.clone());
627 }
628 }
629
630 if !pattern_filters.is_empty() {
631 pushdown_map.push((pattern_idx, pattern_filters));
632 }
633 }
634
635 pushdown_map
636 }
637
638 fn can_pushdown_filter(
640 &self,
641 filter: &FilterExpression,
642 pattern: &AlgebraTriplePattern,
643 ) -> bool {
644 match filter {
645 FilterExpression::Equals(var, _)
646 | FilterExpression::LessThan(var, _)
647 | FilterExpression::GreaterThan(var, _)
648 | FilterExpression::Regex(var, _)
649 | FilterExpression::In(var, _) => {
650 self.pattern_binds_variable(var, pattern)
652 }
653 FilterExpression::And(left, right) => {
654 self.can_pushdown_filter(left, pattern) || self.can_pushdown_filter(right, pattern)
656 }
657 FilterExpression::Or(left, right) => {
658 self.can_pushdown_filter(left, pattern) && self.can_pushdown_filter(right, pattern)
660 }
661 FilterExpression::Not(inner) => self.can_pushdown_filter(inner, pattern),
662 }
663 }
664
665 fn pattern_binds_variable(&self, var: &Variable, pattern: &AlgebraTriplePattern) -> bool {
667 matches!(&pattern.subject, AlgebraTermPattern::Variable(v) if v == var)
668 || matches!(&pattern.predicate, AlgebraTermPattern::Variable(v) if v == var)
669 || matches!(&pattern.object, AlgebraTermPattern::Variable(v) if v == var)
670 }
671
672 #[allow(clippy::only_used_in_recursion)]
674 pub fn estimate_filter_selectivity(&self, filter: &FilterExpression) -> f64 {
675 match filter {
676 FilterExpression::Equals(_, _) => 0.1, FilterExpression::LessThan(_, _) => 0.3, FilterExpression::GreaterThan(_, _) => 0.3,
679 FilterExpression::Regex(_, _) => 0.2, FilterExpression::In(_, values) => {
681 (values.len() as f64 * 0.1).min(0.9)
683 }
684 FilterExpression::And(left, right) => {
685 self.estimate_filter_selectivity(left) * self.estimate_filter_selectivity(right)
687 }
688 FilterExpression::Or(left, right) => {
689 let left_sel = self.estimate_filter_selectivity(left);
691 let right_sel = self.estimate_filter_selectivity(right);
692 left_sel + right_sel - (left_sel * right_sel)
693 }
694 FilterExpression::Not(inner) => {
695 1.0 - self.estimate_filter_selectivity(inner)
697 }
698 }
699 }
700
701 pub fn get_optimal_index(
703 &self,
704 pattern: &ModelTriplePattern,
705 bound_vars: &HashSet<Variable>,
706 ) -> IndexType {
707 let s_bound = pattern.subject.as_ref().is_some_and(|s| match s {
709 SubjectPattern::Variable(v) => bound_vars.contains(v),
710 _ => true,
711 });
712
713 let p_bound = pattern.predicate.as_ref().is_some_and(|p| match p {
714 PredicatePattern::Variable(v) => bound_vars.contains(v),
715 _ => true,
716 });
717
718 let o_bound = pattern.object.as_ref().is_some_and(|o| match o {
719 ObjectPattern::Variable(v) => bound_vars.contains(v),
720 _ => true,
721 });
722
723 match (s_bound, p_bound, o_bound) {
725 (true, true, _) => IndexType::SPO,
726 (true, false, true) => IndexType::SPO, (false, true, true) => IndexType::POS,
728 (true, false, false) => IndexType::SPO,
729 (false, true, false) => IndexType::POS,
730 (false, false, true) => IndexType::OSP,
731 (false, false, false) => IndexType::SPO, }
733 }
734}
735
736pub struct PatternExecutor {
738 graph: Arc<IndexedGraph>,
740 optimizer: PatternOptimizer,
742}
743
744impl PatternExecutor {
745 pub fn new(graph: Arc<IndexedGraph>, index_stats: Arc<IndexStats>) -> Self {
747 Self {
748 graph,
749 optimizer: PatternOptimizer::new(index_stats),
750 }
751 }
752
753 pub fn execute_plan(
755 &self,
756 plan: &OptimizedPatternPlan,
757 ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
758 let mut results = vec![HashMap::new()];
759
760 for (pattern, strategy) in &plan.patterns {
761 results = self.execute_pattern_with_strategy(pattern, strategy, results)?;
762 }
763
764 Ok(results)
765 }
766
767 fn execute_pattern_with_strategy(
769 &self,
770 pattern: &AlgebraTriplePattern,
771 _strategy: &PatternStrategy,
772 bindings: Vec<HashMap<Variable, Term>>,
773 ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
774 let mut new_results = Vec::new();
775
776 for binding in bindings {
777 let bound_pattern = self.bind_pattern(pattern, &binding)?;
779
780 let subject = bound_pattern
782 .subject
783 .as_ref()
784 .and_then(|s| self.subject_pattern_to_subject(s));
785 let predicate = bound_pattern
786 .predicate
787 .as_ref()
788 .and_then(|p| self.predicate_pattern_to_predicate(p));
789 let object = bound_pattern
790 .object
791 .as_ref()
792 .and_then(|o| self.object_pattern_to_object(o));
793
794 let matches = self
796 .graph
797 .query(subject.as_ref(), predicate.as_ref(), object.as_ref());
798
799 for triple in matches {
801 let mut new_binding = binding.clone();
802
803 if let AlgebraTermPattern::Variable(v) = &pattern.subject {
805 new_binding.insert(v.clone(), Term::from(triple.subject().clone()));
806 }
807 if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
808 new_binding.insert(v.clone(), Term::from(triple.predicate().clone()));
809 }
810 if let AlgebraTermPattern::Variable(v) = &pattern.object {
811 new_binding.insert(v.clone(), Term::from(triple.object().clone()));
812 }
813
814 new_results.push(new_binding);
815 }
816 }
817
818 Ok(new_results)
819 }
820
821 fn bind_pattern(
823 &self,
824 pattern: &AlgebraTriplePattern,
825 bindings: &HashMap<Variable, Term>,
826 ) -> Result<ModelTriplePattern, OxirsError> {
827 let subject = match &pattern.subject {
828 AlgebraTermPattern::Variable(v) => {
829 if let Some(term) = bindings.get(v) {
830 Some(self.term_to_subject_pattern(term)?)
831 } else {
832 None
833 }
834 }
835 AlgebraTermPattern::NamedNode(n) => Some(SubjectPattern::NamedNode(n.clone())),
836 AlgebraTermPattern::BlankNode(b) => Some(SubjectPattern::BlankNode(b.clone())),
837 AlgebraTermPattern::Literal(_) => {
838 return Err(OxirsError::Query("Literal cannot be subject".to_string()))
839 }
840 AlgebraTermPattern::QuotedTriple(_) => {
841 return Err(OxirsError::Query(
842 "RDF-star quoted triples not yet fully supported".to_string(),
843 ))
844 }
845 };
846
847 let predicate = match &pattern.predicate {
848 AlgebraTermPattern::Variable(v) => {
849 if let Some(term) = bindings.get(v) {
850 Some(self.term_to_predicate_pattern(term)?)
851 } else {
852 None
853 }
854 }
855 AlgebraTermPattern::NamedNode(n) => Some(PredicatePattern::NamedNode(n.clone())),
856 _ => return Err(OxirsError::Query("Invalid predicate pattern".to_string())),
857 };
858
859 let object = match &pattern.object {
860 AlgebraTermPattern::Variable(v) => {
861 if let Some(term) = bindings.get(v) {
862 Some(self.term_to_object_pattern(term)?)
863 } else {
864 None
865 }
866 }
867 AlgebraTermPattern::NamedNode(n) => Some(ObjectPattern::NamedNode(n.clone())),
868 AlgebraTermPattern::BlankNode(b) => Some(ObjectPattern::BlankNode(b.clone())),
869 AlgebraTermPattern::Literal(l) => Some(ObjectPattern::Literal(l.clone())),
870 AlgebraTermPattern::QuotedTriple(_) => {
871 return Err(OxirsError::Query(
872 "RDF-star quoted triples not yet fully supported".to_string(),
873 ))
874 }
875 };
876
877 Ok(ModelTriplePattern::new(subject, predicate, object))
878 }
879
880 fn term_to_subject_pattern(&self, term: &Term) -> Result<SubjectPattern, OxirsError> {
882 match term {
883 Term::NamedNode(n) => Ok(SubjectPattern::NamedNode(n.clone())),
884 Term::BlankNode(b) => Ok(SubjectPattern::BlankNode(b.clone())),
885 _ => Err(OxirsError::Query("Invalid term for subject".to_string())),
886 }
887 }
888
889 fn term_to_predicate_pattern(&self, term: &Term) -> Result<PredicatePattern, OxirsError> {
891 match term {
892 Term::NamedNode(n) => Ok(PredicatePattern::NamedNode(n.clone())),
893 _ => Err(OxirsError::Query("Invalid term for predicate".to_string())),
894 }
895 }
896
897 fn term_to_object_pattern(&self, term: &Term) -> Result<ObjectPattern, OxirsError> {
899 match term {
900 Term::NamedNode(n) => Ok(ObjectPattern::NamedNode(n.clone())),
901 Term::BlankNode(b) => Ok(ObjectPattern::BlankNode(b.clone())),
902 Term::Literal(l) => Ok(ObjectPattern::Literal(l.clone())),
903 _ => Err(OxirsError::Query(
904 "Invalid term for object pattern".to_string(),
905 )),
906 }
907 }
908
909 fn subject_pattern_to_subject(&self, pattern: &SubjectPattern) -> Option<Subject> {
911 match pattern {
912 SubjectPattern::NamedNode(n) => Some(Subject::NamedNode(n.clone())),
913 SubjectPattern::BlankNode(b) => Some(Subject::BlankNode(b.clone())),
914 SubjectPattern::Variable(_) => None,
915 }
916 }
917
918 fn predicate_pattern_to_predicate(&self, pattern: &PredicatePattern) -> Option<Predicate> {
920 match pattern {
921 PredicatePattern::NamedNode(n) => Some(Predicate::NamedNode(n.clone())),
922 PredicatePattern::Variable(_) => None,
923 }
924 }
925
926 fn object_pattern_to_object(&self, pattern: &ObjectPattern) -> Option<Object> {
928 match pattern {
929 ObjectPattern::NamedNode(n) => Some(Object::NamedNode(n.clone())),
930 ObjectPattern::BlankNode(b) => Some(Object::BlankNode(b.clone())),
931 ObjectPattern::Literal(l) => Some(Object::Literal(l.clone())),
932 ObjectPattern::Variable(_) => None,
933 }
934 }
935}
936
937#[cfg(test)]
938mod tests {
939 use super::*;
940
941 #[test]
942 fn test_pattern_optimizer_creation() {
943 let stats = Arc::new(IndexStats::new());
944 let optimizer = PatternOptimizer::new(stats);
945
946 assert_eq!(optimizer.available_indexes.len(), 3);
947 }
948
949 #[test]
950 fn test_index_selection() {
951 let stats = Arc::new(IndexStats::new());
952 let optimizer = PatternOptimizer::new(stats);
953
954 let pattern = ModelTriplePattern::new(
956 Some(SubjectPattern::NamedNode(
957 NamedNode::new("http://example.org/s").expect("valid IRI"),
958 )),
959 None,
960 None,
961 );
962
963 let bound_vars = HashSet::new();
964 let index = optimizer.get_optimal_index(&pattern, &bound_vars);
965
966 assert_eq!(index, IndexType::SPO);
967 }
968
969 #[test]
970 fn test_selectivity_estimation() {
971 let stats = Arc::new(IndexStats::new());
972 let optimizer = PatternOptimizer::new(stats);
973
974 let pattern = AlgebraTriplePattern::new(
975 AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
976 AlgebraTermPattern::NamedNode(
977 NamedNode::new("http://example.org/type").expect("valid IRI"),
978 ),
979 AlgebraTermPattern::Literal(Literal::new("test")),
980 );
981
982 let selectivity = optimizer.estimate_selectivity(&pattern);
983
984 assert!(selectivity < 0.2);
986 }
987
988 #[test]
989 fn test_pattern_optimization() {
990 let stats = Arc::new(IndexStats::new());
991 let optimizer = PatternOptimizer::new(stats);
992
993 let patterns = vec![
994 AlgebraTriplePattern::new(
995 AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
996 AlgebraTermPattern::NamedNode(
997 NamedNode::new("http://example.org/type").expect("valid IRI"),
998 ),
999 AlgebraTermPattern::NamedNode(
1000 NamedNode::new("http://example.org/Person").expect("valid IRI"),
1001 ),
1002 ),
1003 AlgebraTriplePattern::new(
1004 AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
1005 AlgebraTermPattern::NamedNode(
1006 NamedNode::new("http://example.org/name").expect("valid IRI"),
1007 ),
1008 AlgebraTermPattern::Variable(Variable::new("name").expect("valid variable name")),
1009 ),
1010 ];
1011
1012 let plan = optimizer
1013 .optimize_patterns(&patterns)
1014 .expect("operation should succeed");
1015
1016 assert_eq!(plan.patterns.len(), 2);
1017 assert!(plan.total_cost > 0.0);
1018 assert_eq!(plan.binding_order.len(), 2);
1019 }
1020}