Skip to main content

oxirs_core/query/
pattern_optimizer.rs

1//! Pattern matching optimization for query execution
2//!
3//! This module provides specialized pattern matching optimization that
4//! leverages multi-index strategies (SPO/POS/OSP) for efficient query execution.
5
6#![allow(dead_code)]
7
8use crate::indexing::IndexStats as BaseIndexStats;
9use crate::model::pattern::{
10    ObjectPattern, PredicatePattern, SubjectPattern, TriplePattern as ModelTriplePattern,
11};
12use crate::model::*;
13use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
14use crate::store::IndexedGraph;
15use crate::OxirsError;
16use std::collections::{HashMap, HashSet};
17use std::sync::Arc;
18
19/// Extended index statistics for pattern optimization
20#[derive(Debug, Default)]
21pub struct IndexStats {
22    /// Base statistics
23    base_stats: Arc<BaseIndexStats>,
24    /// Predicate occurrence counts
25    pub predicate_counts: std::sync::RwLock<HashMap<String, usize>>,
26    /// Total number of triples
27    pub total_triples: std::sync::atomic::AtomicUsize,
28    /// Subject cardinality estimates
29    pub subject_cardinality: std::sync::RwLock<HashMap<String, usize>>,
30    /// Object cardinality estimates  
31    pub object_cardinality: std::sync::RwLock<HashMap<String, usize>>,
32    /// Join selectivity cache
33    pub join_selectivity_cache: std::sync::RwLock<HashMap<String, f64>>,
34}
35
36impl IndexStats {
37    /// Create new index statistics
38    pub fn new() -> Self {
39        Self {
40            base_stats: Arc::new(BaseIndexStats::new()),
41            predicate_counts: std::sync::RwLock::new(HashMap::new()),
42            total_triples: std::sync::atomic::AtomicUsize::new(0),
43            subject_cardinality: std::sync::RwLock::new(HashMap::new()),
44            object_cardinality: std::sync::RwLock::new(HashMap::new()),
45            join_selectivity_cache: std::sync::RwLock::new(HashMap::new()),
46        }
47    }
48
49    /// Update predicate count
50    pub fn update_predicate_count(&self, predicate: &str, count: usize) {
51        if let Ok(mut counts) = self.predicate_counts.write() {
52            counts.insert(predicate.to_string(), count);
53        }
54    }
55
56    /// Set total triples
57    pub fn set_total_triples(&self, count: usize) {
58        self.total_triples
59            .store(count, std::sync::atomic::Ordering::Relaxed);
60    }
61
62    /// Update subject cardinality estimate
63    pub fn update_subject_cardinality(&self, predicate: &str, cardinality: usize) {
64        if let Ok(mut card) = self.subject_cardinality.write() {
65            card.insert(predicate.to_string(), cardinality);
66        }
67    }
68
69    /// Update object cardinality estimate
70    pub fn update_object_cardinality(&self, predicate: &str, cardinality: usize) {
71        if let Ok(mut card) = self.object_cardinality.write() {
72            card.insert(predicate.to_string(), cardinality);
73        }
74    }
75
76    /// Cache join selectivity
77    pub fn cache_join_selectivity(&self, pattern_pair: &str, selectivity: f64) {
78        if let Ok(mut cache) = self.join_selectivity_cache.write() {
79            cache.insert(pattern_pair.to_string(), selectivity);
80        }
81    }
82
83    /// Get cached join selectivity
84    pub fn get_join_selectivity(&self, pattern_pair: &str) -> Option<f64> {
85        match self.join_selectivity_cache.read() {
86            Ok(cache) => cache.get(pattern_pair).copied(),
87            _ => None,
88        }
89    }
90}
91
92/// Pattern matching optimizer that selects optimal indexes
93#[derive(Debug)]
94pub struct PatternOptimizer {
95    /// Index statistics for cost estimation
96    index_stats: Arc<IndexStats>,
97    /// Available index types
98    available_indexes: Vec<IndexType>,
99}
100
101/// Types of indexes available for optimization
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
103pub enum IndexType {
104    /// Subject-Predicate-Object index
105    SPO,
106    /// Predicate-Object-Subject index
107    POS,
108    /// Object-Subject-Predicate index
109    OSP,
110    /// Subject-Object-Predicate index (optional)
111    SOP,
112    /// Predicate-Subject-Object index (optional)
113    PSO,
114    /// Object-Predicate-Subject index (optional)
115    OPS,
116}
117
118/// Variable position in a triple pattern
119#[derive(Debug, Clone, Copy, PartialEq, Eq)]
120enum VarPosition {
121    Subject,
122    Predicate,
123    Object,
124}
125
126/// Filter expression for evaluation optimization
127#[derive(Debug, Clone)]
128pub enum FilterExpression {
129    /// Equality comparison
130    Equals(Variable, Term),
131    /// Less than comparison
132    LessThan(Variable, Term),
133    /// Greater than comparison
134    GreaterThan(Variable, Term),
135    /// String regex match
136    Regex(Variable, String),
137    /// In list
138    In(Variable, Vec<Term>),
139    /// Logical AND
140    And(Box<FilterExpression>, Box<FilterExpression>),
141    /// Logical OR
142    Or(Box<FilterExpression>, Box<FilterExpression>),
143    /// Logical NOT
144    Not(Box<FilterExpression>),
145}
146
147/// Pattern matching strategy
148#[derive(Debug, Clone)]
149pub struct PatternStrategy {
150    /// Index to use for this pattern
151    pub index_type: IndexType,
152    /// Estimated cost
153    pub estimated_cost: f64,
154    /// Selectivity estimate (0.0 to 1.0)
155    pub selectivity: f64,
156    /// Variables that will be bound after executing this pattern
157    pub bound_vars: HashSet<Variable>,
158    /// Associated filter expressions that can be pushed down
159    pub pushdown_filters: Vec<FilterExpression>,
160}
161
162/// Optimized pattern execution order
163#[derive(Debug, Clone)]
164pub struct OptimizedPatternPlan {
165    /// Ordered list of patterns with their strategies
166    pub patterns: Vec<(AlgebraTriplePattern, PatternStrategy)>,
167    /// Total estimated cost
168    pub total_cost: f64,
169    /// Variables bound at each step
170    pub binding_order: Vec<HashSet<Variable>>,
171}
172
173impl PatternOptimizer {
174    /// Create a new pattern optimizer
175    pub fn new(index_stats: Arc<IndexStats>) -> Self {
176        Self {
177            index_stats,
178            available_indexes: vec![IndexType::SPO, IndexType::POS, IndexType::OSP],
179        }
180    }
181
182    /// Optimize a set of triple patterns
183    pub fn optimize_patterns(
184        &self,
185        patterns: &[AlgebraTriplePattern],
186    ) -> Result<OptimizedPatternPlan, OxirsError> {
187        if patterns.is_empty() {
188            return Ok(OptimizedPatternPlan {
189                patterns: Vec::new(),
190                total_cost: 0.0,
191                binding_order: Vec::new(),
192            });
193        }
194
195        // Analyze each pattern individually
196        let mut pattern_strategies: Vec<(usize, Vec<PatternStrategy>)> = patterns
197            .iter()
198            .enumerate()
199            .map(|(idx, pattern)| {
200                let strategies = self.analyze_pattern(pattern);
201                (idx, strategies)
202            })
203            .collect();
204
205        // Sort patterns by their best selectivity
206        pattern_strategies.sort_by(|a, b| {
207            let best_a =
208                a.1.iter()
209                    .map(|s| s.selectivity)
210                    .min_by(|x, y| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal))
211                    .unwrap_or(1.0);
212            let best_b =
213                b.1.iter()
214                    .map(|s| s.selectivity)
215                    .min_by(|x, y| x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal))
216                    .unwrap_or(1.0);
217            best_a
218                .partial_cmp(&best_b)
219                .unwrap_or(std::cmp::Ordering::Equal)
220        });
221
222        // Build execution order considering variable bindings
223        let mut ordered_patterns = Vec::new();
224        let mut bound_vars = HashSet::new();
225        let mut binding_order = Vec::new();
226        let mut total_cost = 0.0;
227
228        // First pattern - choose the most selective
229        let (first_idx, first_strategies) = &pattern_strategies[0];
230        let first_pattern = &patterns[*first_idx];
231        let first_strategy = self.select_best_strategy(first_strategies, &bound_vars)?;
232
233        ordered_patterns.push((first_pattern.clone(), first_strategy.clone()));
234        bound_vars.extend(first_strategy.bound_vars.clone());
235        binding_order.push(bound_vars.clone());
236        total_cost += first_strategy.estimated_cost;
237
238        // Process remaining patterns
239        let mut remaining: Vec<_> = pattern_strategies[1..].to_vec();
240
241        while !remaining.is_empty() {
242            // Find pattern with best cost considering current bindings
243            let (best_pos, best_idx, best_strategy) = remaining
244                .iter()
245                .enumerate()
246                .map(|(pos, (idx, strategies))| {
247                    let pattern = &patterns[*idx];
248                    let strategy =
249                        self.select_strategy_with_bindings(pattern, strategies, &bound_vars);
250                    (pos, idx, strategy)
251                })
252                .min_by(|(_, _, a), (_, _, b)| {
253                    a.estimated_cost
254                        .partial_cmp(&b.estimated_cost)
255                        .unwrap_or(std::cmp::Ordering::Equal)
256                })
257                .ok_or_else(|| OxirsError::Query("Failed to find next pattern".to_string()))?;
258
259            let pattern = &patterns[*best_idx];
260            ordered_patterns.push((pattern.clone(), best_strategy.clone()));
261            bound_vars.extend(best_strategy.bound_vars.clone());
262            binding_order.push(bound_vars.clone());
263            total_cost += best_strategy.estimated_cost;
264
265            remaining.remove(best_pos);
266        }
267
268        Ok(OptimizedPatternPlan {
269            patterns: ordered_patterns,
270            total_cost,
271            binding_order,
272        })
273    }
274
275    /// Analyze a single pattern and generate strategies
276    pub fn analyze_pattern(&self, pattern: &AlgebraTriplePattern) -> Vec<PatternStrategy> {
277        let mut strategies = Vec::new();
278
279        // Analyze which components are bound vs variable
280        let s_bound = !matches!(pattern.subject, AlgebraTermPattern::Variable(_));
281        let p_bound = !matches!(pattern.predicate, AlgebraTermPattern::Variable(_));
282        let o_bound = !matches!(pattern.object, AlgebraTermPattern::Variable(_));
283
284        // Generate strategies for each index type
285        for &index_type in &self.available_indexes {
286            let (cost, selectivity) =
287                self.estimate_index_cost(index_type, s_bound, p_bound, o_bound, pattern);
288
289            let bound_vars = self.extract_bound_vars(pattern);
290
291            strategies.push(PatternStrategy {
292                index_type,
293                estimated_cost: cost,
294                selectivity,
295                bound_vars,
296                pushdown_filters: Vec::new(), // Will be populated by filter optimization
297            });
298        }
299
300        strategies
301    }
302
303    /// Estimate cost of using a specific index
304    fn estimate_index_cost(
305        &self,
306        index_type: IndexType,
307        s_bound: bool,
308        p_bound: bool,
309        o_bound: bool,
310        pattern: &AlgebraTriplePattern,
311    ) -> (f64, f64) {
312        // Base costs for different access patterns
313        let base_cost = match index_type {
314            IndexType::SPO => {
315                if s_bound && p_bound && o_bound {
316                    1.0 // Exact lookup
317                } else if s_bound && p_bound {
318                    10.0 // Range scan on O
319                } else if s_bound {
320                    100.0 // Range scan on P,O
321                } else {
322                    10000.0 // Full scan
323                }
324            }
325            IndexType::POS => {
326                if p_bound && o_bound && s_bound {
327                    1.0
328                } else if p_bound && o_bound {
329                    10.0
330                } else if p_bound {
331                    100.0
332                } else {
333                    10000.0
334                }
335            }
336            IndexType::OSP => {
337                if o_bound && s_bound && p_bound {
338                    1.0
339                } else if o_bound && s_bound {
340                    10.0
341                } else if o_bound {
342                    100.0
343                } else {
344                    10000.0
345                }
346            }
347            _ => 10000.0, // Other indexes not implemented
348        };
349
350        // Adjust based on statistics
351        let selectivity = self.estimate_selectivity(pattern);
352        let adjusted_cost = base_cost * selectivity;
353
354        (adjusted_cost, selectivity)
355    }
356
357    /// Estimate selectivity of a pattern using advanced statistics
358    fn estimate_selectivity(&self, pattern: &AlgebraTriplePattern) -> f64 {
359        // Base selectivity
360        let mut selectivity: f64 = 1.0;
361        let total_triples = self
362            .index_stats
363            .total_triples
364            .load(std::sync::atomic::Ordering::Relaxed) as f64;
365
366        if total_triples == 0.0 {
367            return 0.001; // Default for empty dataset
368        }
369
370        // Subject selectivity
371        match &pattern.subject {
372            AlgebraTermPattern::NamedNode(_) | AlgebraTermPattern::BlankNode(_) => {
373                // Bound subject - highly selective
374                selectivity *= 0.001;
375            }
376            AlgebraTermPattern::Variable(_) => {
377                // Variable subject - depends on predicate cardinality
378                if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
379                    if let Ok(card) = self.index_stats.subject_cardinality.read() {
380                        if let Some(subj_card) = card.get(pred.as_str()) {
381                            selectivity *= (*subj_card as f64) / total_triples;
382                        }
383                    }
384                }
385            }
386            _ => {}
387        }
388
389        // Predicate selectivity (most important for triple stores)
390        match &pattern.predicate {
391            AlgebraTermPattern::NamedNode(pred) => {
392                if let Ok(counts) = self.index_stats.predicate_counts.read() {
393                    if let Some(pred_count) = counts.get(pred.as_str()) {
394                        selectivity *= (*pred_count as f64) / total_triples;
395                    } else {
396                        // Unknown predicate - assume very low frequency
397                        selectivity *= 0.001;
398                    }
399                }
400            }
401            AlgebraTermPattern::Variable(_) => {
402                // Variable predicate - less selective
403                selectivity *= 0.5;
404            }
405            _ => {}
406        }
407
408        // Object selectivity
409        match &pattern.object {
410            AlgebraTermPattern::Literal(_) => {
411                // Literals are usually very selective
412                selectivity *= 0.01;
413            }
414            AlgebraTermPattern::NamedNode(_) => {
415                // Named nodes moderately selective
416                selectivity *= 0.1;
417            }
418            AlgebraTermPattern::BlankNode(_) => {
419                // Blank nodes moderately selective
420                selectivity *= 0.1;
421            }
422            AlgebraTermPattern::Variable(_) => {
423                // Variable object - depends on predicate object cardinality
424                if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
425                    if let Ok(card) = self.index_stats.object_cardinality.read() {
426                        if let Some(obj_card) = card.get(pred.as_str()) {
427                            selectivity *= (*obj_card as f64) / total_triples;
428                        }
429                    }
430                }
431            }
432            AlgebraTermPattern::QuotedTriple(_) => {
433                // RDF-star quoted triples - moderately selective (similar to named nodes)
434                selectivity *= 0.1;
435            }
436        }
437
438        // Apply bounds and handle edge cases
439        selectivity.clamp(0.00001, 1.0)
440    }
441
442    /// Extract variables that will be bound by this pattern
443    fn extract_bound_vars(&self, pattern: &AlgebraTriplePattern) -> HashSet<Variable> {
444        let mut vars = HashSet::new();
445
446        if let AlgebraTermPattern::Variable(v) = &pattern.subject {
447            vars.insert(v.clone());
448        }
449        if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
450            vars.insert(v.clone());
451        }
452        if let AlgebraTermPattern::Variable(v) = &pattern.object {
453            vars.insert(v.clone());
454        }
455
456        vars
457    }
458
459    /// Select best strategy from options
460    fn select_best_strategy(
461        &self,
462        strategies: &[PatternStrategy],
463        _bound_vars: &HashSet<Variable>,
464    ) -> Result<PatternStrategy, OxirsError> {
465        strategies
466            .iter()
467            .min_by(|a, b| {
468                a.estimated_cost
469                    .partial_cmp(&b.estimated_cost)
470                    .unwrap_or(std::cmp::Ordering::Equal)
471            })
472            .cloned()
473            .ok_or_else(|| OxirsError::Query("No valid strategy found".to_string()))
474    }
475
476    /// Select strategy considering already bound variables
477    fn select_strategy_with_bindings(
478        &self,
479        pattern: &AlgebraTriplePattern,
480        strategies: &[PatternStrategy],
481        bound_vars: &HashSet<Variable>,
482    ) -> PatternStrategy {
483        // Check which variables in pattern are already bound
484        let s_bound = match &pattern.subject {
485            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
486            _ => true,
487        };
488        let p_bound = match &pattern.predicate {
489            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
490            _ => true,
491        };
492        let o_bound = match &pattern.object {
493            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
494            _ => true,
495        };
496
497        // Re-evaluate strategies with bound variables
498        let mut best_strategy = strategies[0].clone();
499        let mut best_cost = f64::MAX;
500
501        for strategy in strategies {
502            let (adjusted_cost, _) =
503                self.estimate_index_cost(strategy.index_type, s_bound, p_bound, o_bound, pattern);
504
505            if adjusted_cost < best_cost {
506                best_cost = adjusted_cost;
507                best_strategy = strategy.clone();
508                best_strategy.estimated_cost = adjusted_cost;
509            }
510        }
511
512        best_strategy
513    }
514
515    /// Estimate join selectivity between two patterns
516    pub fn estimate_join_selectivity(
517        &self,
518        left: &AlgebraTriplePattern,
519        right: &AlgebraTriplePattern,
520    ) -> f64 {
521        // Create cache key for this pattern pair
522        let cache_key = format!("{left:?}|{right:?}");
523
524        // Check cache first
525        if let Some(cached) = self.index_stats.get_join_selectivity(&cache_key) {
526            return cached;
527        }
528
529        // Find common variables
530        let left_vars = self.extract_bound_vars(left);
531        let right_vars = self.extract_bound_vars(right);
532        let common_vars: HashSet<_> = left_vars.intersection(&right_vars).cloned().collect();
533
534        let selectivity = if common_vars.is_empty() {
535            // Cartesian product - very expensive
536            1.0
537        } else {
538            // Estimate based on type of join variables
539            let mut join_selectivity = 1.0;
540
541            for var in common_vars.iter() {
542                // Estimate selectivity based on variable position and pattern types
543                let var_selectivity = self.estimate_variable_join_selectivity(var, left, right);
544                join_selectivity *= var_selectivity;
545            }
546
547            // Apply correlation factor for multiple join variables
548            if common_vars.len() > 1 {
549                join_selectivity *= 0.1_f64.powf(common_vars.len() as f64 - 1.0);
550            }
551
552            join_selectivity
553        };
554
555        // Cache the result
556        self.index_stats
557            .cache_join_selectivity(&cache_key, selectivity);
558
559        selectivity.clamp(0.00001, 1.0)
560    }
561
562    /// Estimate selectivity for a variable join
563    fn estimate_variable_join_selectivity(
564        &self,
565        var: &Variable,
566        left: &AlgebraTriplePattern,
567        right: &AlgebraTriplePattern,
568    ) -> f64 {
569        // Find position of variable in each pattern
570        let left_pos = self.find_variable_position(var, left);
571        let right_pos = self.find_variable_position(var, right);
572
573        match (left_pos, right_pos) {
574            (Some(pos1), Some(pos2)) => {
575                // Subject-subject joins are usually more selective than object-object
576                match (pos1, pos2) {
577                    (VarPosition::Subject, VarPosition::Subject) => 0.01, // Very selective
578                    (VarPosition::Subject, VarPosition::Object) => 0.1,   // Moderately selective
579                    (VarPosition::Object, VarPosition::Subject) => 0.1,   // Moderately selective
580                    (VarPosition::Object, VarPosition::Object) => 0.2,    // Less selective
581                    (VarPosition::Predicate, _) | (_, VarPosition::Predicate) => 0.05, // Predicate joins rare but selective
582                }
583            }
584            _ => 1.0, // No actual join
585        }
586    }
587
588    /// Find position of variable in pattern
589    fn find_variable_position(
590        &self,
591        var: &Variable,
592        pattern: &AlgebraTriplePattern,
593    ) -> Option<VarPosition> {
594        if let AlgebraTermPattern::Variable(v) = &pattern.subject {
595            if v == var {
596                return Some(VarPosition::Subject);
597            }
598        }
599        if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
600            if v == var {
601                return Some(VarPosition::Predicate);
602            }
603        }
604        if let AlgebraTermPattern::Variable(v) = &pattern.object {
605            if v == var {
606                return Some(VarPosition::Object);
607            }
608        }
609        None
610    }
611
612    /// Optimize filter expressions and determine pushdown opportunities
613    pub fn optimize_filters(
614        &self,
615        patterns: &[AlgebraTriplePattern],
616        filters: &[FilterExpression],
617    ) -> Vec<(usize, Vec<FilterExpression>)> {
618        let mut pushdown_map = Vec::new();
619
620        for (pattern_idx, pattern) in patterns.iter().enumerate() {
621            let mut pattern_filters = Vec::new();
622
623            // Find filters that can be pushed down to this pattern
624            for filter in filters {
625                if self.can_pushdown_filter(filter, pattern) {
626                    pattern_filters.push(filter.clone());
627                }
628            }
629
630            if !pattern_filters.is_empty() {
631                pushdown_map.push((pattern_idx, pattern_filters));
632            }
633        }
634
635        pushdown_map
636    }
637
638    /// Check if filter can be pushed down to pattern
639    fn can_pushdown_filter(
640        &self,
641        filter: &FilterExpression,
642        pattern: &AlgebraTriplePattern,
643    ) -> bool {
644        match filter {
645            FilterExpression::Equals(var, _)
646            | FilterExpression::LessThan(var, _)
647            | FilterExpression::GreaterThan(var, _)
648            | FilterExpression::Regex(var, _)
649            | FilterExpression::In(var, _) => {
650                // Can push down if pattern binds this variable
651                self.pattern_binds_variable(var, pattern)
652            }
653            FilterExpression::And(left, right) => {
654                // Can push down if either side can be pushed down
655                self.can_pushdown_filter(left, pattern) || self.can_pushdown_filter(right, pattern)
656            }
657            FilterExpression::Or(left, right) => {
658                // Can only push down if both sides can be pushed down
659                self.can_pushdown_filter(left, pattern) && self.can_pushdown_filter(right, pattern)
660            }
661            FilterExpression::Not(inner) => self.can_pushdown_filter(inner, pattern),
662        }
663    }
664
665    /// Check if pattern binds variable
666    fn pattern_binds_variable(&self, var: &Variable, pattern: &AlgebraTriplePattern) -> bool {
667        matches!(&pattern.subject, AlgebraTermPattern::Variable(v) if v == var)
668            || matches!(&pattern.predicate, AlgebraTermPattern::Variable(v) if v == var)
669            || matches!(&pattern.object, AlgebraTermPattern::Variable(v) if v == var)
670    }
671
672    /// Estimate filter selectivity
673    #[allow(clippy::only_used_in_recursion)]
674    pub fn estimate_filter_selectivity(&self, filter: &FilterExpression) -> f64 {
675        match filter {
676            FilterExpression::Equals(_, _) => 0.1, // Equality is selective
677            FilterExpression::LessThan(_, _) => 0.3, // Range filters moderately selective
678            FilterExpression::GreaterThan(_, _) => 0.3,
679            FilterExpression::Regex(_, _) => 0.2, // String matches moderately selective
680            FilterExpression::In(_, values) => {
681                // Selectivity depends on number of values
682                (values.len() as f64 * 0.1).min(0.9)
683            }
684            FilterExpression::And(left, right) => {
685                // AND is more selective
686                self.estimate_filter_selectivity(left) * self.estimate_filter_selectivity(right)
687            }
688            FilterExpression::Or(left, right) => {
689                // OR is less selective
690                let left_sel = self.estimate_filter_selectivity(left);
691                let right_sel = self.estimate_filter_selectivity(right);
692                left_sel + right_sel - (left_sel * right_sel)
693            }
694            FilterExpression::Not(inner) => {
695                // NOT inverts selectivity
696                1.0 - self.estimate_filter_selectivity(inner)
697            }
698        }
699    }
700
701    /// Get optimal index type for a pattern execution
702    pub fn get_optimal_index(
703        &self,
704        pattern: &ModelTriplePattern,
705        bound_vars: &HashSet<Variable>,
706    ) -> IndexType {
707        // Check which components are bound
708        let s_bound = pattern.subject.as_ref().is_some_and(|s| match s {
709            SubjectPattern::Variable(v) => bound_vars.contains(v),
710            _ => true,
711        });
712
713        let p_bound = pattern.predicate.as_ref().is_some_and(|p| match p {
714            PredicatePattern::Variable(v) => bound_vars.contains(v),
715            _ => true,
716        });
717
718        let o_bound = pattern.object.as_ref().is_some_and(|o| match o {
719            ObjectPattern::Variable(v) => bound_vars.contains(v),
720            _ => true,
721        });
722
723        // Select index based on bound components
724        match (s_bound, p_bound, o_bound) {
725            (true, true, _) => IndexType::SPO,
726            (true, false, true) => IndexType::SPO, // Could use SOP if available
727            (false, true, true) => IndexType::POS,
728            (true, false, false) => IndexType::SPO,
729            (false, true, false) => IndexType::POS,
730            (false, false, true) => IndexType::OSP,
731            (false, false, false) => IndexType::SPO, // Full scan, any index works
732        }
733    }
734}
735
736/// Pattern execution engine that uses optimized strategies
737pub struct PatternExecutor {
738    /// The indexed graph to query
739    graph: Arc<IndexedGraph>,
740    /// Pattern optimizer
741    optimizer: PatternOptimizer,
742}
743
744impl PatternExecutor {
745    /// Create new pattern executor
746    pub fn new(graph: Arc<IndexedGraph>, index_stats: Arc<IndexStats>) -> Self {
747        Self {
748            graph,
749            optimizer: PatternOptimizer::new(index_stats),
750        }
751    }
752
753    /// Execute an optimized pattern plan
754    pub fn execute_plan(
755        &self,
756        plan: &OptimizedPatternPlan,
757    ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
758        let mut results = vec![HashMap::new()];
759
760        for (pattern, strategy) in &plan.patterns {
761            results = self.execute_pattern_with_strategy(pattern, strategy, results)?;
762        }
763
764        Ok(results)
765    }
766
767    /// Execute a single pattern with given strategy
768    fn execute_pattern_with_strategy(
769        &self,
770        pattern: &AlgebraTriplePattern,
771        _strategy: &PatternStrategy,
772        bindings: Vec<HashMap<Variable, Term>>,
773    ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
774        let mut new_results = Vec::new();
775
776        for binding in bindings {
777            // Convert algebra pattern to model pattern with bindings
778            let bound_pattern = self.bind_pattern(pattern, &binding)?;
779
780            // Convert pattern types to concrete types for query
781            let subject = bound_pattern
782                .subject
783                .as_ref()
784                .and_then(|s| self.subject_pattern_to_subject(s));
785            let predicate = bound_pattern
786                .predicate
787                .as_ref()
788                .and_then(|p| self.predicate_pattern_to_predicate(p));
789            let object = bound_pattern
790                .object
791                .as_ref()
792                .and_then(|o| self.object_pattern_to_object(o));
793
794            // Query using selected index
795            let matches = self
796                .graph
797                .query(subject.as_ref(), predicate.as_ref(), object.as_ref());
798
799            // Extend bindings with new matches
800            for triple in matches {
801                let mut new_binding = binding.clone();
802
803                // Bind variables from matched triple
804                if let AlgebraTermPattern::Variable(v) = &pattern.subject {
805                    new_binding.insert(v.clone(), Term::from(triple.subject().clone()));
806                }
807                if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
808                    new_binding.insert(v.clone(), Term::from(triple.predicate().clone()));
809                }
810                if let AlgebraTermPattern::Variable(v) = &pattern.object {
811                    new_binding.insert(v.clone(), Term::from(triple.object().clone()));
812                }
813
814                new_results.push(new_binding);
815            }
816        }
817
818        Ok(new_results)
819    }
820
821    /// Bind variables in pattern using current bindings
822    fn bind_pattern(
823        &self,
824        pattern: &AlgebraTriplePattern,
825        bindings: &HashMap<Variable, Term>,
826    ) -> Result<ModelTriplePattern, OxirsError> {
827        let subject = match &pattern.subject {
828            AlgebraTermPattern::Variable(v) => {
829                if let Some(term) = bindings.get(v) {
830                    Some(self.term_to_subject_pattern(term)?)
831                } else {
832                    None
833                }
834            }
835            AlgebraTermPattern::NamedNode(n) => Some(SubjectPattern::NamedNode(n.clone())),
836            AlgebraTermPattern::BlankNode(b) => Some(SubjectPattern::BlankNode(b.clone())),
837            AlgebraTermPattern::Literal(_) => {
838                return Err(OxirsError::Query("Literal cannot be subject".to_string()))
839            }
840            AlgebraTermPattern::QuotedTriple(_) => {
841                return Err(OxirsError::Query(
842                    "RDF-star quoted triples not yet fully supported".to_string(),
843                ))
844            }
845        };
846
847        let predicate = match &pattern.predicate {
848            AlgebraTermPattern::Variable(v) => {
849                if let Some(term) = bindings.get(v) {
850                    Some(self.term_to_predicate_pattern(term)?)
851                } else {
852                    None
853                }
854            }
855            AlgebraTermPattern::NamedNode(n) => Some(PredicatePattern::NamedNode(n.clone())),
856            _ => return Err(OxirsError::Query("Invalid predicate pattern".to_string())),
857        };
858
859        let object = match &pattern.object {
860            AlgebraTermPattern::Variable(v) => {
861                if let Some(term) = bindings.get(v) {
862                    Some(self.term_to_object_pattern(term)?)
863                } else {
864                    None
865                }
866            }
867            AlgebraTermPattern::NamedNode(n) => Some(ObjectPattern::NamedNode(n.clone())),
868            AlgebraTermPattern::BlankNode(b) => Some(ObjectPattern::BlankNode(b.clone())),
869            AlgebraTermPattern::Literal(l) => Some(ObjectPattern::Literal(l.clone())),
870            AlgebraTermPattern::QuotedTriple(_) => {
871                return Err(OxirsError::Query(
872                    "RDF-star quoted triples not yet fully supported".to_string(),
873                ))
874            }
875        };
876
877        Ok(ModelTriplePattern::new(subject, predicate, object))
878    }
879
880    /// Convert term to subject pattern
881    fn term_to_subject_pattern(&self, term: &Term) -> Result<SubjectPattern, OxirsError> {
882        match term {
883            Term::NamedNode(n) => Ok(SubjectPattern::NamedNode(n.clone())),
884            Term::BlankNode(b) => Ok(SubjectPattern::BlankNode(b.clone())),
885            _ => Err(OxirsError::Query("Invalid term for subject".to_string())),
886        }
887    }
888
889    /// Convert term to predicate pattern
890    fn term_to_predicate_pattern(&self, term: &Term) -> Result<PredicatePattern, OxirsError> {
891        match term {
892            Term::NamedNode(n) => Ok(PredicatePattern::NamedNode(n.clone())),
893            _ => Err(OxirsError::Query("Invalid term for predicate".to_string())),
894        }
895    }
896
897    /// Convert term to object pattern
898    fn term_to_object_pattern(&self, term: &Term) -> Result<ObjectPattern, OxirsError> {
899        match term {
900            Term::NamedNode(n) => Ok(ObjectPattern::NamedNode(n.clone())),
901            Term::BlankNode(b) => Ok(ObjectPattern::BlankNode(b.clone())),
902            Term::Literal(l) => Ok(ObjectPattern::Literal(l.clone())),
903            _ => Err(OxirsError::Query(
904                "Invalid term for object pattern".to_string(),
905            )),
906        }
907    }
908
909    /// Convert subject pattern to subject
910    fn subject_pattern_to_subject(&self, pattern: &SubjectPattern) -> Option<Subject> {
911        match pattern {
912            SubjectPattern::NamedNode(n) => Some(Subject::NamedNode(n.clone())),
913            SubjectPattern::BlankNode(b) => Some(Subject::BlankNode(b.clone())),
914            SubjectPattern::Variable(_) => None,
915        }
916    }
917
918    /// Convert predicate pattern to predicate
919    fn predicate_pattern_to_predicate(&self, pattern: &PredicatePattern) -> Option<Predicate> {
920        match pattern {
921            PredicatePattern::NamedNode(n) => Some(Predicate::NamedNode(n.clone())),
922            PredicatePattern::Variable(_) => None,
923        }
924    }
925
926    /// Convert object pattern to object
927    fn object_pattern_to_object(&self, pattern: &ObjectPattern) -> Option<Object> {
928        match pattern {
929            ObjectPattern::NamedNode(n) => Some(Object::NamedNode(n.clone())),
930            ObjectPattern::BlankNode(b) => Some(Object::BlankNode(b.clone())),
931            ObjectPattern::Literal(l) => Some(Object::Literal(l.clone())),
932            ObjectPattern::Variable(_) => None,
933        }
934    }
935}
936
937#[cfg(test)]
938mod tests {
939    use super::*;
940
941    #[test]
942    fn test_pattern_optimizer_creation() {
943        let stats = Arc::new(IndexStats::new());
944        let optimizer = PatternOptimizer::new(stats);
945
946        assert_eq!(optimizer.available_indexes.len(), 3);
947    }
948
949    #[test]
950    fn test_index_selection() {
951        let stats = Arc::new(IndexStats::new());
952        let optimizer = PatternOptimizer::new(stats);
953
954        // Pattern with bound subject
955        let pattern = ModelTriplePattern::new(
956            Some(SubjectPattern::NamedNode(
957                NamedNode::new("http://example.org/s").expect("valid IRI"),
958            )),
959            None,
960            None,
961        );
962
963        let bound_vars = HashSet::new();
964        let index = optimizer.get_optimal_index(&pattern, &bound_vars);
965
966        assert_eq!(index, IndexType::SPO);
967    }
968
969    #[test]
970    fn test_selectivity_estimation() {
971        let stats = Arc::new(IndexStats::new());
972        let optimizer = PatternOptimizer::new(stats);
973
974        let pattern = AlgebraTriplePattern::new(
975            AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
976            AlgebraTermPattern::NamedNode(
977                NamedNode::new("http://example.org/type").expect("valid IRI"),
978            ),
979            AlgebraTermPattern::Literal(Literal::new("test")),
980        );
981
982        let selectivity = optimizer.estimate_selectivity(&pattern);
983
984        // Literal object should give low selectivity
985        assert!(selectivity < 0.2);
986    }
987
988    #[test]
989    fn test_pattern_optimization() {
990        let stats = Arc::new(IndexStats::new());
991        let optimizer = PatternOptimizer::new(stats);
992
993        let patterns = vec![
994            AlgebraTriplePattern::new(
995                AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
996                AlgebraTermPattern::NamedNode(
997                    NamedNode::new("http://example.org/type").expect("valid IRI"),
998                ),
999                AlgebraTermPattern::NamedNode(
1000                    NamedNode::new("http://example.org/Person").expect("valid IRI"),
1001                ),
1002            ),
1003            AlgebraTriplePattern::new(
1004                AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
1005                AlgebraTermPattern::NamedNode(
1006                    NamedNode::new("http://example.org/name").expect("valid IRI"),
1007                ),
1008                AlgebraTermPattern::Variable(Variable::new("name").expect("valid variable name")),
1009            ),
1010        ];
1011
1012        let plan = optimizer
1013            .optimize_patterns(&patterns)
1014            .expect("operation should succeed");
1015
1016        assert_eq!(plan.patterns.len(), 2);
1017        assert!(plan.total_cost > 0.0);
1018        assert_eq!(plan.binding_order.len(), 2);
1019    }
1020}