oxirs_core/query/
pattern_optimizer.rs

1//! Pattern matching optimization for query execution
2//!
3//! This module provides specialized pattern matching optimization that
4//! leverages multi-index strategies (SPO/POS/OSP) for efficient query execution.
5
6#![allow(dead_code)]
7
8use crate::indexing::IndexStats as BaseIndexStats;
9use crate::model::pattern::{
10    ObjectPattern, PredicatePattern, SubjectPattern, TriplePattern as ModelTriplePattern,
11};
12use crate::model::*;
13use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
14use crate::store::IndexedGraph;
15use crate::OxirsError;
16use std::collections::{HashMap, HashSet};
17use std::sync::Arc;
18
19/// Extended index statistics for pattern optimization
20#[derive(Debug, Default)]
21pub struct IndexStats {
22    /// Base statistics
23    base_stats: Arc<BaseIndexStats>,
24    /// Predicate occurrence counts
25    pub predicate_counts: std::sync::RwLock<HashMap<String, usize>>,
26    /// Total number of triples
27    pub total_triples: std::sync::atomic::AtomicUsize,
28    /// Subject cardinality estimates
29    pub subject_cardinality: std::sync::RwLock<HashMap<String, usize>>,
30    /// Object cardinality estimates  
31    pub object_cardinality: std::sync::RwLock<HashMap<String, usize>>,
32    /// Join selectivity cache
33    pub join_selectivity_cache: std::sync::RwLock<HashMap<String, f64>>,
34}
35
36impl IndexStats {
37    /// Create new index statistics
38    pub fn new() -> Self {
39        Self {
40            base_stats: Arc::new(BaseIndexStats::new()),
41            predicate_counts: std::sync::RwLock::new(HashMap::new()),
42            total_triples: std::sync::atomic::AtomicUsize::new(0),
43            subject_cardinality: std::sync::RwLock::new(HashMap::new()),
44            object_cardinality: std::sync::RwLock::new(HashMap::new()),
45            join_selectivity_cache: std::sync::RwLock::new(HashMap::new()),
46        }
47    }
48
49    /// Update predicate count
50    pub fn update_predicate_count(&self, predicate: &str, count: usize) {
51        if let Ok(mut counts) = self.predicate_counts.write() {
52            counts.insert(predicate.to_string(), count);
53        }
54    }
55
56    /// Set total triples
57    pub fn set_total_triples(&self, count: usize) {
58        self.total_triples
59            .store(count, std::sync::atomic::Ordering::Relaxed);
60    }
61
62    /// Update subject cardinality estimate
63    pub fn update_subject_cardinality(&self, predicate: &str, cardinality: usize) {
64        if let Ok(mut card) = self.subject_cardinality.write() {
65            card.insert(predicate.to_string(), cardinality);
66        }
67    }
68
69    /// Update object cardinality estimate
70    pub fn update_object_cardinality(&self, predicate: &str, cardinality: usize) {
71        if let Ok(mut card) = self.object_cardinality.write() {
72            card.insert(predicate.to_string(), cardinality);
73        }
74    }
75
76    /// Cache join selectivity
77    pub fn cache_join_selectivity(&self, pattern_pair: &str, selectivity: f64) {
78        if let Ok(mut cache) = self.join_selectivity_cache.write() {
79            cache.insert(pattern_pair.to_string(), selectivity);
80        }
81    }
82
83    /// Get cached join selectivity
84    pub fn get_join_selectivity(&self, pattern_pair: &str) -> Option<f64> {
85        match self.join_selectivity_cache.read() {
86            Ok(cache) => cache.get(pattern_pair).copied(),
87            _ => None,
88        }
89    }
90}
91
92/// Pattern matching optimizer that selects optimal indexes
93#[derive(Debug)]
94pub struct PatternOptimizer {
95    /// Index statistics for cost estimation
96    index_stats: Arc<IndexStats>,
97    /// Available index types
98    available_indexes: Vec<IndexType>,
99}
100
101/// Types of indexes available for optimization
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
103pub enum IndexType {
104    /// Subject-Predicate-Object index
105    SPO,
106    /// Predicate-Object-Subject index
107    POS,
108    /// Object-Subject-Predicate index
109    OSP,
110    /// Subject-Object-Predicate index (optional)
111    SOP,
112    /// Predicate-Subject-Object index (optional)
113    PSO,
114    /// Object-Predicate-Subject index (optional)
115    OPS,
116}
117
118/// Variable position in a triple pattern
119#[derive(Debug, Clone, Copy, PartialEq, Eq)]
120enum VarPosition {
121    Subject,
122    Predicate,
123    Object,
124}
125
126/// Filter expression for evaluation optimization
127#[derive(Debug, Clone)]
128pub enum FilterExpression {
129    /// Equality comparison
130    Equals(Variable, Term),
131    /// Less than comparison
132    LessThan(Variable, Term),
133    /// Greater than comparison
134    GreaterThan(Variable, Term),
135    /// String regex match
136    Regex(Variable, String),
137    /// In list
138    In(Variable, Vec<Term>),
139    /// Logical AND
140    And(Box<FilterExpression>, Box<FilterExpression>),
141    /// Logical OR
142    Or(Box<FilterExpression>, Box<FilterExpression>),
143    /// Logical NOT
144    Not(Box<FilterExpression>),
145}
146
147/// Pattern matching strategy
148#[derive(Debug, Clone)]
149pub struct PatternStrategy {
150    /// Index to use for this pattern
151    pub index_type: IndexType,
152    /// Estimated cost
153    pub estimated_cost: f64,
154    /// Selectivity estimate (0.0 to 1.0)
155    pub selectivity: f64,
156    /// Variables that will be bound after executing this pattern
157    pub bound_vars: HashSet<Variable>,
158    /// Associated filter expressions that can be pushed down
159    pub pushdown_filters: Vec<FilterExpression>,
160}
161
162/// Optimized pattern execution order
163#[derive(Debug, Clone)]
164pub struct OptimizedPatternPlan {
165    /// Ordered list of patterns with their strategies
166    pub patterns: Vec<(AlgebraTriplePattern, PatternStrategy)>,
167    /// Total estimated cost
168    pub total_cost: f64,
169    /// Variables bound at each step
170    pub binding_order: Vec<HashSet<Variable>>,
171}
172
173impl PatternOptimizer {
174    /// Create a new pattern optimizer
175    pub fn new(index_stats: Arc<IndexStats>) -> Self {
176        Self {
177            index_stats,
178            available_indexes: vec![IndexType::SPO, IndexType::POS, IndexType::OSP],
179        }
180    }
181
182    /// Optimize a set of triple patterns
183    pub fn optimize_patterns(
184        &self,
185        patterns: &[AlgebraTriplePattern],
186    ) -> Result<OptimizedPatternPlan, OxirsError> {
187        if patterns.is_empty() {
188            return Ok(OptimizedPatternPlan {
189                patterns: Vec::new(),
190                total_cost: 0.0,
191                binding_order: Vec::new(),
192            });
193        }
194
195        // Analyze each pattern individually
196        let mut pattern_strategies: Vec<(usize, Vec<PatternStrategy>)> = patterns
197            .iter()
198            .enumerate()
199            .map(|(idx, pattern)| {
200                let strategies = self.analyze_pattern(pattern);
201                (idx, strategies)
202            })
203            .collect();
204
205        // Sort patterns by their best selectivity
206        pattern_strategies.sort_by(|a, b| {
207            let best_a =
208                a.1.iter()
209                    .map(|s| s.selectivity)
210                    .min_by(|x, y| x.partial_cmp(y).unwrap())
211                    .unwrap_or(1.0);
212            let best_b =
213                b.1.iter()
214                    .map(|s| s.selectivity)
215                    .min_by(|x, y| x.partial_cmp(y).unwrap())
216                    .unwrap_or(1.0);
217            best_a.partial_cmp(&best_b).unwrap()
218        });
219
220        // Build execution order considering variable bindings
221        let mut ordered_patterns = Vec::new();
222        let mut bound_vars = HashSet::new();
223        let mut binding_order = Vec::new();
224        let mut total_cost = 0.0;
225
226        // First pattern - choose the most selective
227        let (first_idx, first_strategies) = &pattern_strategies[0];
228        let first_pattern = &patterns[*first_idx];
229        let first_strategy = self.select_best_strategy(first_strategies, &bound_vars)?;
230
231        ordered_patterns.push((first_pattern.clone(), first_strategy.clone()));
232        bound_vars.extend(first_strategy.bound_vars.clone());
233        binding_order.push(bound_vars.clone());
234        total_cost += first_strategy.estimated_cost;
235
236        // Process remaining patterns
237        let mut remaining: Vec<_> = pattern_strategies[1..].to_vec();
238
239        while !remaining.is_empty() {
240            // Find pattern with best cost considering current bindings
241            let (best_pos, best_idx, best_strategy) = remaining
242                .iter()
243                .enumerate()
244                .map(|(pos, (idx, strategies))| {
245                    let pattern = &patterns[*idx];
246                    let strategy =
247                        self.select_strategy_with_bindings(pattern, strategies, &bound_vars);
248                    (pos, idx, strategy)
249                })
250                .min_by(|(_, _, a), (_, _, b)| {
251                    a.estimated_cost.partial_cmp(&b.estimated_cost).unwrap()
252                })
253                .ok_or_else(|| OxirsError::Query("Failed to find next pattern".to_string()))?;
254
255            let pattern = &patterns[*best_idx];
256            ordered_patterns.push((pattern.clone(), best_strategy.clone()));
257            bound_vars.extend(best_strategy.bound_vars.clone());
258            binding_order.push(bound_vars.clone());
259            total_cost += best_strategy.estimated_cost;
260
261            remaining.remove(best_pos);
262        }
263
264        Ok(OptimizedPatternPlan {
265            patterns: ordered_patterns,
266            total_cost,
267            binding_order,
268        })
269    }
270
271    /// Analyze a single pattern and generate strategies
272    pub fn analyze_pattern(&self, pattern: &AlgebraTriplePattern) -> Vec<PatternStrategy> {
273        let mut strategies = Vec::new();
274
275        // Analyze which components are bound vs variable
276        let s_bound = !matches!(pattern.subject, AlgebraTermPattern::Variable(_));
277        let p_bound = !matches!(pattern.predicate, AlgebraTermPattern::Variable(_));
278        let o_bound = !matches!(pattern.object, AlgebraTermPattern::Variable(_));
279
280        // Generate strategies for each index type
281        for &index_type in &self.available_indexes {
282            let (cost, selectivity) =
283                self.estimate_index_cost(index_type, s_bound, p_bound, o_bound, pattern);
284
285            let bound_vars = self.extract_bound_vars(pattern);
286
287            strategies.push(PatternStrategy {
288                index_type,
289                estimated_cost: cost,
290                selectivity,
291                bound_vars,
292                pushdown_filters: Vec::new(), // Will be populated by filter optimization
293            });
294        }
295
296        strategies
297    }
298
299    /// Estimate cost of using a specific index
300    fn estimate_index_cost(
301        &self,
302        index_type: IndexType,
303        s_bound: bool,
304        p_bound: bool,
305        o_bound: bool,
306        pattern: &AlgebraTriplePattern,
307    ) -> (f64, f64) {
308        // Base costs for different access patterns
309        let base_cost = match index_type {
310            IndexType::SPO => {
311                if s_bound && p_bound && o_bound {
312                    1.0 // Exact lookup
313                } else if s_bound && p_bound {
314                    10.0 // Range scan on O
315                } else if s_bound {
316                    100.0 // Range scan on P,O
317                } else {
318                    10000.0 // Full scan
319                }
320            }
321            IndexType::POS => {
322                if p_bound && o_bound && s_bound {
323                    1.0
324                } else if p_bound && o_bound {
325                    10.0
326                } else if p_bound {
327                    100.0
328                } else {
329                    10000.0
330                }
331            }
332            IndexType::OSP => {
333                if o_bound && s_bound && p_bound {
334                    1.0
335                } else if o_bound && s_bound {
336                    10.0
337                } else if o_bound {
338                    100.0
339                } else {
340                    10000.0
341                }
342            }
343            _ => 10000.0, // Other indexes not implemented
344        };
345
346        // Adjust based on statistics
347        let selectivity = self.estimate_selectivity(pattern);
348        let adjusted_cost = base_cost * selectivity;
349
350        (adjusted_cost, selectivity)
351    }
352
353    /// Estimate selectivity of a pattern using advanced statistics
354    fn estimate_selectivity(&self, pattern: &AlgebraTriplePattern) -> f64 {
355        // Base selectivity
356        let mut selectivity: f64 = 1.0;
357        let total_triples = self
358            .index_stats
359            .total_triples
360            .load(std::sync::atomic::Ordering::Relaxed) as f64;
361
362        if total_triples == 0.0 {
363            return 0.001; // Default for empty dataset
364        }
365
366        // Subject selectivity
367        match &pattern.subject {
368            AlgebraTermPattern::NamedNode(_) | AlgebraTermPattern::BlankNode(_) => {
369                // Bound subject - highly selective
370                selectivity *= 0.001;
371            }
372            AlgebraTermPattern::Variable(_) => {
373                // Variable subject - depends on predicate cardinality
374                if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
375                    if let Ok(card) = self.index_stats.subject_cardinality.read() {
376                        if let Some(subj_card) = card.get(pred.as_str()) {
377                            selectivity *= (*subj_card as f64) / total_triples;
378                        }
379                    }
380                }
381            }
382            _ => {}
383        }
384
385        // Predicate selectivity (most important for triple stores)
386        match &pattern.predicate {
387            AlgebraTermPattern::NamedNode(pred) => {
388                if let Ok(counts) = self.index_stats.predicate_counts.read() {
389                    if let Some(pred_count) = counts.get(pred.as_str()) {
390                        selectivity *= (*pred_count as f64) / total_triples;
391                    } else {
392                        // Unknown predicate - assume very low frequency
393                        selectivity *= 0.001;
394                    }
395                }
396            }
397            AlgebraTermPattern::Variable(_) => {
398                // Variable predicate - less selective
399                selectivity *= 0.5;
400            }
401            _ => {}
402        }
403
404        // Object selectivity
405        match &pattern.object {
406            AlgebraTermPattern::Literal(_) => {
407                // Literals are usually very selective
408                selectivity *= 0.01;
409            }
410            AlgebraTermPattern::NamedNode(_) => {
411                // Named nodes moderately selective
412                selectivity *= 0.1;
413            }
414            AlgebraTermPattern::BlankNode(_) => {
415                // Blank nodes moderately selective
416                selectivity *= 0.1;
417            }
418            AlgebraTermPattern::Variable(_) => {
419                // Variable object - depends on predicate object cardinality
420                if let AlgebraTermPattern::NamedNode(pred) = &pattern.predicate {
421                    if let Ok(card) = self.index_stats.object_cardinality.read() {
422                        if let Some(obj_card) = card.get(pred.as_str()) {
423                            selectivity *= (*obj_card as f64) / total_triples;
424                        }
425                    }
426                }
427            }
428        }
429
430        // Apply bounds and handle edge cases
431        selectivity.clamp(0.00001, 1.0)
432    }
433
434    /// Extract variables that will be bound by this pattern
435    fn extract_bound_vars(&self, pattern: &AlgebraTriplePattern) -> HashSet<Variable> {
436        let mut vars = HashSet::new();
437
438        if let AlgebraTermPattern::Variable(v) = &pattern.subject {
439            vars.insert(v.clone());
440        }
441        if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
442            vars.insert(v.clone());
443        }
444        if let AlgebraTermPattern::Variable(v) = &pattern.object {
445            vars.insert(v.clone());
446        }
447
448        vars
449    }
450
451    /// Select best strategy from options
452    fn select_best_strategy(
453        &self,
454        strategies: &[PatternStrategy],
455        _bound_vars: &HashSet<Variable>,
456    ) -> Result<PatternStrategy, OxirsError> {
457        strategies
458            .iter()
459            .min_by(|a, b| a.estimated_cost.partial_cmp(&b.estimated_cost).unwrap())
460            .cloned()
461            .ok_or_else(|| OxirsError::Query("No valid strategy found".to_string()))
462    }
463
464    /// Select strategy considering already bound variables
465    fn select_strategy_with_bindings(
466        &self,
467        pattern: &AlgebraTriplePattern,
468        strategies: &[PatternStrategy],
469        bound_vars: &HashSet<Variable>,
470    ) -> PatternStrategy {
471        // Check which variables in pattern are already bound
472        let s_bound = match &pattern.subject {
473            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
474            _ => true,
475        };
476        let p_bound = match &pattern.predicate {
477            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
478            _ => true,
479        };
480        let o_bound = match &pattern.object {
481            AlgebraTermPattern::Variable(v) => bound_vars.contains(v),
482            _ => true,
483        };
484
485        // Re-evaluate strategies with bound variables
486        let mut best_strategy = strategies[0].clone();
487        let mut best_cost = f64::MAX;
488
489        for strategy in strategies {
490            let (adjusted_cost, _) =
491                self.estimate_index_cost(strategy.index_type, s_bound, p_bound, o_bound, pattern);
492
493            if adjusted_cost < best_cost {
494                best_cost = adjusted_cost;
495                best_strategy = strategy.clone();
496                best_strategy.estimated_cost = adjusted_cost;
497            }
498        }
499
500        best_strategy
501    }
502
503    /// Estimate join selectivity between two patterns
504    pub fn estimate_join_selectivity(
505        &self,
506        left: &AlgebraTriplePattern,
507        right: &AlgebraTriplePattern,
508    ) -> f64 {
509        // Create cache key for this pattern pair
510        let cache_key = format!("{left:?}|{right:?}");
511
512        // Check cache first
513        if let Some(cached) = self.index_stats.get_join_selectivity(&cache_key) {
514            return cached;
515        }
516
517        // Find common variables
518        let left_vars = self.extract_bound_vars(left);
519        let right_vars = self.extract_bound_vars(right);
520        let common_vars: HashSet<_> = left_vars.intersection(&right_vars).cloned().collect();
521
522        let selectivity = if common_vars.is_empty() {
523            // Cartesian product - very expensive
524            1.0
525        } else {
526            // Estimate based on type of join variables
527            let mut join_selectivity = 1.0;
528
529            for var in common_vars.iter() {
530                // Estimate selectivity based on variable position and pattern types
531                let var_selectivity = self.estimate_variable_join_selectivity(var, left, right);
532                join_selectivity *= var_selectivity;
533            }
534
535            // Apply correlation factor for multiple join variables
536            if common_vars.len() > 1 {
537                join_selectivity *= 0.1_f64.powf(common_vars.len() as f64 - 1.0);
538            }
539
540            join_selectivity
541        };
542
543        // Cache the result
544        self.index_stats
545            .cache_join_selectivity(&cache_key, selectivity);
546
547        selectivity.clamp(0.00001, 1.0)
548    }
549
550    /// Estimate selectivity for a variable join
551    fn estimate_variable_join_selectivity(
552        &self,
553        var: &Variable,
554        left: &AlgebraTriplePattern,
555        right: &AlgebraTriplePattern,
556    ) -> f64 {
557        // Find position of variable in each pattern
558        let left_pos = self.find_variable_position(var, left);
559        let right_pos = self.find_variable_position(var, right);
560
561        match (left_pos, right_pos) {
562            (Some(pos1), Some(pos2)) => {
563                // Subject-subject joins are usually more selective than object-object
564                match (pos1, pos2) {
565                    (VarPosition::Subject, VarPosition::Subject) => 0.01, // Very selective
566                    (VarPosition::Subject, VarPosition::Object) => 0.1,   // Moderately selective
567                    (VarPosition::Object, VarPosition::Subject) => 0.1,   // Moderately selective
568                    (VarPosition::Object, VarPosition::Object) => 0.2,    // Less selective
569                    (VarPosition::Predicate, _) | (_, VarPosition::Predicate) => 0.05, // Predicate joins rare but selective
570                }
571            }
572            _ => 1.0, // No actual join
573        }
574    }
575
576    /// Find position of variable in pattern
577    fn find_variable_position(
578        &self,
579        var: &Variable,
580        pattern: &AlgebraTriplePattern,
581    ) -> Option<VarPosition> {
582        if let AlgebraTermPattern::Variable(v) = &pattern.subject {
583            if v == var {
584                return Some(VarPosition::Subject);
585            }
586        }
587        if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
588            if v == var {
589                return Some(VarPosition::Predicate);
590            }
591        }
592        if let AlgebraTermPattern::Variable(v) = &pattern.object {
593            if v == var {
594                return Some(VarPosition::Object);
595            }
596        }
597        None
598    }
599
600    /// Optimize filter expressions and determine pushdown opportunities
601    pub fn optimize_filters(
602        &self,
603        patterns: &[AlgebraTriplePattern],
604        filters: &[FilterExpression],
605    ) -> Vec<(usize, Vec<FilterExpression>)> {
606        let mut pushdown_map = Vec::new();
607
608        for (pattern_idx, pattern) in patterns.iter().enumerate() {
609            let mut pattern_filters = Vec::new();
610
611            // Find filters that can be pushed down to this pattern
612            for filter in filters {
613                if self.can_pushdown_filter(filter, pattern) {
614                    pattern_filters.push(filter.clone());
615                }
616            }
617
618            if !pattern_filters.is_empty() {
619                pushdown_map.push((pattern_idx, pattern_filters));
620            }
621        }
622
623        pushdown_map
624    }
625
626    /// Check if filter can be pushed down to pattern
627    fn can_pushdown_filter(
628        &self,
629        filter: &FilterExpression,
630        pattern: &AlgebraTriplePattern,
631    ) -> bool {
632        match filter {
633            FilterExpression::Equals(var, _)
634            | FilterExpression::LessThan(var, _)
635            | FilterExpression::GreaterThan(var, _)
636            | FilterExpression::Regex(var, _)
637            | FilterExpression::In(var, _) => {
638                // Can push down if pattern binds this variable
639                self.pattern_binds_variable(var, pattern)
640            }
641            FilterExpression::And(left, right) => {
642                // Can push down if either side can be pushed down
643                self.can_pushdown_filter(left, pattern) || self.can_pushdown_filter(right, pattern)
644            }
645            FilterExpression::Or(left, right) => {
646                // Can only push down if both sides can be pushed down
647                self.can_pushdown_filter(left, pattern) && self.can_pushdown_filter(right, pattern)
648            }
649            FilterExpression::Not(inner) => self.can_pushdown_filter(inner, pattern),
650        }
651    }
652
653    /// Check if pattern binds variable
654    fn pattern_binds_variable(&self, var: &Variable, pattern: &AlgebraTriplePattern) -> bool {
655        matches!(&pattern.subject, AlgebraTermPattern::Variable(v) if v == var)
656            || matches!(&pattern.predicate, AlgebraTermPattern::Variable(v) if v == var)
657            || matches!(&pattern.object, AlgebraTermPattern::Variable(v) if v == var)
658    }
659
660    /// Estimate filter selectivity
661    #[allow(clippy::only_used_in_recursion)]
662    pub fn estimate_filter_selectivity(&self, filter: &FilterExpression) -> f64 {
663        match filter {
664            FilterExpression::Equals(_, _) => 0.1, // Equality is selective
665            FilterExpression::LessThan(_, _) => 0.3, // Range filters moderately selective
666            FilterExpression::GreaterThan(_, _) => 0.3,
667            FilterExpression::Regex(_, _) => 0.2, // String matches moderately selective
668            FilterExpression::In(_, values) => {
669                // Selectivity depends on number of values
670                (values.len() as f64 * 0.1).min(0.9)
671            }
672            FilterExpression::And(left, right) => {
673                // AND is more selective
674                self.estimate_filter_selectivity(left) * self.estimate_filter_selectivity(right)
675            }
676            FilterExpression::Or(left, right) => {
677                // OR is less selective
678                let left_sel = self.estimate_filter_selectivity(left);
679                let right_sel = self.estimate_filter_selectivity(right);
680                left_sel + right_sel - (left_sel * right_sel)
681            }
682            FilterExpression::Not(inner) => {
683                // NOT inverts selectivity
684                1.0 - self.estimate_filter_selectivity(inner)
685            }
686        }
687    }
688
689    /// Get optimal index type for a pattern execution
690    pub fn get_optimal_index(
691        &self,
692        pattern: &ModelTriplePattern,
693        bound_vars: &HashSet<Variable>,
694    ) -> IndexType {
695        // Check which components are bound
696        let s_bound = pattern.subject.as_ref().is_some_and(|s| match s {
697            SubjectPattern::Variable(v) => bound_vars.contains(v),
698            _ => true,
699        });
700
701        let p_bound = pattern.predicate.as_ref().is_some_and(|p| match p {
702            PredicatePattern::Variable(v) => bound_vars.contains(v),
703            _ => true,
704        });
705
706        let o_bound = pattern.object.as_ref().is_some_and(|o| match o {
707            ObjectPattern::Variable(v) => bound_vars.contains(v),
708            _ => true,
709        });
710
711        // Select index based on bound components
712        match (s_bound, p_bound, o_bound) {
713            (true, true, _) => IndexType::SPO,
714            (true, false, true) => IndexType::SPO, // Could use SOP if available
715            (false, true, true) => IndexType::POS,
716            (true, false, false) => IndexType::SPO,
717            (false, true, false) => IndexType::POS,
718            (false, false, true) => IndexType::OSP,
719            (false, false, false) => IndexType::SPO, // Full scan, any index works
720        }
721    }
722}
723
724/// Pattern execution engine that uses optimized strategies
725pub struct PatternExecutor {
726    /// The indexed graph to query
727    graph: Arc<IndexedGraph>,
728    /// Pattern optimizer
729    optimizer: PatternOptimizer,
730}
731
732impl PatternExecutor {
733    /// Create new pattern executor
734    pub fn new(graph: Arc<IndexedGraph>, index_stats: Arc<IndexStats>) -> Self {
735        Self {
736            graph,
737            optimizer: PatternOptimizer::new(index_stats),
738        }
739    }
740
741    /// Execute an optimized pattern plan
742    pub fn execute_plan(
743        &self,
744        plan: &OptimizedPatternPlan,
745    ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
746        let mut results = vec![HashMap::new()];
747
748        for (pattern, strategy) in &plan.patterns {
749            results = self.execute_pattern_with_strategy(pattern, strategy, results)?;
750        }
751
752        Ok(results)
753    }
754
755    /// Execute a single pattern with given strategy
756    fn execute_pattern_with_strategy(
757        &self,
758        pattern: &AlgebraTriplePattern,
759        _strategy: &PatternStrategy,
760        bindings: Vec<HashMap<Variable, Term>>,
761    ) -> Result<Vec<HashMap<Variable, Term>>, OxirsError> {
762        let mut new_results = Vec::new();
763
764        for binding in bindings {
765            // Convert algebra pattern to model pattern with bindings
766            let bound_pattern = self.bind_pattern(pattern, &binding)?;
767
768            // Convert pattern types to concrete types for query
769            let subject = bound_pattern
770                .subject
771                .as_ref()
772                .and_then(|s| self.subject_pattern_to_subject(s));
773            let predicate = bound_pattern
774                .predicate
775                .as_ref()
776                .and_then(|p| self.predicate_pattern_to_predicate(p));
777            let object = bound_pattern
778                .object
779                .as_ref()
780                .and_then(|o| self.object_pattern_to_object(o));
781
782            // Query using selected index
783            let matches = self
784                .graph
785                .query(subject.as_ref(), predicate.as_ref(), object.as_ref());
786
787            // Extend bindings with new matches
788            for triple in matches {
789                let mut new_binding = binding.clone();
790
791                // Bind variables from matched triple
792                if let AlgebraTermPattern::Variable(v) = &pattern.subject {
793                    new_binding.insert(v.clone(), Term::from(triple.subject().clone()));
794                }
795                if let AlgebraTermPattern::Variable(v) = &pattern.predicate {
796                    new_binding.insert(v.clone(), Term::from(triple.predicate().clone()));
797                }
798                if let AlgebraTermPattern::Variable(v) = &pattern.object {
799                    new_binding.insert(v.clone(), Term::from(triple.object().clone()));
800                }
801
802                new_results.push(new_binding);
803            }
804        }
805
806        Ok(new_results)
807    }
808
809    /// Bind variables in pattern using current bindings
810    fn bind_pattern(
811        &self,
812        pattern: &AlgebraTriplePattern,
813        bindings: &HashMap<Variable, Term>,
814    ) -> Result<ModelTriplePattern, OxirsError> {
815        let subject = match &pattern.subject {
816            AlgebraTermPattern::Variable(v) => {
817                if let Some(term) = bindings.get(v) {
818                    Some(self.term_to_subject_pattern(term)?)
819                } else {
820                    None
821                }
822            }
823            AlgebraTermPattern::NamedNode(n) => Some(SubjectPattern::NamedNode(n.clone())),
824            AlgebraTermPattern::BlankNode(b) => Some(SubjectPattern::BlankNode(b.clone())),
825            AlgebraTermPattern::Literal(_) => {
826                return Err(OxirsError::Query("Literal cannot be subject".to_string()))
827            }
828        };
829
830        let predicate = match &pattern.predicate {
831            AlgebraTermPattern::Variable(v) => {
832                if let Some(term) = bindings.get(v) {
833                    Some(self.term_to_predicate_pattern(term)?)
834                } else {
835                    None
836                }
837            }
838            AlgebraTermPattern::NamedNode(n) => Some(PredicatePattern::NamedNode(n.clone())),
839            _ => return Err(OxirsError::Query("Invalid predicate pattern".to_string())),
840        };
841
842        let object = match &pattern.object {
843            AlgebraTermPattern::Variable(v) => {
844                if let Some(term) = bindings.get(v) {
845                    Some(self.term_to_object_pattern(term)?)
846                } else {
847                    None
848                }
849            }
850            AlgebraTermPattern::NamedNode(n) => Some(ObjectPattern::NamedNode(n.clone())),
851            AlgebraTermPattern::BlankNode(b) => Some(ObjectPattern::BlankNode(b.clone())),
852            AlgebraTermPattern::Literal(l) => Some(ObjectPattern::Literal(l.clone())),
853        };
854
855        Ok(ModelTriplePattern::new(subject, predicate, object))
856    }
857
858    /// Convert term to subject pattern
859    fn term_to_subject_pattern(&self, term: &Term) -> Result<SubjectPattern, OxirsError> {
860        match term {
861            Term::NamedNode(n) => Ok(SubjectPattern::NamedNode(n.clone())),
862            Term::BlankNode(b) => Ok(SubjectPattern::BlankNode(b.clone())),
863            _ => Err(OxirsError::Query("Invalid term for subject".to_string())),
864        }
865    }
866
867    /// Convert term to predicate pattern
868    fn term_to_predicate_pattern(&self, term: &Term) -> Result<PredicatePattern, OxirsError> {
869        match term {
870            Term::NamedNode(n) => Ok(PredicatePattern::NamedNode(n.clone())),
871            _ => Err(OxirsError::Query("Invalid term for predicate".to_string())),
872        }
873    }
874
875    /// Convert term to object pattern
876    fn term_to_object_pattern(&self, term: &Term) -> Result<ObjectPattern, OxirsError> {
877        match term {
878            Term::NamedNode(n) => Ok(ObjectPattern::NamedNode(n.clone())),
879            Term::BlankNode(b) => Ok(ObjectPattern::BlankNode(b.clone())),
880            Term::Literal(l) => Ok(ObjectPattern::Literal(l.clone())),
881            _ => Err(OxirsError::Query(
882                "Invalid term for object pattern".to_string(),
883            )),
884        }
885    }
886
887    /// Convert subject pattern to subject
888    fn subject_pattern_to_subject(&self, pattern: &SubjectPattern) -> Option<Subject> {
889        match pattern {
890            SubjectPattern::NamedNode(n) => Some(Subject::NamedNode(n.clone())),
891            SubjectPattern::BlankNode(b) => Some(Subject::BlankNode(b.clone())),
892            SubjectPattern::Variable(_) => None,
893        }
894    }
895
896    /// Convert predicate pattern to predicate
897    fn predicate_pattern_to_predicate(&self, pattern: &PredicatePattern) -> Option<Predicate> {
898        match pattern {
899            PredicatePattern::NamedNode(n) => Some(Predicate::NamedNode(n.clone())),
900            PredicatePattern::Variable(_) => None,
901        }
902    }
903
904    /// Convert object pattern to object
905    fn object_pattern_to_object(&self, pattern: &ObjectPattern) -> Option<Object> {
906        match pattern {
907            ObjectPattern::NamedNode(n) => Some(Object::NamedNode(n.clone())),
908            ObjectPattern::BlankNode(b) => Some(Object::BlankNode(b.clone())),
909            ObjectPattern::Literal(l) => Some(Object::Literal(l.clone())),
910            ObjectPattern::Variable(_) => None,
911        }
912    }
913}
914
915#[cfg(test)]
916mod tests {
917    use super::*;
918
919    #[test]
920    fn test_pattern_optimizer_creation() {
921        let stats = Arc::new(IndexStats::new());
922        let optimizer = PatternOptimizer::new(stats);
923
924        assert_eq!(optimizer.available_indexes.len(), 3);
925    }
926
927    #[test]
928    fn test_index_selection() {
929        let stats = Arc::new(IndexStats::new());
930        let optimizer = PatternOptimizer::new(stats);
931
932        // Pattern with bound subject
933        let pattern = ModelTriplePattern::new(
934            Some(SubjectPattern::NamedNode(
935                NamedNode::new("http://example.org/s").unwrap(),
936            )),
937            None,
938            None,
939        );
940
941        let bound_vars = HashSet::new();
942        let index = optimizer.get_optimal_index(&pattern, &bound_vars);
943
944        assert_eq!(index, IndexType::SPO);
945    }
946
947    #[test]
948    fn test_selectivity_estimation() {
949        let stats = Arc::new(IndexStats::new());
950        let optimizer = PatternOptimizer::new(stats);
951
952        let pattern = AlgebraTriplePattern::new(
953            AlgebraTermPattern::Variable(Variable::new("s").unwrap()),
954            AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/type").unwrap()),
955            AlgebraTermPattern::Literal(Literal::new("test")),
956        );
957
958        let selectivity = optimizer.estimate_selectivity(&pattern);
959
960        // Literal object should give low selectivity
961        assert!(selectivity < 0.2);
962    }
963
964    #[test]
965    fn test_pattern_optimization() {
966        let stats = Arc::new(IndexStats::new());
967        let optimizer = PatternOptimizer::new(stats);
968
969        let patterns = vec![
970            AlgebraTriplePattern::new(
971                AlgebraTermPattern::Variable(Variable::new("s").unwrap()),
972                AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/type").unwrap()),
973                AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/Person").unwrap()),
974            ),
975            AlgebraTriplePattern::new(
976                AlgebraTermPattern::Variable(Variable::new("s").unwrap()),
977                AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/name").unwrap()),
978                AlgebraTermPattern::Variable(Variable::new("name").unwrap()),
979            ),
980        ];
981
982        let plan = optimizer.optimize_patterns(&patterns).unwrap();
983
984        assert_eq!(plan.patterns.len(), 2);
985        assert!(plan.total_cost > 0.0);
986        assert_eq!(plan.binding_order.len(), 2);
987    }
988}