oxirs_core/query/
pattern_unification.rs

1//! Pattern type unification for OxiRS query processing
2//!
3//! This module provides utilities to unify different pattern representations
4//! used across the algebra and model systems, resolving type conflicts and
5//! enabling seamless interoperability.
6
7use crate::model::*;
8use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
9use crate::OxirsError;
10use std::collections::HashSet;
11
12/// Unified pattern representation that can handle both algebra and model patterns
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14pub struct UnifiedTriplePattern {
15    /// Subject pattern
16    pub subject: UnifiedTermPattern,
17    /// Predicate pattern  
18    pub predicate: UnifiedTermPattern,
19    /// Object pattern
20    pub object: UnifiedTermPattern,
21}
22
23/// Unified term pattern that works with both systems
24#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub enum UnifiedTermPattern {
26    /// Named node (IRI)
27    NamedNode(NamedNode),
28    /// Blank node
29    BlankNode(BlankNode),
30    /// Literal value
31    Literal(Literal),
32    /// Variable
33    Variable(Variable),
34    /// Wildcard (matches anything)
35    Wildcard,
36}
37
38impl UnifiedTriplePattern {
39    /// Create a new unified triple pattern
40    pub fn new(
41        subject: UnifiedTermPattern,
42        predicate: UnifiedTermPattern,
43        object: UnifiedTermPattern,
44    ) -> Self {
45        Self {
46            subject,
47            predicate,
48            object,
49        }
50    }
51
52    /// Convert to algebra TriplePattern
53    pub fn to_algebra_pattern(&self) -> Result<AlgebraTriplePattern, OxirsError> {
54        let subject = self.subject.to_algebra_term_pattern()?;
55        let predicate = self.predicate.to_algebra_term_pattern()?;
56        let object = self.object.to_algebra_term_pattern()?;
57
58        Ok(AlgebraTriplePattern::new(subject, predicate, object))
59    }
60
61    /// Convert to model TriplePattern
62    pub fn to_model_pattern(&self) -> TriplePattern {
63        let subject = self.subject.to_model_subject_pattern();
64        let predicate = self.predicate.to_model_predicate_pattern();
65        let object = self.object.to_model_object_pattern();
66
67        TriplePattern::new(subject, predicate, object)
68    }
69
70    /// Create from algebra TriplePattern
71    pub fn from_algebra_pattern(pattern: &AlgebraTriplePattern) -> Self {
72        Self {
73            subject: UnifiedTermPattern::from_algebra_term(&pattern.subject),
74            predicate: UnifiedTermPattern::from_algebra_term(&pattern.predicate),
75            object: UnifiedTermPattern::from_algebra_term(&pattern.object),
76        }
77    }
78
79    /// Create from model TriplePattern
80    pub fn from_model_pattern(pattern: &TriplePattern) -> Self {
81        Self {
82            subject: pattern
83                .subject()
84                .map(UnifiedTermPattern::from_model_subject)
85                .unwrap_or(UnifiedTermPattern::Wildcard),
86            predicate: pattern
87                .predicate()
88                .map(UnifiedTermPattern::from_model_predicate)
89                .unwrap_or(UnifiedTermPattern::Wildcard),
90            object: pattern
91                .object()
92                .map(UnifiedTermPattern::from_model_object)
93                .unwrap_or(UnifiedTermPattern::Wildcard),
94        }
95    }
96
97    /// Extract all variables from this pattern
98    pub fn extract_variables(&self) -> HashSet<Variable> {
99        let mut vars = HashSet::new();
100
101        if let UnifiedTermPattern::Variable(v) = &self.subject {
102            vars.insert(v.clone());
103        }
104        if let UnifiedTermPattern::Variable(v) = &self.predicate {
105            vars.insert(v.clone());
106        }
107        if let UnifiedTermPattern::Variable(v) = &self.object {
108            vars.insert(v.clone());
109        }
110
111        vars
112    }
113
114    /// Check if this pattern matches a concrete triple
115    pub fn matches(&self, triple: &Triple) -> bool {
116        self.subject.matches_subject(triple.subject())
117            && self.predicate.matches_predicate(triple.predicate())
118            && self.object.matches_object(triple.object())
119    }
120
121    /// Get pattern selectivity estimate (0.0 = most selective, 1.0 = least selective)
122    pub fn selectivity_estimate(&self) -> f64 {
123        let subject_selectivity = self.subject.selectivity_factor();
124        let predicate_selectivity = self.predicate.selectivity_factor();
125        let object_selectivity = self.object.selectivity_factor();
126
127        // Combined selectivity using independence assumption
128        subject_selectivity * predicate_selectivity * object_selectivity
129    }
130}
131
132impl UnifiedTermPattern {
133    /// Convert to algebra TermPattern
134    pub fn to_algebra_term_pattern(&self) -> Result<AlgebraTermPattern, OxirsError> {
135        match self {
136            UnifiedTermPattern::NamedNode(nn) => Ok(AlgebraTermPattern::NamedNode(nn.clone())),
137            UnifiedTermPattern::BlankNode(bn) => Ok(AlgebraTermPattern::BlankNode(bn.clone())),
138            UnifiedTermPattern::Literal(lit) => Ok(AlgebraTermPattern::Literal(lit.clone())),
139            UnifiedTermPattern::Variable(var) => Ok(AlgebraTermPattern::Variable(var.clone())),
140            UnifiedTermPattern::Wildcard => Err(OxirsError::Query(
141                "Wildcard patterns cannot be converted to algebra representation".to_string(),
142            )),
143        }
144    }
145
146    /// Convert to model SubjectPattern
147    pub fn to_model_subject_pattern(&self) -> Option<SubjectPattern> {
148        match self {
149            UnifiedTermPattern::NamedNode(nn) => Some(SubjectPattern::NamedNode(nn.clone())),
150            UnifiedTermPattern::BlankNode(bn) => Some(SubjectPattern::BlankNode(bn.clone())),
151            UnifiedTermPattern::Variable(var) => Some(SubjectPattern::Variable(var.clone())),
152            UnifiedTermPattern::Literal(_) | UnifiedTermPattern::Wildcard => None,
153        }
154    }
155
156    /// Convert to model PredicatePattern
157    pub fn to_model_predicate_pattern(&self) -> Option<PredicatePattern> {
158        match self {
159            UnifiedTermPattern::NamedNode(nn) => Some(PredicatePattern::NamedNode(nn.clone())),
160            UnifiedTermPattern::Variable(var) => Some(PredicatePattern::Variable(var.clone())),
161            UnifiedTermPattern::BlankNode(_)
162            | UnifiedTermPattern::Literal(_)
163            | UnifiedTermPattern::Wildcard => None,
164        }
165    }
166
167    /// Convert to model ObjectPattern
168    pub fn to_model_object_pattern(&self) -> Option<ObjectPattern> {
169        match self {
170            UnifiedTermPattern::NamedNode(nn) => Some(ObjectPattern::NamedNode(nn.clone())),
171            UnifiedTermPattern::BlankNode(bn) => Some(ObjectPattern::BlankNode(bn.clone())),
172            UnifiedTermPattern::Literal(lit) => Some(ObjectPattern::Literal(lit.clone())),
173            UnifiedTermPattern::Variable(var) => Some(ObjectPattern::Variable(var.clone())),
174            UnifiedTermPattern::Wildcard => None,
175        }
176    }
177
178    /// Create from algebra TermPattern
179    pub fn from_algebra_term(term: &AlgebraTermPattern) -> Self {
180        match term {
181            AlgebraTermPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
182            AlgebraTermPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
183            AlgebraTermPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
184            AlgebraTermPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
185        }
186    }
187
188    /// Create from model SubjectPattern
189    pub fn from_model_subject(subject: &SubjectPattern) -> Self {
190        match subject {
191            SubjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
192            SubjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
193            SubjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
194        }
195    }
196
197    /// Create from model PredicatePattern
198    pub fn from_model_predicate(predicate: &PredicatePattern) -> Self {
199        match predicate {
200            PredicatePattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
201            PredicatePattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
202        }
203    }
204
205    /// Create from model ObjectPattern
206    pub fn from_model_object(object: &ObjectPattern) -> Self {
207        match object {
208            ObjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
209            ObjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
210            ObjectPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
211            ObjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
212        }
213    }
214
215    /// Check if this pattern matches a subject
216    pub fn matches_subject(&self, subject: &Subject) -> bool {
217        match (self, subject) {
218            (UnifiedTermPattern::NamedNode(pn), Subject::NamedNode(sn)) => pn == sn,
219            (UnifiedTermPattern::BlankNode(pb), Subject::BlankNode(sb)) => pb == sb,
220            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
221            _ => false,
222        }
223    }
224
225    /// Check if this pattern matches a predicate
226    pub fn matches_predicate(&self, predicate: &Predicate) -> bool {
227        match (self, predicate) {
228            (UnifiedTermPattern::NamedNode(pn), Predicate::NamedNode(sn)) => pn == sn,
229            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
230            _ => false,
231        }
232    }
233
234    /// Check if this pattern matches an object
235    pub fn matches_object(&self, object: &Object) -> bool {
236        match (self, object) {
237            (UnifiedTermPattern::NamedNode(pn), Object::NamedNode(on)) => pn == on,
238            (UnifiedTermPattern::BlankNode(pb), Object::BlankNode(ob)) => pb == ob,
239            (UnifiedTermPattern::Literal(pl), Object::Literal(ol)) => pl == ol,
240            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
241            _ => false,
242        }
243    }
244
245    /// Get selectivity factor for cost estimation
246    pub fn selectivity_factor(&self) -> f64 {
247        match self {
248            UnifiedTermPattern::NamedNode(_) => 0.001, // Very selective
249            UnifiedTermPattern::BlankNode(_) => 0.01,  // Selective
250            UnifiedTermPattern::Literal(_) => 0.001,   // Very selective
251            UnifiedTermPattern::Variable(_) => 1.0,    // Not selective
252            UnifiedTermPattern::Wildcard => 1.0,       // Not selective
253        }
254    }
255}
256
257/// Pattern conversion utilities
258pub struct PatternConverter;
259
260impl PatternConverter {
261    /// Convert a vector of algebra patterns to model patterns
262    pub fn algebra_to_model_patterns(patterns: &[AlgebraTriplePattern]) -> Vec<TriplePattern> {
263        patterns
264            .iter()
265            .map(|p| UnifiedTriplePattern::from_algebra_pattern(p).to_model_pattern())
266            .collect()
267    }
268
269    /// Convert a vector of model patterns to algebra patterns
270    pub fn model_to_algebra_patterns(
271        patterns: &[TriplePattern],
272    ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
273        patterns
274            .iter()
275            .map(|p| UnifiedTriplePattern::from_model_pattern(p).to_algebra_pattern())
276            .collect()
277    }
278
279    /// Extract all variables from a set of algebra patterns
280    pub fn extract_variables_from_algebra(patterns: &[AlgebraTriplePattern]) -> HashSet<Variable> {
281        patterns
282            .iter()
283            .flat_map(|p| UnifiedTriplePattern::from_algebra_pattern(p).extract_variables())
284            .collect()
285    }
286
287    /// Extract all variables from a set of model patterns
288    pub fn extract_variables_from_model(patterns: &[TriplePattern]) -> HashSet<Variable> {
289        patterns
290            .iter()
291            .flat_map(|p| UnifiedTriplePattern::from_model_pattern(p).extract_variables())
292            .collect()
293    }
294
295    /// Estimate combined selectivity for a set of patterns
296    pub fn estimate_pattern_selectivity(patterns: &[UnifiedTriplePattern]) -> f64 {
297        if patterns.is_empty() {
298            return 1.0;
299        }
300
301        patterns
302            .iter()
303            .map(|p| p.selectivity_estimate())
304            .fold(1.0, |acc, s| acc * s)
305    }
306}
307
308/// Query optimization utilities using unified patterns
309pub struct PatternOptimizer;
310
311impl PatternOptimizer {
312    /// Reorder patterns for optimal execution based on selectivity
313    pub fn optimize_pattern_order(patterns: &[UnifiedTriplePattern]) -> Vec<UnifiedTriplePattern> {
314        let mut sorted_patterns = patterns.to_vec();
315
316        // Sort by selectivity (most selective first)
317        sorted_patterns.sort_by(|a, b| {
318            a.selectivity_estimate()
319                .partial_cmp(&b.selectivity_estimate())
320                .unwrap_or(std::cmp::Ordering::Equal)
321        });
322
323        sorted_patterns
324    }
325
326    /// Find optimal join order for patterns
327    pub fn optimize_join_order(patterns: &[UnifiedTriplePattern]) -> Vec<usize> {
328        if patterns.is_empty() {
329            return Vec::new();
330        }
331
332        // Simple greedy algorithm: start with most selective pattern
333        let mut remaining: Vec<usize> = (0..patterns.len()).collect();
334        let mut order = Vec::new();
335
336        // Find most selective pattern as starting point
337        if let Some(min_idx) = remaining
338            .iter()
339            .min_by(|&&a, &&b| {
340                patterns[a]
341                    .selectivity_estimate()
342                    .partial_cmp(&patterns[b].selectivity_estimate())
343                    .unwrap_or(std::cmp::Ordering::Equal)
344            })
345            .copied()
346        {
347            order.push(min_idx);
348            remaining.retain(|&x| x != min_idx);
349        }
350
351        // Greedily add patterns that share most variables with already selected patterns
352        while !remaining.is_empty() {
353            let selected_vars: HashSet<Variable> = order
354                .iter()
355                .flat_map(|&i| patterns[i].extract_variables())
356                .collect();
357
358            if let Some(best_idx) = remaining
359                .iter()
360                .max_by_key(|&&i| {
361                    let pattern_vars = patterns[i].extract_variables();
362                    pattern_vars.intersection(&selected_vars).count()
363                })
364                .copied()
365            {
366                order.push(best_idx);
367                remaining.retain(|&x| x != best_idx);
368            } else {
369                // Fallback: add remaining patterns in selectivity order
370                order.extend(remaining);
371                break;
372            }
373        }
374
375        order
376    }
377}
378
379#[cfg(test)]
380mod tests {
381    use super::*;
382
383    #[test]
384    fn test_unified_pattern_conversion() {
385        // Create an algebra pattern
386        let algebra_pattern = AlgebraTriplePattern::new(
387            AlgebraTermPattern::Variable(Variable::new("s").unwrap()),
388            AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/pred").unwrap()),
389            AlgebraTermPattern::Literal(Literal::new("test")),
390        );
391
392        // Convert to unified pattern
393        let unified = UnifiedTriplePattern::from_algebra_pattern(&algebra_pattern);
394
395        // Convert back to algebra pattern
396        let converted_back = unified.to_algebra_pattern().unwrap();
397
398        assert_eq!(algebra_pattern, converted_back);
399    }
400
401    #[test]
402    fn test_pattern_selectivity() {
403        let patterns = vec![
404            UnifiedTriplePattern::new(
405                UnifiedTermPattern::Variable(Variable::new("s").unwrap()),
406                UnifiedTermPattern::Variable(Variable::new("p").unwrap()),
407                UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
408            ),
409            UnifiedTriplePattern::new(
410                UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/s").unwrap()),
411                UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/p").unwrap()),
412                UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
413            ),
414        ];
415
416        // Second pattern should be more selective
417        assert!(patterns[1].selectivity_estimate() < patterns[0].selectivity_estimate());
418    }
419
420    #[test]
421    fn test_pattern_optimization() {
422        let patterns = vec![
423            UnifiedTriplePattern::new(
424                UnifiedTermPattern::Variable(Variable::new("s").unwrap()),
425                UnifiedTermPattern::Variable(Variable::new("p").unwrap()),
426                UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
427            ),
428            UnifiedTriplePattern::new(
429                UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/s").unwrap()),
430                UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/p").unwrap()),
431                UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
432            ),
433        ];
434
435        let optimized = PatternOptimizer::optimize_pattern_order(&patterns);
436
437        // More selective pattern should come first
438        assert_eq!(optimized[0], patterns[1]);
439        assert_eq!(optimized[1], patterns[0]);
440    }
441}