Skip to main content

oxirs_core/query/
pattern_unification.rs

1//! Pattern type unification for OxiRS query processing
2//!
3//! This module provides utilities to unify different pattern representations
4//! used across the algebra and model systems, resolving type conflicts and
5//! enabling seamless interoperability.
6
7use crate::model::*;
8use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
9use crate::OxirsError;
10use std::collections::HashSet;
11
12/// Unified pattern representation that can handle both algebra and model patterns
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14pub struct UnifiedTriplePattern {
15    /// Subject pattern
16    pub subject: UnifiedTermPattern,
17    /// Predicate pattern  
18    pub predicate: UnifiedTermPattern,
19    /// Object pattern
20    pub object: UnifiedTermPattern,
21}
22
23/// Unified term pattern that works with both systems
24#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub enum UnifiedTermPattern {
26    /// Named node (IRI)
27    NamedNode(NamedNode),
28    /// Blank node
29    BlankNode(BlankNode),
30    /// Literal value
31    Literal(Literal),
32    /// Variable
33    Variable(Variable),
34    /// Wildcard (matches anything)
35    Wildcard,
36}
37
38impl UnifiedTriplePattern {
39    /// Create a new unified triple pattern
40    pub fn new(
41        subject: UnifiedTermPattern,
42        predicate: UnifiedTermPattern,
43        object: UnifiedTermPattern,
44    ) -> Self {
45        Self {
46            subject,
47            predicate,
48            object,
49        }
50    }
51
52    /// Convert to algebra TriplePattern
53    pub fn to_algebra_pattern(&self) -> Result<AlgebraTriplePattern, OxirsError> {
54        let subject = self.subject.to_algebra_term_pattern()?;
55        let predicate = self.predicate.to_algebra_term_pattern()?;
56        let object = self.object.to_algebra_term_pattern()?;
57
58        Ok(AlgebraTriplePattern::new(subject, predicate, object))
59    }
60
61    /// Convert to model TriplePattern
62    pub fn to_model_pattern(&self) -> TriplePattern {
63        let subject = self.subject.to_model_subject_pattern();
64        let predicate = self.predicate.to_model_predicate_pattern();
65        let object = self.object.to_model_object_pattern();
66
67        TriplePattern::new(subject, predicate, object)
68    }
69
70    /// Create from algebra TriplePattern
71    pub fn from_algebra_pattern(pattern: &AlgebraTriplePattern) -> Self {
72        Self {
73            subject: UnifiedTermPattern::from_algebra_term(&pattern.subject),
74            predicate: UnifiedTermPattern::from_algebra_term(&pattern.predicate),
75            object: UnifiedTermPattern::from_algebra_term(&pattern.object),
76        }
77    }
78
79    /// Create from model TriplePattern
80    pub fn from_model_pattern(pattern: &TriplePattern) -> Self {
81        Self {
82            subject: pattern
83                .subject()
84                .map(UnifiedTermPattern::from_model_subject)
85                .unwrap_or(UnifiedTermPattern::Wildcard),
86            predicate: pattern
87                .predicate()
88                .map(UnifiedTermPattern::from_model_predicate)
89                .unwrap_or(UnifiedTermPattern::Wildcard),
90            object: pattern
91                .object()
92                .map(UnifiedTermPattern::from_model_object)
93                .unwrap_or(UnifiedTermPattern::Wildcard),
94        }
95    }
96
97    /// Extract all variables from this pattern
98    pub fn extract_variables(&self) -> HashSet<Variable> {
99        let mut vars = HashSet::new();
100
101        if let UnifiedTermPattern::Variable(v) = &self.subject {
102            vars.insert(v.clone());
103        }
104        if let UnifiedTermPattern::Variable(v) = &self.predicate {
105            vars.insert(v.clone());
106        }
107        if let UnifiedTermPattern::Variable(v) = &self.object {
108            vars.insert(v.clone());
109        }
110
111        vars
112    }
113
114    /// Check if this pattern matches a concrete triple
115    pub fn matches(&self, triple: &Triple) -> bool {
116        self.subject.matches_subject(triple.subject())
117            && self.predicate.matches_predicate(triple.predicate())
118            && self.object.matches_object(triple.object())
119    }
120
121    /// Get pattern selectivity estimate (0.0 = most selective, 1.0 = least selective)
122    pub fn selectivity_estimate(&self) -> f64 {
123        let subject_selectivity = self.subject.selectivity_factor();
124        let predicate_selectivity = self.predicate.selectivity_factor();
125        let object_selectivity = self.object.selectivity_factor();
126
127        // Combined selectivity using independence assumption
128        subject_selectivity * predicate_selectivity * object_selectivity
129    }
130}
131
132impl UnifiedTermPattern {
133    /// Convert to algebra TermPattern
134    pub fn to_algebra_term_pattern(&self) -> Result<AlgebraTermPattern, OxirsError> {
135        match self {
136            UnifiedTermPattern::NamedNode(nn) => Ok(AlgebraTermPattern::NamedNode(nn.clone())),
137            UnifiedTermPattern::BlankNode(bn) => Ok(AlgebraTermPattern::BlankNode(bn.clone())),
138            UnifiedTermPattern::Literal(lit) => Ok(AlgebraTermPattern::Literal(lit.clone())),
139            UnifiedTermPattern::Variable(var) => Ok(AlgebraTermPattern::Variable(var.clone())),
140            UnifiedTermPattern::Wildcard => Err(OxirsError::Query(
141                "Wildcard patterns cannot be converted to algebra representation".to_string(),
142            )),
143        }
144    }
145
146    /// Convert to model SubjectPattern
147    pub fn to_model_subject_pattern(&self) -> Option<SubjectPattern> {
148        match self {
149            UnifiedTermPattern::NamedNode(nn) => Some(SubjectPattern::NamedNode(nn.clone())),
150            UnifiedTermPattern::BlankNode(bn) => Some(SubjectPattern::BlankNode(bn.clone())),
151            UnifiedTermPattern::Variable(var) => Some(SubjectPattern::Variable(var.clone())),
152            UnifiedTermPattern::Literal(_) | UnifiedTermPattern::Wildcard => None,
153        }
154    }
155
156    /// Convert to model PredicatePattern
157    pub fn to_model_predicate_pattern(&self) -> Option<PredicatePattern> {
158        match self {
159            UnifiedTermPattern::NamedNode(nn) => Some(PredicatePattern::NamedNode(nn.clone())),
160            UnifiedTermPattern::Variable(var) => Some(PredicatePattern::Variable(var.clone())),
161            UnifiedTermPattern::BlankNode(_)
162            | UnifiedTermPattern::Literal(_)
163            | UnifiedTermPattern::Wildcard => None,
164        }
165    }
166
167    /// Convert to model ObjectPattern
168    pub fn to_model_object_pattern(&self) -> Option<ObjectPattern> {
169        match self {
170            UnifiedTermPattern::NamedNode(nn) => Some(ObjectPattern::NamedNode(nn.clone())),
171            UnifiedTermPattern::BlankNode(bn) => Some(ObjectPattern::BlankNode(bn.clone())),
172            UnifiedTermPattern::Literal(lit) => Some(ObjectPattern::Literal(lit.clone())),
173            UnifiedTermPattern::Variable(var) => Some(ObjectPattern::Variable(var.clone())),
174            UnifiedTermPattern::Wildcard => None,
175        }
176    }
177
178    /// Create from algebra TermPattern
179    pub fn from_algebra_term(term: &AlgebraTermPattern) -> Self {
180        match term {
181            AlgebraTermPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
182            AlgebraTermPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
183            AlgebraTermPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
184            AlgebraTermPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
185            AlgebraTermPattern::QuotedTriple(_) => {
186                panic!("RDF-star quoted triples not yet supported in pattern unification")
187            }
188        }
189    }
190
191    /// Create from model SubjectPattern
192    pub fn from_model_subject(subject: &SubjectPattern) -> Self {
193        match subject {
194            SubjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
195            SubjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
196            SubjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
197        }
198    }
199
200    /// Create from model PredicatePattern
201    pub fn from_model_predicate(predicate: &PredicatePattern) -> Self {
202        match predicate {
203            PredicatePattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
204            PredicatePattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
205        }
206    }
207
208    /// Create from model ObjectPattern
209    pub fn from_model_object(object: &ObjectPattern) -> Self {
210        match object {
211            ObjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
212            ObjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
213            ObjectPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
214            ObjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
215        }
216    }
217
218    /// Check if this pattern matches a subject
219    pub fn matches_subject(&self, subject: &Subject) -> bool {
220        match (self, subject) {
221            (UnifiedTermPattern::NamedNode(pn), Subject::NamedNode(sn)) => pn == sn,
222            (UnifiedTermPattern::BlankNode(pb), Subject::BlankNode(sb)) => pb == sb,
223            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
224            _ => false,
225        }
226    }
227
228    /// Check if this pattern matches a predicate
229    pub fn matches_predicate(&self, predicate: &Predicate) -> bool {
230        match (self, predicate) {
231            (UnifiedTermPattern::NamedNode(pn), Predicate::NamedNode(sn)) => pn == sn,
232            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
233            _ => false,
234        }
235    }
236
237    /// Check if this pattern matches an object
238    pub fn matches_object(&self, object: &Object) -> bool {
239        match (self, object) {
240            (UnifiedTermPattern::NamedNode(pn), Object::NamedNode(on)) => pn == on,
241            (UnifiedTermPattern::BlankNode(pb), Object::BlankNode(ob)) => pb == ob,
242            (UnifiedTermPattern::Literal(pl), Object::Literal(ol)) => pl == ol,
243            (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
244            _ => false,
245        }
246    }
247
248    /// Get selectivity factor for cost estimation
249    pub fn selectivity_factor(&self) -> f64 {
250        match self {
251            UnifiedTermPattern::NamedNode(_) => 0.001, // Very selective
252            UnifiedTermPattern::BlankNode(_) => 0.01,  // Selective
253            UnifiedTermPattern::Literal(_) => 0.001,   // Very selective
254            UnifiedTermPattern::Variable(_) => 1.0,    // Not selective
255            UnifiedTermPattern::Wildcard => 1.0,       // Not selective
256        }
257    }
258}
259
260/// Pattern conversion utilities
261pub struct PatternConverter;
262
263impl PatternConverter {
264    /// Convert a vector of algebra patterns to model patterns
265    pub fn algebra_to_model_patterns(patterns: &[AlgebraTriplePattern]) -> Vec<TriplePattern> {
266        patterns
267            .iter()
268            .map(|p| UnifiedTriplePattern::from_algebra_pattern(p).to_model_pattern())
269            .collect()
270    }
271
272    /// Convert a vector of model patterns to algebra patterns
273    pub fn model_to_algebra_patterns(
274        patterns: &[TriplePattern],
275    ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
276        patterns
277            .iter()
278            .map(|p| UnifiedTriplePattern::from_model_pattern(p).to_algebra_pattern())
279            .collect()
280    }
281
282    /// Extract all variables from a set of algebra patterns
283    pub fn extract_variables_from_algebra(patterns: &[AlgebraTriplePattern]) -> HashSet<Variable> {
284        patterns
285            .iter()
286            .flat_map(|p| UnifiedTriplePattern::from_algebra_pattern(p).extract_variables())
287            .collect()
288    }
289
290    /// Extract all variables from a set of model patterns
291    pub fn extract_variables_from_model(patterns: &[TriplePattern]) -> HashSet<Variable> {
292        patterns
293            .iter()
294            .flat_map(|p| UnifiedTriplePattern::from_model_pattern(p).extract_variables())
295            .collect()
296    }
297
298    /// Estimate combined selectivity for a set of patterns
299    pub fn estimate_pattern_selectivity(patterns: &[UnifiedTriplePattern]) -> f64 {
300        if patterns.is_empty() {
301            return 1.0;
302        }
303
304        patterns
305            .iter()
306            .map(|p| p.selectivity_estimate())
307            .fold(1.0, |acc, s| acc * s)
308    }
309}
310
311/// Query optimization utilities using unified patterns
312pub struct PatternOptimizer;
313
314impl PatternOptimizer {
315    /// Reorder patterns for optimal execution based on selectivity
316    pub fn optimize_pattern_order(patterns: &[UnifiedTriplePattern]) -> Vec<UnifiedTriplePattern> {
317        let mut sorted_patterns = patterns.to_vec();
318
319        // Sort by selectivity (most selective first)
320        sorted_patterns.sort_by(|a, b| {
321            a.selectivity_estimate()
322                .partial_cmp(&b.selectivity_estimate())
323                .unwrap_or(std::cmp::Ordering::Equal)
324        });
325
326        sorted_patterns
327    }
328
329    /// Find optimal join order for patterns
330    pub fn optimize_join_order(patterns: &[UnifiedTriplePattern]) -> Vec<usize> {
331        if patterns.is_empty() {
332            return Vec::new();
333        }
334
335        // Simple greedy algorithm: start with most selective pattern
336        let mut remaining: Vec<usize> = (0..patterns.len()).collect();
337        let mut order = Vec::new();
338
339        // Find most selective pattern as starting point
340        if let Some(min_idx) = remaining
341            .iter()
342            .min_by(|&&a, &&b| {
343                patterns[a]
344                    .selectivity_estimate()
345                    .partial_cmp(&patterns[b].selectivity_estimate())
346                    .unwrap_or(std::cmp::Ordering::Equal)
347            })
348            .copied()
349        {
350            order.push(min_idx);
351            remaining.retain(|&x| x != min_idx);
352        }
353
354        // Greedily add patterns that share most variables with already selected patterns
355        while !remaining.is_empty() {
356            let selected_vars: HashSet<Variable> = order
357                .iter()
358                .flat_map(|&i| patterns[i].extract_variables())
359                .collect();
360
361            if let Some(best_idx) = remaining
362                .iter()
363                .max_by_key(|&&i| {
364                    let pattern_vars = patterns[i].extract_variables();
365                    pattern_vars.intersection(&selected_vars).count()
366                })
367                .copied()
368            {
369                order.push(best_idx);
370                remaining.retain(|&x| x != best_idx);
371            } else {
372                // Fallback: add remaining patterns in selectivity order
373                order.extend(remaining);
374                break;
375            }
376        }
377
378        order
379    }
380}
381
382#[cfg(test)]
383mod tests {
384    use super::*;
385
386    #[test]
387    fn test_unified_pattern_conversion() {
388        // Create an algebra pattern
389        let algebra_pattern = AlgebraTriplePattern::new(
390            AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
391            AlgebraTermPattern::NamedNode(
392                NamedNode::new("http://example.org/pred").expect("valid IRI"),
393            ),
394            AlgebraTermPattern::Literal(Literal::new("test")),
395        );
396
397        // Convert to unified pattern
398        let unified = UnifiedTriplePattern::from_algebra_pattern(&algebra_pattern);
399
400        // Convert back to algebra pattern
401        let converted_back = unified
402            .to_algebra_pattern()
403            .expect("operation should succeed");
404
405        assert_eq!(algebra_pattern, converted_back);
406    }
407
408    #[test]
409    fn test_pattern_selectivity() {
410        let patterns = [
411            UnifiedTriplePattern::new(
412                UnifiedTermPattern::Variable(Variable::new("s").expect("valid variable name")),
413                UnifiedTermPattern::Variable(Variable::new("p").expect("valid variable name")),
414                UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
415            ),
416            UnifiedTriplePattern::new(
417                UnifiedTermPattern::NamedNode(
418                    NamedNode::new("http://example.org/s").expect("valid IRI"),
419                ),
420                UnifiedTermPattern::NamedNode(
421                    NamedNode::new("http://example.org/p").expect("valid IRI"),
422                ),
423                UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
424            ),
425        ];
426
427        // Second pattern should be more selective
428        assert!(patterns[1].selectivity_estimate() < patterns[0].selectivity_estimate());
429    }
430
431    #[test]
432    fn test_pattern_optimization() {
433        let patterns = vec![
434            UnifiedTriplePattern::new(
435                UnifiedTermPattern::Variable(Variable::new("s").expect("valid variable name")),
436                UnifiedTermPattern::Variable(Variable::new("p").expect("valid variable name")),
437                UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
438            ),
439            UnifiedTriplePattern::new(
440                UnifiedTermPattern::NamedNode(
441                    NamedNode::new("http://example.org/s").expect("valid IRI"),
442                ),
443                UnifiedTermPattern::NamedNode(
444                    NamedNode::new("http://example.org/p").expect("valid IRI"),
445                ),
446                UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
447            ),
448        ];
449
450        let optimized = PatternOptimizer::optimize_pattern_order(&patterns);
451
452        // More selective pattern should come first
453        assert_eq!(optimized[0], patterns[1]);
454        assert_eq!(optimized[1], patterns[0]);
455    }
456}