1use crate::model::*;
8use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
9use crate::OxirsError;
10use std::collections::HashSet;
11
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14pub struct UnifiedTriplePattern {
15 pub subject: UnifiedTermPattern,
17 pub predicate: UnifiedTermPattern,
19 pub object: UnifiedTermPattern,
21}
22
23#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub enum UnifiedTermPattern {
26 NamedNode(NamedNode),
28 BlankNode(BlankNode),
30 Literal(Literal),
32 Variable(Variable),
34 Wildcard,
36}
37
38impl UnifiedTriplePattern {
39 pub fn new(
41 subject: UnifiedTermPattern,
42 predicate: UnifiedTermPattern,
43 object: UnifiedTermPattern,
44 ) -> Self {
45 Self {
46 subject,
47 predicate,
48 object,
49 }
50 }
51
52 pub fn to_algebra_pattern(&self) -> Result<AlgebraTriplePattern, OxirsError> {
54 let subject = self.subject.to_algebra_term_pattern()?;
55 let predicate = self.predicate.to_algebra_term_pattern()?;
56 let object = self.object.to_algebra_term_pattern()?;
57
58 Ok(AlgebraTriplePattern::new(subject, predicate, object))
59 }
60
61 pub fn to_model_pattern(&self) -> TriplePattern {
63 let subject = self.subject.to_model_subject_pattern();
64 let predicate = self.predicate.to_model_predicate_pattern();
65 let object = self.object.to_model_object_pattern();
66
67 TriplePattern::new(subject, predicate, object)
68 }
69
70 pub fn from_algebra_pattern(pattern: &AlgebraTriplePattern) -> Self {
72 Self {
73 subject: UnifiedTermPattern::from_algebra_term(&pattern.subject),
74 predicate: UnifiedTermPattern::from_algebra_term(&pattern.predicate),
75 object: UnifiedTermPattern::from_algebra_term(&pattern.object),
76 }
77 }
78
79 pub fn from_model_pattern(pattern: &TriplePattern) -> Self {
81 Self {
82 subject: pattern
83 .subject()
84 .map(UnifiedTermPattern::from_model_subject)
85 .unwrap_or(UnifiedTermPattern::Wildcard),
86 predicate: pattern
87 .predicate()
88 .map(UnifiedTermPattern::from_model_predicate)
89 .unwrap_or(UnifiedTermPattern::Wildcard),
90 object: pattern
91 .object()
92 .map(UnifiedTermPattern::from_model_object)
93 .unwrap_or(UnifiedTermPattern::Wildcard),
94 }
95 }
96
97 pub fn extract_variables(&self) -> HashSet<Variable> {
99 let mut vars = HashSet::new();
100
101 if let UnifiedTermPattern::Variable(v) = &self.subject {
102 vars.insert(v.clone());
103 }
104 if let UnifiedTermPattern::Variable(v) = &self.predicate {
105 vars.insert(v.clone());
106 }
107 if let UnifiedTermPattern::Variable(v) = &self.object {
108 vars.insert(v.clone());
109 }
110
111 vars
112 }
113
114 pub fn matches(&self, triple: &Triple) -> bool {
116 self.subject.matches_subject(triple.subject())
117 && self.predicate.matches_predicate(triple.predicate())
118 && self.object.matches_object(triple.object())
119 }
120
121 pub fn selectivity_estimate(&self) -> f64 {
123 let subject_selectivity = self.subject.selectivity_factor();
124 let predicate_selectivity = self.predicate.selectivity_factor();
125 let object_selectivity = self.object.selectivity_factor();
126
127 subject_selectivity * predicate_selectivity * object_selectivity
129 }
130}
131
132impl UnifiedTermPattern {
133 pub fn to_algebra_term_pattern(&self) -> Result<AlgebraTermPattern, OxirsError> {
135 match self {
136 UnifiedTermPattern::NamedNode(nn) => Ok(AlgebraTermPattern::NamedNode(nn.clone())),
137 UnifiedTermPattern::BlankNode(bn) => Ok(AlgebraTermPattern::BlankNode(bn.clone())),
138 UnifiedTermPattern::Literal(lit) => Ok(AlgebraTermPattern::Literal(lit.clone())),
139 UnifiedTermPattern::Variable(var) => Ok(AlgebraTermPattern::Variable(var.clone())),
140 UnifiedTermPattern::Wildcard => Err(OxirsError::Query(
141 "Wildcard patterns cannot be converted to algebra representation".to_string(),
142 )),
143 }
144 }
145
146 pub fn to_model_subject_pattern(&self) -> Option<SubjectPattern> {
148 match self {
149 UnifiedTermPattern::NamedNode(nn) => Some(SubjectPattern::NamedNode(nn.clone())),
150 UnifiedTermPattern::BlankNode(bn) => Some(SubjectPattern::BlankNode(bn.clone())),
151 UnifiedTermPattern::Variable(var) => Some(SubjectPattern::Variable(var.clone())),
152 UnifiedTermPattern::Literal(_) | UnifiedTermPattern::Wildcard => None,
153 }
154 }
155
156 pub fn to_model_predicate_pattern(&self) -> Option<PredicatePattern> {
158 match self {
159 UnifiedTermPattern::NamedNode(nn) => Some(PredicatePattern::NamedNode(nn.clone())),
160 UnifiedTermPattern::Variable(var) => Some(PredicatePattern::Variable(var.clone())),
161 UnifiedTermPattern::BlankNode(_)
162 | UnifiedTermPattern::Literal(_)
163 | UnifiedTermPattern::Wildcard => None,
164 }
165 }
166
167 pub fn to_model_object_pattern(&self) -> Option<ObjectPattern> {
169 match self {
170 UnifiedTermPattern::NamedNode(nn) => Some(ObjectPattern::NamedNode(nn.clone())),
171 UnifiedTermPattern::BlankNode(bn) => Some(ObjectPattern::BlankNode(bn.clone())),
172 UnifiedTermPattern::Literal(lit) => Some(ObjectPattern::Literal(lit.clone())),
173 UnifiedTermPattern::Variable(var) => Some(ObjectPattern::Variable(var.clone())),
174 UnifiedTermPattern::Wildcard => None,
175 }
176 }
177
178 pub fn from_algebra_term(term: &AlgebraTermPattern) -> Self {
180 match term {
181 AlgebraTermPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
182 AlgebraTermPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
183 AlgebraTermPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
184 AlgebraTermPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
185 AlgebraTermPattern::QuotedTriple(_) => {
186 panic!("RDF-star quoted triples not yet supported in pattern unification")
187 }
188 }
189 }
190
191 pub fn from_model_subject(subject: &SubjectPattern) -> Self {
193 match subject {
194 SubjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
195 SubjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
196 SubjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
197 }
198 }
199
200 pub fn from_model_predicate(predicate: &PredicatePattern) -> Self {
202 match predicate {
203 PredicatePattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
204 PredicatePattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
205 }
206 }
207
208 pub fn from_model_object(object: &ObjectPattern) -> Self {
210 match object {
211 ObjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
212 ObjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
213 ObjectPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
214 ObjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
215 }
216 }
217
218 pub fn matches_subject(&self, subject: &Subject) -> bool {
220 match (self, subject) {
221 (UnifiedTermPattern::NamedNode(pn), Subject::NamedNode(sn)) => pn == sn,
222 (UnifiedTermPattern::BlankNode(pb), Subject::BlankNode(sb)) => pb == sb,
223 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
224 _ => false,
225 }
226 }
227
228 pub fn matches_predicate(&self, predicate: &Predicate) -> bool {
230 match (self, predicate) {
231 (UnifiedTermPattern::NamedNode(pn), Predicate::NamedNode(sn)) => pn == sn,
232 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
233 _ => false,
234 }
235 }
236
237 pub fn matches_object(&self, object: &Object) -> bool {
239 match (self, object) {
240 (UnifiedTermPattern::NamedNode(pn), Object::NamedNode(on)) => pn == on,
241 (UnifiedTermPattern::BlankNode(pb), Object::BlankNode(ob)) => pb == ob,
242 (UnifiedTermPattern::Literal(pl), Object::Literal(ol)) => pl == ol,
243 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
244 _ => false,
245 }
246 }
247
248 pub fn selectivity_factor(&self) -> f64 {
250 match self {
251 UnifiedTermPattern::NamedNode(_) => 0.001, UnifiedTermPattern::BlankNode(_) => 0.01, UnifiedTermPattern::Literal(_) => 0.001, UnifiedTermPattern::Variable(_) => 1.0, UnifiedTermPattern::Wildcard => 1.0, }
257 }
258}
259
260pub struct PatternConverter;
262
263impl PatternConverter {
264 pub fn algebra_to_model_patterns(patterns: &[AlgebraTriplePattern]) -> Vec<TriplePattern> {
266 patterns
267 .iter()
268 .map(|p| UnifiedTriplePattern::from_algebra_pattern(p).to_model_pattern())
269 .collect()
270 }
271
272 pub fn model_to_algebra_patterns(
274 patterns: &[TriplePattern],
275 ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
276 patterns
277 .iter()
278 .map(|p| UnifiedTriplePattern::from_model_pattern(p).to_algebra_pattern())
279 .collect()
280 }
281
282 pub fn extract_variables_from_algebra(patterns: &[AlgebraTriplePattern]) -> HashSet<Variable> {
284 patterns
285 .iter()
286 .flat_map(|p| UnifiedTriplePattern::from_algebra_pattern(p).extract_variables())
287 .collect()
288 }
289
290 pub fn extract_variables_from_model(patterns: &[TriplePattern]) -> HashSet<Variable> {
292 patterns
293 .iter()
294 .flat_map(|p| UnifiedTriplePattern::from_model_pattern(p).extract_variables())
295 .collect()
296 }
297
298 pub fn estimate_pattern_selectivity(patterns: &[UnifiedTriplePattern]) -> f64 {
300 if patterns.is_empty() {
301 return 1.0;
302 }
303
304 patterns
305 .iter()
306 .map(|p| p.selectivity_estimate())
307 .fold(1.0, |acc, s| acc * s)
308 }
309}
310
311pub struct PatternOptimizer;
313
314impl PatternOptimizer {
315 pub fn optimize_pattern_order(patterns: &[UnifiedTriplePattern]) -> Vec<UnifiedTriplePattern> {
317 let mut sorted_patterns = patterns.to_vec();
318
319 sorted_patterns.sort_by(|a, b| {
321 a.selectivity_estimate()
322 .partial_cmp(&b.selectivity_estimate())
323 .unwrap_or(std::cmp::Ordering::Equal)
324 });
325
326 sorted_patterns
327 }
328
329 pub fn optimize_join_order(patterns: &[UnifiedTriplePattern]) -> Vec<usize> {
331 if patterns.is_empty() {
332 return Vec::new();
333 }
334
335 let mut remaining: Vec<usize> = (0..patterns.len()).collect();
337 let mut order = Vec::new();
338
339 if let Some(min_idx) = remaining
341 .iter()
342 .min_by(|&&a, &&b| {
343 patterns[a]
344 .selectivity_estimate()
345 .partial_cmp(&patterns[b].selectivity_estimate())
346 .unwrap_or(std::cmp::Ordering::Equal)
347 })
348 .copied()
349 {
350 order.push(min_idx);
351 remaining.retain(|&x| x != min_idx);
352 }
353
354 while !remaining.is_empty() {
356 let selected_vars: HashSet<Variable> = order
357 .iter()
358 .flat_map(|&i| patterns[i].extract_variables())
359 .collect();
360
361 if let Some(best_idx) = remaining
362 .iter()
363 .max_by_key(|&&i| {
364 let pattern_vars = patterns[i].extract_variables();
365 pattern_vars.intersection(&selected_vars).count()
366 })
367 .copied()
368 {
369 order.push(best_idx);
370 remaining.retain(|&x| x != best_idx);
371 } else {
372 order.extend(remaining);
374 break;
375 }
376 }
377
378 order
379 }
380}
381
382#[cfg(test)]
383mod tests {
384 use super::*;
385
386 #[test]
387 fn test_unified_pattern_conversion() {
388 let algebra_pattern = AlgebraTriplePattern::new(
390 AlgebraTermPattern::Variable(Variable::new("s").expect("valid variable name")),
391 AlgebraTermPattern::NamedNode(
392 NamedNode::new("http://example.org/pred").expect("valid IRI"),
393 ),
394 AlgebraTermPattern::Literal(Literal::new("test")),
395 );
396
397 let unified = UnifiedTriplePattern::from_algebra_pattern(&algebra_pattern);
399
400 let converted_back = unified
402 .to_algebra_pattern()
403 .expect("operation should succeed");
404
405 assert_eq!(algebra_pattern, converted_back);
406 }
407
408 #[test]
409 fn test_pattern_selectivity() {
410 let patterns = [
411 UnifiedTriplePattern::new(
412 UnifiedTermPattern::Variable(Variable::new("s").expect("valid variable name")),
413 UnifiedTermPattern::Variable(Variable::new("p").expect("valid variable name")),
414 UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
415 ),
416 UnifiedTriplePattern::new(
417 UnifiedTermPattern::NamedNode(
418 NamedNode::new("http://example.org/s").expect("valid IRI"),
419 ),
420 UnifiedTermPattern::NamedNode(
421 NamedNode::new("http://example.org/p").expect("valid IRI"),
422 ),
423 UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
424 ),
425 ];
426
427 assert!(patterns[1].selectivity_estimate() < patterns[0].selectivity_estimate());
429 }
430
431 #[test]
432 fn test_pattern_optimization() {
433 let patterns = vec![
434 UnifiedTriplePattern::new(
435 UnifiedTermPattern::Variable(Variable::new("s").expect("valid variable name")),
436 UnifiedTermPattern::Variable(Variable::new("p").expect("valid variable name")),
437 UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
438 ),
439 UnifiedTriplePattern::new(
440 UnifiedTermPattern::NamedNode(
441 NamedNode::new("http://example.org/s").expect("valid IRI"),
442 ),
443 UnifiedTermPattern::NamedNode(
444 NamedNode::new("http://example.org/p").expect("valid IRI"),
445 ),
446 UnifiedTermPattern::Variable(Variable::new("o").expect("valid variable name")),
447 ),
448 ];
449
450 let optimized = PatternOptimizer::optimize_pattern_order(&patterns);
451
452 assert_eq!(optimized[0], patterns[1]);
454 assert_eq!(optimized[1], patterns[0]);
455 }
456}