1use crate::model::*;
8use crate::query::algebra::{AlgebraTriplePattern, TermPattern as AlgebraTermPattern};
9use crate::OxirsError;
10use std::collections::HashSet;
11
12#[derive(Debug, Clone, PartialEq, Eq, Hash)]
14pub struct UnifiedTriplePattern {
15 pub subject: UnifiedTermPattern,
17 pub predicate: UnifiedTermPattern,
19 pub object: UnifiedTermPattern,
21}
22
23#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub enum UnifiedTermPattern {
26 NamedNode(NamedNode),
28 BlankNode(BlankNode),
30 Literal(Literal),
32 Variable(Variable),
34 Wildcard,
36}
37
38impl UnifiedTriplePattern {
39 pub fn new(
41 subject: UnifiedTermPattern,
42 predicate: UnifiedTermPattern,
43 object: UnifiedTermPattern,
44 ) -> Self {
45 Self {
46 subject,
47 predicate,
48 object,
49 }
50 }
51
52 pub fn to_algebra_pattern(&self) -> Result<AlgebraTriplePattern, OxirsError> {
54 let subject = self.subject.to_algebra_term_pattern()?;
55 let predicate = self.predicate.to_algebra_term_pattern()?;
56 let object = self.object.to_algebra_term_pattern()?;
57
58 Ok(AlgebraTriplePattern::new(subject, predicate, object))
59 }
60
61 pub fn to_model_pattern(&self) -> TriplePattern {
63 let subject = self.subject.to_model_subject_pattern();
64 let predicate = self.predicate.to_model_predicate_pattern();
65 let object = self.object.to_model_object_pattern();
66
67 TriplePattern::new(subject, predicate, object)
68 }
69
70 pub fn from_algebra_pattern(pattern: &AlgebraTriplePattern) -> Self {
72 Self {
73 subject: UnifiedTermPattern::from_algebra_term(&pattern.subject),
74 predicate: UnifiedTermPattern::from_algebra_term(&pattern.predicate),
75 object: UnifiedTermPattern::from_algebra_term(&pattern.object),
76 }
77 }
78
79 pub fn from_model_pattern(pattern: &TriplePattern) -> Self {
81 Self {
82 subject: pattern
83 .subject()
84 .map(UnifiedTermPattern::from_model_subject)
85 .unwrap_or(UnifiedTermPattern::Wildcard),
86 predicate: pattern
87 .predicate()
88 .map(UnifiedTermPattern::from_model_predicate)
89 .unwrap_or(UnifiedTermPattern::Wildcard),
90 object: pattern
91 .object()
92 .map(UnifiedTermPattern::from_model_object)
93 .unwrap_or(UnifiedTermPattern::Wildcard),
94 }
95 }
96
97 pub fn extract_variables(&self) -> HashSet<Variable> {
99 let mut vars = HashSet::new();
100
101 if let UnifiedTermPattern::Variable(v) = &self.subject {
102 vars.insert(v.clone());
103 }
104 if let UnifiedTermPattern::Variable(v) = &self.predicate {
105 vars.insert(v.clone());
106 }
107 if let UnifiedTermPattern::Variable(v) = &self.object {
108 vars.insert(v.clone());
109 }
110
111 vars
112 }
113
114 pub fn matches(&self, triple: &Triple) -> bool {
116 self.subject.matches_subject(triple.subject())
117 && self.predicate.matches_predicate(triple.predicate())
118 && self.object.matches_object(triple.object())
119 }
120
121 pub fn selectivity_estimate(&self) -> f64 {
123 let subject_selectivity = self.subject.selectivity_factor();
124 let predicate_selectivity = self.predicate.selectivity_factor();
125 let object_selectivity = self.object.selectivity_factor();
126
127 subject_selectivity * predicate_selectivity * object_selectivity
129 }
130}
131
132impl UnifiedTermPattern {
133 pub fn to_algebra_term_pattern(&self) -> Result<AlgebraTermPattern, OxirsError> {
135 match self {
136 UnifiedTermPattern::NamedNode(nn) => Ok(AlgebraTermPattern::NamedNode(nn.clone())),
137 UnifiedTermPattern::BlankNode(bn) => Ok(AlgebraTermPattern::BlankNode(bn.clone())),
138 UnifiedTermPattern::Literal(lit) => Ok(AlgebraTermPattern::Literal(lit.clone())),
139 UnifiedTermPattern::Variable(var) => Ok(AlgebraTermPattern::Variable(var.clone())),
140 UnifiedTermPattern::Wildcard => Err(OxirsError::Query(
141 "Wildcard patterns cannot be converted to algebra representation".to_string(),
142 )),
143 }
144 }
145
146 pub fn to_model_subject_pattern(&self) -> Option<SubjectPattern> {
148 match self {
149 UnifiedTermPattern::NamedNode(nn) => Some(SubjectPattern::NamedNode(nn.clone())),
150 UnifiedTermPattern::BlankNode(bn) => Some(SubjectPattern::BlankNode(bn.clone())),
151 UnifiedTermPattern::Variable(var) => Some(SubjectPattern::Variable(var.clone())),
152 UnifiedTermPattern::Literal(_) | UnifiedTermPattern::Wildcard => None,
153 }
154 }
155
156 pub fn to_model_predicate_pattern(&self) -> Option<PredicatePattern> {
158 match self {
159 UnifiedTermPattern::NamedNode(nn) => Some(PredicatePattern::NamedNode(nn.clone())),
160 UnifiedTermPattern::Variable(var) => Some(PredicatePattern::Variable(var.clone())),
161 UnifiedTermPattern::BlankNode(_)
162 | UnifiedTermPattern::Literal(_)
163 | UnifiedTermPattern::Wildcard => None,
164 }
165 }
166
167 pub fn to_model_object_pattern(&self) -> Option<ObjectPattern> {
169 match self {
170 UnifiedTermPattern::NamedNode(nn) => Some(ObjectPattern::NamedNode(nn.clone())),
171 UnifiedTermPattern::BlankNode(bn) => Some(ObjectPattern::BlankNode(bn.clone())),
172 UnifiedTermPattern::Literal(lit) => Some(ObjectPattern::Literal(lit.clone())),
173 UnifiedTermPattern::Variable(var) => Some(ObjectPattern::Variable(var.clone())),
174 UnifiedTermPattern::Wildcard => None,
175 }
176 }
177
178 pub fn from_algebra_term(term: &AlgebraTermPattern) -> Self {
180 match term {
181 AlgebraTermPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
182 AlgebraTermPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
183 AlgebraTermPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
184 AlgebraTermPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
185 }
186 }
187
188 pub fn from_model_subject(subject: &SubjectPattern) -> Self {
190 match subject {
191 SubjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
192 SubjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
193 SubjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
194 }
195 }
196
197 pub fn from_model_predicate(predicate: &PredicatePattern) -> Self {
199 match predicate {
200 PredicatePattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
201 PredicatePattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
202 }
203 }
204
205 pub fn from_model_object(object: &ObjectPattern) -> Self {
207 match object {
208 ObjectPattern::NamedNode(nn) => UnifiedTermPattern::NamedNode(nn.clone()),
209 ObjectPattern::BlankNode(bn) => UnifiedTermPattern::BlankNode(bn.clone()),
210 ObjectPattern::Literal(lit) => UnifiedTermPattern::Literal(lit.clone()),
211 ObjectPattern::Variable(var) => UnifiedTermPattern::Variable(var.clone()),
212 }
213 }
214
215 pub fn matches_subject(&self, subject: &Subject) -> bool {
217 match (self, subject) {
218 (UnifiedTermPattern::NamedNode(pn), Subject::NamedNode(sn)) => pn == sn,
219 (UnifiedTermPattern::BlankNode(pb), Subject::BlankNode(sb)) => pb == sb,
220 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
221 _ => false,
222 }
223 }
224
225 pub fn matches_predicate(&self, predicate: &Predicate) -> bool {
227 match (self, predicate) {
228 (UnifiedTermPattern::NamedNode(pn), Predicate::NamedNode(sn)) => pn == sn,
229 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
230 _ => false,
231 }
232 }
233
234 pub fn matches_object(&self, object: &Object) -> bool {
236 match (self, object) {
237 (UnifiedTermPattern::NamedNode(pn), Object::NamedNode(on)) => pn == on,
238 (UnifiedTermPattern::BlankNode(pb), Object::BlankNode(ob)) => pb == ob,
239 (UnifiedTermPattern::Literal(pl), Object::Literal(ol)) => pl == ol,
240 (UnifiedTermPattern::Variable(_), _) | (UnifiedTermPattern::Wildcard, _) => true,
241 _ => false,
242 }
243 }
244
245 pub fn selectivity_factor(&self) -> f64 {
247 match self {
248 UnifiedTermPattern::NamedNode(_) => 0.001, UnifiedTermPattern::BlankNode(_) => 0.01, UnifiedTermPattern::Literal(_) => 0.001, UnifiedTermPattern::Variable(_) => 1.0, UnifiedTermPattern::Wildcard => 1.0, }
254 }
255}
256
257pub struct PatternConverter;
259
260impl PatternConverter {
261 pub fn algebra_to_model_patterns(patterns: &[AlgebraTriplePattern]) -> Vec<TriplePattern> {
263 patterns
264 .iter()
265 .map(|p| UnifiedTriplePattern::from_algebra_pattern(p).to_model_pattern())
266 .collect()
267 }
268
269 pub fn model_to_algebra_patterns(
271 patterns: &[TriplePattern],
272 ) -> Result<Vec<AlgebraTriplePattern>, OxirsError> {
273 patterns
274 .iter()
275 .map(|p| UnifiedTriplePattern::from_model_pattern(p).to_algebra_pattern())
276 .collect()
277 }
278
279 pub fn extract_variables_from_algebra(patterns: &[AlgebraTriplePattern]) -> HashSet<Variable> {
281 patterns
282 .iter()
283 .flat_map(|p| UnifiedTriplePattern::from_algebra_pattern(p).extract_variables())
284 .collect()
285 }
286
287 pub fn extract_variables_from_model(patterns: &[TriplePattern]) -> HashSet<Variable> {
289 patterns
290 .iter()
291 .flat_map(|p| UnifiedTriplePattern::from_model_pattern(p).extract_variables())
292 .collect()
293 }
294
295 pub fn estimate_pattern_selectivity(patterns: &[UnifiedTriplePattern]) -> f64 {
297 if patterns.is_empty() {
298 return 1.0;
299 }
300
301 patterns
302 .iter()
303 .map(|p| p.selectivity_estimate())
304 .fold(1.0, |acc, s| acc * s)
305 }
306}
307
308pub struct PatternOptimizer;
310
311impl PatternOptimizer {
312 pub fn optimize_pattern_order(patterns: &[UnifiedTriplePattern]) -> Vec<UnifiedTriplePattern> {
314 let mut sorted_patterns = patterns.to_vec();
315
316 sorted_patterns.sort_by(|a, b| {
318 a.selectivity_estimate()
319 .partial_cmp(&b.selectivity_estimate())
320 .unwrap_or(std::cmp::Ordering::Equal)
321 });
322
323 sorted_patterns
324 }
325
326 pub fn optimize_join_order(patterns: &[UnifiedTriplePattern]) -> Vec<usize> {
328 if patterns.is_empty() {
329 return Vec::new();
330 }
331
332 let mut remaining: Vec<usize> = (0..patterns.len()).collect();
334 let mut order = Vec::new();
335
336 if let Some(min_idx) = remaining
338 .iter()
339 .min_by(|&&a, &&b| {
340 patterns[a]
341 .selectivity_estimate()
342 .partial_cmp(&patterns[b].selectivity_estimate())
343 .unwrap_or(std::cmp::Ordering::Equal)
344 })
345 .copied()
346 {
347 order.push(min_idx);
348 remaining.retain(|&x| x != min_idx);
349 }
350
351 while !remaining.is_empty() {
353 let selected_vars: HashSet<Variable> = order
354 .iter()
355 .flat_map(|&i| patterns[i].extract_variables())
356 .collect();
357
358 if let Some(best_idx) = remaining
359 .iter()
360 .max_by_key(|&&i| {
361 let pattern_vars = patterns[i].extract_variables();
362 pattern_vars.intersection(&selected_vars).count()
363 })
364 .copied()
365 {
366 order.push(best_idx);
367 remaining.retain(|&x| x != best_idx);
368 } else {
369 order.extend(remaining);
371 break;
372 }
373 }
374
375 order
376 }
377}
378
379#[cfg(test)]
380mod tests {
381 use super::*;
382
383 #[test]
384 fn test_unified_pattern_conversion() {
385 let algebra_pattern = AlgebraTriplePattern::new(
387 AlgebraTermPattern::Variable(Variable::new("s").unwrap()),
388 AlgebraTermPattern::NamedNode(NamedNode::new("http://example.org/pred").unwrap()),
389 AlgebraTermPattern::Literal(Literal::new("test")),
390 );
391
392 let unified = UnifiedTriplePattern::from_algebra_pattern(&algebra_pattern);
394
395 let converted_back = unified.to_algebra_pattern().unwrap();
397
398 assert_eq!(algebra_pattern, converted_back);
399 }
400
401 #[test]
402 fn test_pattern_selectivity() {
403 let patterns = vec![
404 UnifiedTriplePattern::new(
405 UnifiedTermPattern::Variable(Variable::new("s").unwrap()),
406 UnifiedTermPattern::Variable(Variable::new("p").unwrap()),
407 UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
408 ),
409 UnifiedTriplePattern::new(
410 UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/s").unwrap()),
411 UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/p").unwrap()),
412 UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
413 ),
414 ];
415
416 assert!(patterns[1].selectivity_estimate() < patterns[0].selectivity_estimate());
418 }
419
420 #[test]
421 fn test_pattern_optimization() {
422 let patterns = vec![
423 UnifiedTriplePattern::new(
424 UnifiedTermPattern::Variable(Variable::new("s").unwrap()),
425 UnifiedTermPattern::Variable(Variable::new("p").unwrap()),
426 UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
427 ),
428 UnifiedTriplePattern::new(
429 UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/s").unwrap()),
430 UnifiedTermPattern::NamedNode(NamedNode::new("http://example.org/p").unwrap()),
431 UnifiedTermPattern::Variable(Variable::new("o").unwrap()),
432 ),
433 ];
434
435 let optimized = PatternOptimizer::optimize_pattern_order(&patterns);
436
437 assert_eq!(optimized[0], patterns[1]);
439 assert_eq!(optimized[1], patterns[0]);
440 }
441}