1use crate::algebra::{Solution, Term, Variable};
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet, VecDeque};
10
11#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
13pub enum PropertyPath {
14 Direct(Term),
16
17 Inverse(Box<PropertyPath>),
19
20 Sequence(Box<PropertyPath>, Box<PropertyPath>),
22
23 Alternative(Box<PropertyPath>, Box<PropertyPath>),
25
26 ZeroOrMore(Box<PropertyPath>),
28
29 OneOrMore(Box<PropertyPath>),
31
32 ZeroOrOne(Box<PropertyPath>),
34
35 NegatedPropertySet(Vec<Term>),
37}
38
39impl std::fmt::Display for PropertyPath {
40 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41 match self {
42 PropertyPath::Direct(term) => write!(f, "{term:?}"),
43 PropertyPath::Inverse(path) => write!(f, "^{path}"),
44 PropertyPath::Sequence(left, right) => write!(f, "{left}/{right}"),
45 PropertyPath::Alternative(left, right) => write!(f, "{left}|{right}"),
46 PropertyPath::ZeroOrMore(path) => write!(f, "{path}*"),
47 PropertyPath::OneOrMore(path) => write!(f, "{path}+"),
48 PropertyPath::ZeroOrOne(path) => write!(f, "{path}?"),
49 PropertyPath::NegatedPropertySet(terms) => {
50 write!(f, "!(")?;
51 for (i, term) in terms.iter().enumerate() {
52 if i > 0 {
53 write!(f, "|")?;
54 }
55 write!(f, "{term:?}")?;
56 }
57 write!(f, ")")
58 }
59 }
60 }
61}
62
63#[derive(Debug, Clone)]
65pub struct PathContext {
66 pub max_length: usize,
68 pub max_nodes: usize,
70 pub cycle_detection: bool,
72 pub optimization_level: PathOptimization,
74}
75
76#[derive(Debug, Clone, PartialEq)]
78pub enum PathOptimization {
79 None,
80 Basic,
81 Advanced,
82}
83
84impl Default for PathContext {
85 fn default() -> Self {
86 Self {
87 max_length: 100,
88 max_nodes: 10000,
89 cycle_detection: true,
90 optimization_level: PathOptimization::Basic,
91 }
92 }
93}
94
95pub struct PropertyPathEvaluator {
97 context: PathContext,
98}
99
100#[derive(Debug, Clone)]
102pub struct PathResult {
103 pub bindings: Solution,
104 pub path_length: usize,
105 pub visited_nodes: usize,
106}
107
108pub trait PathDataset {
110 fn find_outgoing(&self, subject: &Term, predicate: &Term) -> Result<Vec<Term>>;
112
113 fn find_incoming(&self, predicate: &Term, object: &Term) -> Result<Vec<Term>>;
115
116 fn find_predicates(&self, subject: &Term, object: &Term) -> Result<Vec<Term>>;
118
119 fn get_predicates(&self) -> Result<Vec<Term>>;
121
122 fn contains_triple(&self, subject: &Term, predicate: &Term, object: &Term) -> Result<bool>;
124}
125
126impl PropertyPathEvaluator {
127 pub fn new() -> Self {
128 Self {
129 context: PathContext::default(),
130 }
131 }
132
133 pub fn with_context(context: PathContext) -> Self {
134 Self { context }
135 }
136
137 pub fn evaluate_path(
139 &self,
140 start: &Term,
141 path: &PropertyPath,
142 end: &Term,
143 dataset: &dyn PathDataset,
144 ) -> Result<bool> {
145 let mut visited = HashSet::new();
146 self.evaluate_path_recursive(start, path, end, dataset, &mut visited, 0)
147 }
148
149 pub fn find_reachable(
151 &self,
152 start: &Term,
153 path: &PropertyPath,
154 dataset: &dyn PathDataset,
155 ) -> Result<Vec<Term>> {
156 match path {
158 PropertyPath::Direct(predicate) => {
159 return dataset.find_outgoing(start, predicate);
160 }
161 PropertyPath::Inverse(inner_path) => {
162 if let PropertyPath::Direct(predicate) = inner_path.as_ref() {
163 return dataset.find_incoming(predicate, start);
164 }
165 }
166 _ => {}
167 }
168
169 let mut result = Vec::new();
171 let mut visited = HashSet::new();
172 let mut queue = VecDeque::new();
173
174 queue.push_back((start.clone(), 0));
175 visited.insert(start.clone());
176
177 while let Some((current, depth)) = queue.pop_front() {
178 if depth >= self.context.max_length {
179 continue;
180 }
181
182 if result.len() >= self.context.max_nodes {
183 break;
184 }
185
186 let reachable =
187 self.evaluate_path_step(¤t, path, dataset, &mut visited, depth)?;
188
189 for node in reachable {
190 if !visited.contains(&node) {
191 visited.insert(node.clone());
192 result.push(node.clone());
193 queue.push_back((node, depth + 1));
194 }
195 }
196 }
197
198 Ok(result)
199 }
200
201 pub fn evaluate_path_with_bindings(
203 &self,
204 start_var: Option<&Variable>,
205 start_term: Option<&Term>,
206 path: &PropertyPath,
207 end_var: Option<&Variable>,
208 end_term: Option<&Term>,
209 dataset: &dyn PathDataset,
210 ) -> Result<Solution> {
211 let mut solution = Vec::new();
212
213 match (start_term, end_term) {
214 (Some(start), Some(end)) => {
215 if self.evaluate_path(start, path, end, dataset)? {
217 let mut binding = HashMap::new();
218 if let Some(var) = start_var {
219 binding.insert(var.clone(), start.clone());
220 }
221 if let Some(var) = end_var {
222 binding.insert(var.clone(), end.clone());
223 }
224 solution.push(binding);
225 }
226 }
227 (Some(start), None) => {
228 let reachable = self.find_reachable(start, path, dataset)?;
230 for end in reachable {
231 let mut binding = HashMap::new();
232 if let Some(var) = start_var {
233 binding.insert(var.clone(), start.clone());
234 }
235 if let Some(var) = end_var {
236 binding.insert(var.clone(), end);
237 }
238 solution.push(binding);
239 }
240 }
241 (None, Some(end)) => {
242 let inverse_path = self.invert_path(path);
244 let reachable = self.find_reachable(end, &inverse_path, dataset)?;
245 for start in reachable {
246 let mut binding = HashMap::new();
247 if let Some(var) = start_var {
248 binding.insert(var.clone(), start);
249 }
250 if let Some(var) = end_var {
251 binding.insert(var.clone(), end.clone());
252 }
253 solution.push(binding);
254 }
255 }
256 (None, None) => {
257 solution = self.enumerate_all_paths(path, dataset)?;
259 }
260 }
261
262 Ok(solution)
263 }
264
265 fn evaluate_path_recursive(
267 &self,
268 current: &Term,
269 path: &PropertyPath,
270 target: &Term,
271 dataset: &dyn PathDataset,
272 visited: &mut HashSet<Term>,
273 depth: usize,
274 ) -> Result<bool> {
275 if depth >= self.context.max_length {
276 return Ok(false);
277 }
278
279 if self.context.cycle_detection && visited.contains(current) {
280 return Ok(false);
281 }
282
283 visited.insert(current.clone());
284
285 let result = match path {
286 PropertyPath::Direct(predicate) => dataset.contains_triple(current, predicate, target),
287 PropertyPath::Inverse(inner_path) => {
288 self.evaluate_path_recursive(target, inner_path, current, dataset, visited, depth)
289 }
290 PropertyPath::Sequence(first, second) => {
291 let intermediate_nodes =
293 self.evaluate_path_step(current, first, dataset, visited, depth)?;
294
295 for intermediate in intermediate_nodes {
296 if self.evaluate_path_recursive(
297 &intermediate,
298 second,
299 target,
300 dataset,
301 visited,
302 depth + 1,
303 )? {
304 return Ok(true);
305 }
306 }
307 Ok(false)
308 }
309 PropertyPath::Alternative(left, right) => Ok(self
310 .evaluate_path_recursive(current, left, target, dataset, visited, depth)?
311 || self
312 .evaluate_path_recursive(current, right, target, dataset, visited, depth)?),
313 PropertyPath::ZeroOrMore(inner_path) => {
314 if current == target {
316 return Ok(true);
317 }
318
319 self.evaluate_one_or_more(current, inner_path, target, dataset, visited, depth)
321 }
322 PropertyPath::OneOrMore(inner_path) => {
323 self.evaluate_one_or_more(current, inner_path, target, dataset, visited, depth)
324 }
325 PropertyPath::ZeroOrOne(inner_path) => {
326 if current == target {
328 return Ok(true);
329 }
330
331 self.evaluate_path_recursive(current, inner_path, target, dataset, visited, depth)
333 }
334 PropertyPath::NegatedPropertySet(predicates) => {
335 let all_predicates = dataset.get_predicates()?;
337 for predicate in all_predicates {
338 if !predicates.contains(&predicate)
339 && dataset.contains_triple(current, &predicate, target)?
340 {
341 return Ok(true);
342 }
343 }
344 Ok(false)
345 }
346 };
347
348 visited.remove(current);
349 result
350 }
351
352 #[allow(clippy::only_used_in_recursion)]
354 fn evaluate_path_step(
355 &self,
356 current: &Term,
357 path: &PropertyPath,
358 dataset: &dyn PathDataset,
359 visited: &mut HashSet<Term>,
360 depth: usize,
361 ) -> Result<Vec<Term>> {
362 match path {
363 PropertyPath::Direct(predicate) => dataset.find_outgoing(current, predicate),
364 PropertyPath::Inverse(inner_path) => {
365 match inner_path.as_ref() {
367 PropertyPath::Direct(predicate) => dataset.find_incoming(predicate, current),
368 _ => {
369 Ok(Vec::new())
371 }
372 }
373 }
374 PropertyPath::Sequence(first, second) => {
375 let mut result = Vec::new();
376 let intermediate_nodes =
377 self.evaluate_path_step(current, first, dataset, visited, depth)?;
378
379 for intermediate in intermediate_nodes {
380 let final_nodes = self.evaluate_path_step(
381 &intermediate,
382 second,
383 dataset,
384 visited,
385 depth + 1,
386 )?;
387 result.extend(final_nodes);
388 }
389
390 Ok(result)
391 }
392 PropertyPath::Alternative(left, right) => {
393 let mut result = self.evaluate_path_step(current, left, dataset, visited, depth)?;
394 let right_result =
395 self.evaluate_path_step(current, right, dataset, visited, depth)?;
396 result.extend(right_result);
397 Ok(result)
398 }
399 _ => {
400 Ok(Vec::new())
402 }
403 }
404 }
405
406 fn evaluate_one_or_more(
408 &self,
409 current: &Term,
410 path: &PropertyPath,
411 target: &Term,
412 dataset: &dyn PathDataset,
413 visited: &mut HashSet<Term>,
414 depth: usize,
415 ) -> Result<bool> {
416 let mut frontier = VecDeque::new();
417 let mut local_visited = HashSet::new();
418
419 frontier.push_back((current.clone(), depth));
420 local_visited.insert(current.clone());
421
422 while let Some((node, current_depth)) = frontier.pop_front() {
423 if current_depth >= self.context.max_length {
424 continue;
425 }
426
427 let reachable =
428 self.evaluate_path_step(&node, path, dataset, visited, current_depth)?;
429
430 for next_node in reachable {
431 if next_node == *target {
432 return Ok(true);
433 }
434
435 if !local_visited.contains(&next_node)
436 && current_depth < self.context.max_length - 1
437 {
438 local_visited.insert(next_node.clone());
439 frontier.push_back((next_node, current_depth + 1));
440 }
441 }
442 }
443
444 Ok(false)
445 }
446
447 #[allow(clippy::only_used_in_recursion)]
449 fn invert_path(&self, path: &PropertyPath) -> PropertyPath {
450 match path {
451 PropertyPath::Direct(term) => {
452 PropertyPath::Inverse(Box::new(PropertyPath::Direct(term.clone())))
453 }
454 PropertyPath::Inverse(inner) => *inner.clone(),
455 PropertyPath::Sequence(first, second) => PropertyPath::Sequence(
456 Box::new(self.invert_path(second)),
457 Box::new(self.invert_path(first)),
458 ),
459 PropertyPath::Alternative(left, right) => PropertyPath::Alternative(
460 Box::new(self.invert_path(left)),
461 Box::new(self.invert_path(right)),
462 ),
463 PropertyPath::ZeroOrMore(inner) => {
464 PropertyPath::ZeroOrMore(Box::new(self.invert_path(inner)))
465 }
466 PropertyPath::OneOrMore(inner) => {
467 PropertyPath::OneOrMore(Box::new(self.invert_path(inner)))
468 }
469 PropertyPath::ZeroOrOne(inner) => {
470 PropertyPath::ZeroOrOne(Box::new(self.invert_path(inner)))
471 }
472 PropertyPath::NegatedPropertySet(predicates) => {
473 PropertyPath::NegatedPropertySet(predicates.clone())
474 }
475 }
476 }
477
478 fn enumerate_all_paths(
480 &self,
481 _path: &PropertyPath,
482 _dataset: &dyn PathDataset,
483 ) -> Result<Solution> {
484 Ok(Vec::new())
487 }
488
489 pub fn optimize_path(&self, path: PropertyPath) -> PropertyPath {
491 match self.context.optimization_level {
492 PathOptimization::None => path,
493 PathOptimization::Basic => self.basic_path_optimization(path),
494 PathOptimization::Advanced => self.advanced_path_optimization(path),
495 }
496 }
497
498 #[allow(clippy::only_used_in_recursion)]
500 fn basic_path_optimization(&self, path: PropertyPath) -> PropertyPath {
501 match path {
502 PropertyPath::Sequence(first, second) => {
503 let opt_first = self.basic_path_optimization(*first);
504 let opt_second = self.basic_path_optimization(*second);
505
506 match (&opt_first, &opt_second) {
508 (PropertyPath::Sequence(a, b), PropertyPath::Sequence(c, d)) => {
509 PropertyPath::Sequence(
511 Box::new(PropertyPath::Sequence(a.clone(), b.clone())),
512 Box::new(PropertyPath::Sequence(c.clone(), d.clone())),
513 )
514 }
515 _ => PropertyPath::Sequence(Box::new(opt_first), Box::new(opt_second)),
516 }
517 }
518 PropertyPath::Alternative(left, right) => {
519 let opt_left = self.basic_path_optimization(*left);
520 let opt_right = self.basic_path_optimization(*right);
521 PropertyPath::Alternative(Box::new(opt_left), Box::new(opt_right))
522 }
523 PropertyPath::ZeroOrMore(inner) => {
524 let opt_inner = self.basic_path_optimization(*inner);
525 PropertyPath::ZeroOrMore(Box::new(opt_inner))
526 }
527 PropertyPath::OneOrMore(inner) => {
528 let opt_inner = self.basic_path_optimization(*inner);
529 PropertyPath::OneOrMore(Box::new(opt_inner))
530 }
531 PropertyPath::ZeroOrOne(inner) => {
532 let opt_inner = self.basic_path_optimization(*inner);
533 PropertyPath::ZeroOrOne(Box::new(opt_inner))
534 }
535 PropertyPath::Inverse(inner) => {
536 let opt_inner = self.basic_path_optimization(*inner);
537
538 if let PropertyPath::Inverse(inner_inner) = opt_inner {
540 *inner_inner
541 } else {
542 PropertyPath::Inverse(Box::new(opt_inner))
543 }
544 }
545 _ => path,
546 }
547 }
548
549 fn advanced_path_optimization(&self, path: PropertyPath) -> PropertyPath {
551 let basic_opt = self.basic_path_optimization(path);
552
553 match basic_opt {
554 PropertyPath::Alternative(left, right) => {
555 self.optimize_alternative_common_subexpressions(*left, *right)
557 }
558 PropertyPath::Sequence(first, second) => {
559 self.optimize_sequence_direct_paths(*first, *second)
561 }
562 _ => basic_opt,
563 }
564 }
565
566 fn optimize_alternative_common_subexpressions(
568 &self,
569 left: PropertyPath,
570 right: PropertyPath,
571 ) -> PropertyPath {
572 PropertyPath::Alternative(Box::new(left), Box::new(right))
574 }
575
576 fn optimize_sequence_direct_paths(
578 &self,
579 first: PropertyPath,
580 second: PropertyPath,
581 ) -> PropertyPath {
582 PropertyPath::Sequence(Box::new(first), Box::new(second))
584 }
585}
586
587impl Default for PropertyPathEvaluator {
588 fn default() -> Self {
589 Self::new()
590 }
591}
592
593#[macro_export]
595macro_rules! path_seq {
596 ($first:expr_2021, $second:expr_2021) => {
597 PropertyPath::Sequence(Box::new($first), Box::new($second))
598 };
599 ($first:expr_2021, $second:expr_2021, $($rest:expr_2021),+) => {
600 PropertyPath::Sequence(
601 Box::new($first),
602 Box::new(path_seq!($second, $($rest),+))
603 )
604 };
605}
606
607#[macro_export]
608macro_rules! path_alt {
609 ($first:expr_2021, $second:expr_2021) => {
610 PropertyPath::Alternative(Box::new($first), Box::new($second))
611 };
612 ($first:expr_2021, $second:expr_2021, $($rest:expr_2021),+) => {
613 PropertyPath::Alternative(
614 Box::new($first),
615 Box::new(path_alt!($second, $($rest),+))
616 )
617 };
618}
619
620#[macro_export]
621macro_rules! path_star {
622 ($path:expr_2021) => {
623 PropertyPath::ZeroOrMore(Box::new($path))
624 };
625}
626
627#[macro_export]
628macro_rules! path_plus {
629 ($path:expr_2021) => {
630 PropertyPath::OneOrMore(Box::new($path))
631 };
632}
633
634#[macro_export]
635macro_rules! path_opt {
636 ($path:expr_2021) => {
637 PropertyPath::ZeroOrOne(Box::new($path))
638 };
639}
640
641#[macro_export]
642macro_rules! path_inv {
643 ($path:expr_2021) => {
644 PropertyPath::Inverse(Box::new($path))
645 };
646}
647
648#[cfg(test)]
649mod tests {
650 use super::*;
651 use oxirs_core::model::NamedNode;
652
653 struct TestDataset {
654 triples: Vec<(Term, Term, Term)>,
655 }
656
657 impl TestDataset {
658 fn new() -> Self {
659 Self {
660 triples: vec![
661 (
662 Term::Iri(NamedNode::new_unchecked("http://example.org/person1")),
663 Term::Iri(NamedNode::new_unchecked("http://xmlns.com/foaf/0.1/knows")),
664 Term::Iri(NamedNode::new_unchecked("http://example.org/person2")),
665 ),
666 (
667 Term::Iri(NamedNode::new_unchecked("http://example.org/person2")),
668 Term::Iri(NamedNode::new_unchecked("http://xmlns.com/foaf/0.1/knows")),
669 Term::Iri(NamedNode::new_unchecked("http://example.org/person3")),
670 ),
671 ],
672 }
673 }
674 }
675
676 impl PathDataset for TestDataset {
677 fn find_outgoing(&self, subject: &Term, predicate: &Term) -> Result<Vec<Term>> {
678 let mut result = Vec::new();
679 for (s, p, o) in &self.triples {
680 if s == subject && p == predicate {
681 result.push(o.clone());
682 }
683 }
684 Ok(result)
685 }
686
687 fn find_incoming(&self, predicate: &Term, object: &Term) -> Result<Vec<Term>> {
688 let mut result = Vec::new();
689 for (s, p, o) in &self.triples {
690 if p == predicate && o == object {
691 result.push(s.clone());
692 }
693 }
694 Ok(result)
695 }
696
697 fn find_predicates(&self, subject: &Term, object: &Term) -> Result<Vec<Term>> {
698 let mut result = Vec::new();
699 for (s, p, o) in &self.triples {
700 if s == subject && o == object {
701 result.push(p.clone());
702 }
703 }
704 Ok(result)
705 }
706
707 fn get_predicates(&self) -> Result<Vec<Term>> {
708 let mut predicates: Vec<_> = self.triples.iter().map(|(_, p, _)| p.clone()).collect();
709 predicates.sort_by(|a, b| format!("{a:?}").cmp(&format!("{b:?}")));
710 predicates.dedup();
711 Ok(predicates)
712 }
713
714 fn contains_triple(&self, subject: &Term, predicate: &Term, object: &Term) -> Result<bool> {
715 Ok(self
716 .triples
717 .iter()
718 .any(|(s, p, o)| s == subject && p == predicate && o == object))
719 }
720 }
721
722 #[test]
723 fn test_direct_path() {
724 let evaluator = PropertyPathEvaluator::new();
725 let dataset = TestDataset::new();
726
727 let path = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
728 "http://xmlns.com/foaf/0.1/knows",
729 )));
730 let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
731 let end = Term::Iri(NamedNode::new_unchecked("http://example.org/person2"));
732
733 assert!(evaluator
734 .evaluate_path(&start, &path, &end, &dataset)
735 .unwrap());
736 }
737
738 #[test]
739 fn test_sequence_path() {
740 let evaluator = PropertyPathEvaluator::new();
741 let dataset = TestDataset::new();
742
743 let knows = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
744 "http://xmlns.com/foaf/0.1/knows",
745 )));
746 let path = path_seq!(knows.clone(), knows);
747
748 let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
749 let end = Term::Iri(NamedNode::new_unchecked("http://example.org/person3"));
750
751 assert!(evaluator
752 .evaluate_path(&start, &path, &end, &dataset)
753 .unwrap());
754 }
755
756 #[test]
757 fn test_find_reachable() {
758 let evaluator = PropertyPathEvaluator::new();
759 let dataset = TestDataset::new();
760
761 let path = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
762 "http://xmlns.com/foaf/0.1/knows",
763 )));
764 let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
765
766 let reachable = evaluator.find_reachable(&start, &path, &dataset).unwrap();
767 assert_eq!(reachable.len(), 1);
768 assert_eq!(
769 reachable[0],
770 Term::Iri(NamedNode::new_unchecked("http://example.org/person2"))
771 );
772 }
773
774 #[test]
775 fn test_path_optimization() {
776 let evaluator = PropertyPathEvaluator::new();
777
778 let direct = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
779 "http://example.org/pred",
780 )));
781 let double_inverse =
782 PropertyPath::Inverse(Box::new(PropertyPath::Inverse(Box::new(direct.clone()))));
783
784 let optimized = evaluator.optimize_path(double_inverse);
785 assert_eq!(optimized, direct);
786 }
787}