use crate::algebra::{Solution, Term, Variable};
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet, VecDeque};
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum PropertyPath {
Direct(Term),
Inverse(Box<PropertyPath>),
Sequence(Box<PropertyPath>, Box<PropertyPath>),
Alternative(Box<PropertyPath>, Box<PropertyPath>),
ZeroOrMore(Box<PropertyPath>),
OneOrMore(Box<PropertyPath>),
ZeroOrOne(Box<PropertyPath>),
NegatedPropertySet(Vec<Term>),
}
impl std::fmt::Display for PropertyPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PropertyPath::Direct(term) => write!(f, "{term:?}"),
PropertyPath::Inverse(path) => write!(f, "^{path}"),
PropertyPath::Sequence(left, right) => write!(f, "{left}/{right}"),
PropertyPath::Alternative(left, right) => write!(f, "{left}|{right}"),
PropertyPath::ZeroOrMore(path) => write!(f, "{path}*"),
PropertyPath::OneOrMore(path) => write!(f, "{path}+"),
PropertyPath::ZeroOrOne(path) => write!(f, "{path}?"),
PropertyPath::NegatedPropertySet(terms) => {
write!(f, "!(")?;
for (i, term) in terms.iter().enumerate() {
if i > 0 {
write!(f, "|")?;
}
write!(f, "{term:?}")?;
}
write!(f, ")")
}
}
}
}
#[derive(Debug, Clone)]
pub struct PathContext {
pub max_length: usize,
pub max_nodes: usize,
pub cycle_detection: bool,
pub optimization_level: PathOptimization,
}
#[derive(Debug, Clone, PartialEq)]
pub enum PathOptimization {
None,
Basic,
Advanced,
}
impl Default for PathContext {
fn default() -> Self {
Self {
max_length: 100,
max_nodes: 10000,
cycle_detection: true,
optimization_level: PathOptimization::Basic,
}
}
}
pub struct PropertyPathEvaluator {
context: PathContext,
}
#[derive(Debug, Clone)]
pub struct PathResult {
pub bindings: Solution,
pub path_length: usize,
pub visited_nodes: usize,
}
pub trait PathDataset {
fn find_outgoing(&self, subject: &Term, predicate: &Term) -> Result<Vec<Term>>;
fn find_incoming(&self, predicate: &Term, object: &Term) -> Result<Vec<Term>>;
fn find_predicates(&self, subject: &Term, object: &Term) -> Result<Vec<Term>>;
fn get_predicates(&self) -> Result<Vec<Term>>;
fn contains_triple(&self, subject: &Term, predicate: &Term, object: &Term) -> Result<bool>;
}
impl PropertyPathEvaluator {
pub fn new() -> Self {
Self {
context: PathContext::default(),
}
}
pub fn with_context(context: PathContext) -> Self {
Self { context }
}
pub fn evaluate_path(
&self,
start: &Term,
path: &PropertyPath,
end: &Term,
dataset: &dyn PathDataset,
) -> Result<bool> {
let mut visited = HashSet::new();
self.evaluate_path_recursive(start, path, end, dataset, &mut visited, 0)
}
pub fn find_reachable(
&self,
start: &Term,
path: &PropertyPath,
dataset: &dyn PathDataset,
) -> Result<Vec<Term>> {
match path {
PropertyPath::Direct(predicate) => {
return dataset.find_outgoing(start, predicate);
}
PropertyPath::Inverse(inner_path) => {
if let PropertyPath::Direct(predicate) = inner_path.as_ref() {
return dataset.find_incoming(predicate, start);
}
}
_ => {}
}
let mut result = Vec::new();
let mut visited = HashSet::new();
let mut queue = VecDeque::new();
queue.push_back((start.clone(), 0));
visited.insert(start.clone());
while let Some((current, depth)) = queue.pop_front() {
if depth >= self.context.max_length {
continue;
}
if result.len() >= self.context.max_nodes {
break;
}
let reachable =
self.evaluate_path_step(¤t, path, dataset, &mut visited, depth)?;
for node in reachable {
if !visited.contains(&node) {
visited.insert(node.clone());
result.push(node.clone());
queue.push_back((node, depth + 1));
}
}
}
Ok(result)
}
pub fn evaluate_path_with_bindings(
&self,
start_var: Option<&Variable>,
start_term: Option<&Term>,
path: &PropertyPath,
end_var: Option<&Variable>,
end_term: Option<&Term>,
dataset: &dyn PathDataset,
) -> Result<Solution> {
let mut solution = Vec::new();
match (start_term, end_term) {
(Some(start), Some(end)) => {
if self.evaluate_path(start, path, end, dataset)? {
let mut binding = HashMap::new();
if let Some(var) = start_var {
binding.insert(var.clone(), start.clone());
}
if let Some(var) = end_var {
binding.insert(var.clone(), end.clone());
}
solution.push(binding);
}
}
(Some(start), None) => {
let reachable = self.find_reachable(start, path, dataset)?;
for end in reachable {
let mut binding = HashMap::new();
if let Some(var) = start_var {
binding.insert(var.clone(), start.clone());
}
if let Some(var) = end_var {
binding.insert(var.clone(), end);
}
solution.push(binding);
}
}
(None, Some(end)) => {
let inverse_path = self.invert_path(path);
let reachable = self.find_reachable(end, &inverse_path, dataset)?;
for start in reachable {
let mut binding = HashMap::new();
if let Some(var) = start_var {
binding.insert(var.clone(), start);
}
if let Some(var) = end_var {
binding.insert(var.clone(), end.clone());
}
solution.push(binding);
}
}
(None, None) => {
solution = self.enumerate_all_paths(path, dataset)?;
}
}
Ok(solution)
}
fn evaluate_path_recursive(
&self,
current: &Term,
path: &PropertyPath,
target: &Term,
dataset: &dyn PathDataset,
visited: &mut HashSet<Term>,
depth: usize,
) -> Result<bool> {
if depth >= self.context.max_length {
return Ok(false);
}
if self.context.cycle_detection && visited.contains(current) {
return Ok(false);
}
visited.insert(current.clone());
let result = match path {
PropertyPath::Direct(predicate) => dataset.contains_triple(current, predicate, target),
PropertyPath::Inverse(inner_path) => {
self.evaluate_path_recursive(target, inner_path, current, dataset, visited, depth)
}
PropertyPath::Sequence(first, second) => {
let intermediate_nodes =
self.evaluate_path_step(current, first, dataset, visited, depth)?;
for intermediate in intermediate_nodes {
if self.evaluate_path_recursive(
&intermediate,
second,
target,
dataset,
visited,
depth + 1,
)? {
return Ok(true);
}
}
Ok(false)
}
PropertyPath::Alternative(left, right) => Ok(self
.evaluate_path_recursive(current, left, target, dataset, visited, depth)?
|| self
.evaluate_path_recursive(current, right, target, dataset, visited, depth)?),
PropertyPath::ZeroOrMore(inner_path) => {
if current == target {
return Ok(true);
}
self.evaluate_one_or_more(current, inner_path, target, dataset, visited, depth)
}
PropertyPath::OneOrMore(inner_path) => {
self.evaluate_one_or_more(current, inner_path, target, dataset, visited, depth)
}
PropertyPath::ZeroOrOne(inner_path) => {
if current == target {
return Ok(true);
}
self.evaluate_path_recursive(current, inner_path, target, dataset, visited, depth)
}
PropertyPath::NegatedPropertySet(predicates) => {
let all_predicates = dataset.get_predicates()?;
for predicate in all_predicates {
if !predicates.contains(&predicate)
&& dataset.contains_triple(current, &predicate, target)?
{
return Ok(true);
}
}
Ok(false)
}
};
visited.remove(current);
result
}
#[allow(clippy::only_used_in_recursion)]
fn evaluate_path_step(
&self,
current: &Term,
path: &PropertyPath,
dataset: &dyn PathDataset,
visited: &mut HashSet<Term>,
depth: usize,
) -> Result<Vec<Term>> {
match path {
PropertyPath::Direct(predicate) => dataset.find_outgoing(current, predicate),
PropertyPath::Inverse(inner_path) => {
match inner_path.as_ref() {
PropertyPath::Direct(predicate) => dataset.find_incoming(predicate, current),
_ => {
Ok(Vec::new())
}
}
}
PropertyPath::Sequence(first, second) => {
let mut result = Vec::new();
let intermediate_nodes =
self.evaluate_path_step(current, first, dataset, visited, depth)?;
for intermediate in intermediate_nodes {
let final_nodes = self.evaluate_path_step(
&intermediate,
second,
dataset,
visited,
depth + 1,
)?;
result.extend(final_nodes);
}
Ok(result)
}
PropertyPath::Alternative(left, right) => {
let mut result = self.evaluate_path_step(current, left, dataset, visited, depth)?;
let right_result =
self.evaluate_path_step(current, right, dataset, visited, depth)?;
result.extend(right_result);
Ok(result)
}
_ => {
Ok(Vec::new())
}
}
}
fn evaluate_one_or_more(
&self,
current: &Term,
path: &PropertyPath,
target: &Term,
dataset: &dyn PathDataset,
visited: &mut HashSet<Term>,
depth: usize,
) -> Result<bool> {
let mut frontier = VecDeque::new();
let mut local_visited = HashSet::new();
frontier.push_back((current.clone(), depth));
local_visited.insert(current.clone());
while let Some((node, current_depth)) = frontier.pop_front() {
if current_depth >= self.context.max_length {
continue;
}
let reachable =
self.evaluate_path_step(&node, path, dataset, visited, current_depth)?;
for next_node in reachable {
if next_node == *target {
return Ok(true);
}
if !local_visited.contains(&next_node)
&& current_depth < self.context.max_length - 1
{
local_visited.insert(next_node.clone());
frontier.push_back((next_node, current_depth + 1));
}
}
}
Ok(false)
}
#[allow(clippy::only_used_in_recursion)]
fn invert_path(&self, path: &PropertyPath) -> PropertyPath {
match path {
PropertyPath::Direct(term) => {
PropertyPath::Inverse(Box::new(PropertyPath::Direct(term.clone())))
}
PropertyPath::Inverse(inner) => *inner.clone(),
PropertyPath::Sequence(first, second) => PropertyPath::Sequence(
Box::new(self.invert_path(second)),
Box::new(self.invert_path(first)),
),
PropertyPath::Alternative(left, right) => PropertyPath::Alternative(
Box::new(self.invert_path(left)),
Box::new(self.invert_path(right)),
),
PropertyPath::ZeroOrMore(inner) => {
PropertyPath::ZeroOrMore(Box::new(self.invert_path(inner)))
}
PropertyPath::OneOrMore(inner) => {
PropertyPath::OneOrMore(Box::new(self.invert_path(inner)))
}
PropertyPath::ZeroOrOne(inner) => {
PropertyPath::ZeroOrOne(Box::new(self.invert_path(inner)))
}
PropertyPath::NegatedPropertySet(predicates) => {
PropertyPath::NegatedPropertySet(predicates.clone())
}
}
}
fn enumerate_all_paths(
&self,
_path: &PropertyPath,
_dataset: &dyn PathDataset,
) -> Result<Solution> {
Ok(Vec::new())
}
pub fn optimize_path(&self, path: PropertyPath) -> PropertyPath {
match self.context.optimization_level {
PathOptimization::None => path,
PathOptimization::Basic => self.basic_path_optimization(path),
PathOptimization::Advanced => self.advanced_path_optimization(path),
}
}
#[allow(clippy::only_used_in_recursion)]
fn basic_path_optimization(&self, path: PropertyPath) -> PropertyPath {
match path {
PropertyPath::Sequence(first, second) => {
let opt_first = self.basic_path_optimization(*first);
let opt_second = self.basic_path_optimization(*second);
match (&opt_first, &opt_second) {
(PropertyPath::Sequence(a, b), PropertyPath::Sequence(c, d)) => {
PropertyPath::Sequence(
Box::new(PropertyPath::Sequence(a.clone(), b.clone())),
Box::new(PropertyPath::Sequence(c.clone(), d.clone())),
)
}
_ => PropertyPath::Sequence(Box::new(opt_first), Box::new(opt_second)),
}
}
PropertyPath::Alternative(left, right) => {
let opt_left = self.basic_path_optimization(*left);
let opt_right = self.basic_path_optimization(*right);
PropertyPath::Alternative(Box::new(opt_left), Box::new(opt_right))
}
PropertyPath::ZeroOrMore(inner) => {
let opt_inner = self.basic_path_optimization(*inner);
PropertyPath::ZeroOrMore(Box::new(opt_inner))
}
PropertyPath::OneOrMore(inner) => {
let opt_inner = self.basic_path_optimization(*inner);
PropertyPath::OneOrMore(Box::new(opt_inner))
}
PropertyPath::ZeroOrOne(inner) => {
let opt_inner = self.basic_path_optimization(*inner);
PropertyPath::ZeroOrOne(Box::new(opt_inner))
}
PropertyPath::Inverse(inner) => {
let opt_inner = self.basic_path_optimization(*inner);
if let PropertyPath::Inverse(inner_inner) = opt_inner {
*inner_inner
} else {
PropertyPath::Inverse(Box::new(opt_inner))
}
}
_ => path,
}
}
fn advanced_path_optimization(&self, path: PropertyPath) -> PropertyPath {
let basic_opt = self.basic_path_optimization(path);
match basic_opt {
PropertyPath::Alternative(left, right) => {
self.optimize_alternative_common_subexpressions(*left, *right)
}
PropertyPath::Sequence(first, second) => {
self.optimize_sequence_direct_paths(*first, *second)
}
_ => basic_opt,
}
}
fn optimize_alternative_common_subexpressions(
&self,
left: PropertyPath,
right: PropertyPath,
) -> PropertyPath {
PropertyPath::Alternative(Box::new(left), Box::new(right))
}
fn optimize_sequence_direct_paths(
&self,
first: PropertyPath,
second: PropertyPath,
) -> PropertyPath {
PropertyPath::Sequence(Box::new(first), Box::new(second))
}
}
impl Default for PropertyPathEvaluator {
fn default() -> Self {
Self::new()
}
}
#[macro_export]
macro_rules! path_seq {
($first:expr_2021, $second:expr_2021) => {
PropertyPath::Sequence(Box::new($first), Box::new($second))
};
($first:expr_2021, $second:expr_2021, $($rest:expr_2021),+) => {
PropertyPath::Sequence(
Box::new($first),
Box::new(path_seq!($second, $($rest),+))
)
};
}
#[macro_export]
macro_rules! path_alt {
($first:expr_2021, $second:expr_2021) => {
PropertyPath::Alternative(Box::new($first), Box::new($second))
};
($first:expr_2021, $second:expr_2021, $($rest:expr_2021),+) => {
PropertyPath::Alternative(
Box::new($first),
Box::new(path_alt!($second, $($rest),+))
)
};
}
#[macro_export]
macro_rules! path_star {
($path:expr_2021) => {
PropertyPath::ZeroOrMore(Box::new($path))
};
}
#[macro_export]
macro_rules! path_plus {
($path:expr_2021) => {
PropertyPath::OneOrMore(Box::new($path))
};
}
#[macro_export]
macro_rules! path_opt {
($path:expr_2021) => {
PropertyPath::ZeroOrOne(Box::new($path))
};
}
#[macro_export]
macro_rules! path_inv {
($path:expr_2021) => {
PropertyPath::Inverse(Box::new($path))
};
}
#[cfg(test)]
mod tests {
use super::*;
use oxirs_core::model::NamedNode;
struct TestDataset {
triples: Vec<(Term, Term, Term)>,
}
impl TestDataset {
fn new() -> Self {
Self {
triples: vec![
(
Term::Iri(NamedNode::new_unchecked("http://example.org/person1")),
Term::Iri(NamedNode::new_unchecked("http://xmlns.com/foaf/0.1/knows")),
Term::Iri(NamedNode::new_unchecked("http://example.org/person2")),
),
(
Term::Iri(NamedNode::new_unchecked("http://example.org/person2")),
Term::Iri(NamedNode::new_unchecked("http://xmlns.com/foaf/0.1/knows")),
Term::Iri(NamedNode::new_unchecked("http://example.org/person3")),
),
],
}
}
}
impl PathDataset for TestDataset {
fn find_outgoing(&self, subject: &Term, predicate: &Term) -> Result<Vec<Term>> {
let mut result = Vec::new();
for (s, p, o) in &self.triples {
if s == subject && p == predicate {
result.push(o.clone());
}
}
Ok(result)
}
fn find_incoming(&self, predicate: &Term, object: &Term) -> Result<Vec<Term>> {
let mut result = Vec::new();
for (s, p, o) in &self.triples {
if p == predicate && o == object {
result.push(s.clone());
}
}
Ok(result)
}
fn find_predicates(&self, subject: &Term, object: &Term) -> Result<Vec<Term>> {
let mut result = Vec::new();
for (s, p, o) in &self.triples {
if s == subject && o == object {
result.push(p.clone());
}
}
Ok(result)
}
fn get_predicates(&self) -> Result<Vec<Term>> {
let mut predicates: Vec<_> = self.triples.iter().map(|(_, p, _)| p.clone()).collect();
predicates.sort_by(|a, b| format!("{a:?}").cmp(&format!("{b:?}")));
predicates.dedup();
Ok(predicates)
}
fn contains_triple(&self, subject: &Term, predicate: &Term, object: &Term) -> Result<bool> {
Ok(self
.triples
.iter()
.any(|(s, p, o)| s == subject && p == predicate && o == object))
}
}
#[test]
fn test_direct_path() {
let evaluator = PropertyPathEvaluator::new();
let dataset = TestDataset::new();
let path = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
"http://xmlns.com/foaf/0.1/knows",
)));
let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
let end = Term::Iri(NamedNode::new_unchecked("http://example.org/person2"));
assert!(evaluator
.evaluate_path(&start, &path, &end, &dataset)
.unwrap());
}
#[test]
fn test_sequence_path() {
let evaluator = PropertyPathEvaluator::new();
let dataset = TestDataset::new();
let knows = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
"http://xmlns.com/foaf/0.1/knows",
)));
let path = path_seq!(knows.clone(), knows);
let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
let end = Term::Iri(NamedNode::new_unchecked("http://example.org/person3"));
assert!(evaluator
.evaluate_path(&start, &path, &end, &dataset)
.unwrap());
}
#[test]
fn test_find_reachable() {
let evaluator = PropertyPathEvaluator::new();
let dataset = TestDataset::new();
let path = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
"http://xmlns.com/foaf/0.1/knows",
)));
let start = Term::Iri(NamedNode::new_unchecked("http://example.org/person1"));
let reachable = evaluator.find_reachable(&start, &path, &dataset).unwrap();
assert_eq!(reachable.len(), 1);
assert_eq!(
reachable[0],
Term::Iri(NamedNode::new_unchecked("http://example.org/person2"))
);
}
#[test]
fn test_path_optimization() {
let evaluator = PropertyPathEvaluator::new();
let direct = PropertyPath::Direct(Term::Iri(NamedNode::new_unchecked(
"http://example.org/pred",
)));
let double_inverse =
PropertyPath::Inverse(Box::new(PropertyPath::Inverse(Box::new(direct.clone()))));
let optimized = evaluator.optimize_path(double_inverse);
assert_eq!(optimized, direct);
}
}