use std::collections::HashSet;
use std::collections::VecDeque;
use crate::id::id_from_value;
use crate::id::id_into_value;
use crate::id::RawId;
use crate::id::ID_LEN;
use crate::query::intersectionconstraint::IntersectionConstraint;
use crate::query::Binding;
use crate::query::Constraint;
use crate::query::Query;
use crate::query::TriblePattern;
use crate::query::Variable;
use crate::query::VariableContext;
use crate::query::VariableId;
use crate::query::VariableSet;
use crate::trible::TribleSet;
use crate::inline::encodings::genid::GenId;
use crate::inline::Inline;
use crate::inline::RawInline;
use crate::inline::IntoInline;
#[derive(Clone)]
pub enum PathOp {
Attr(RawId),
Concat,
Union,
Star,
Plus,
Optional,
Inverse,
}
#[derive(Clone)]
enum PathExpr {
Attr(RawId),
InverseAttr(RawId),
Concat(Box<PathExpr>, Box<PathExpr>),
Union(Box<PathExpr>, Box<PathExpr>),
Star(Box<PathExpr>),
Plus(Box<PathExpr>),
Optional(Box<PathExpr>),
}
impl PathExpr {
fn from_postfix(ops: &[PathOp]) -> Self {
let mut stack: Vec<PathExpr> = Vec::new();
for op in ops {
match op {
PathOp::Attr(id) => stack.push(PathExpr::Attr(*id)),
PathOp::Concat => {
let b = stack.pop().unwrap();
let a = stack.pop().unwrap();
stack.push(PathExpr::Concat(Box::new(a), Box::new(b)));
}
PathOp::Union => {
let b = stack.pop().unwrap();
let a = stack.pop().unwrap();
stack.push(PathExpr::Union(Box::new(a), Box::new(b)));
}
PathOp::Star => {
let a = stack.pop().unwrap();
stack.push(PathExpr::Star(Box::new(a)));
}
PathOp::Plus => {
let a = stack.pop().unwrap();
stack.push(PathExpr::Plus(Box::new(a)));
}
PathOp::Optional => {
let a = stack.pop().unwrap();
stack.push(PathExpr::Optional(Box::new(a)));
}
PathOp::Inverse => {
let a = stack.pop().unwrap();
stack.push(invert(a));
}
}
}
normalize(stack.pop().unwrap())
}
fn build_constraint(
&self,
set: &TribleSet,
ctx: &mut VariableContext,
start: Variable<GenId>,
constraints: &mut Vec<Box<dyn Constraint<'static> + 'static>>,
) -> Variable<GenId> {
match self {
PathExpr::Attr(attr_id) => {
let a = ctx.next_variable::<GenId>();
let dest = ctx.next_variable::<GenId>();
constraints.push(Box::new(a.is(attr_id.to_inline())));
constraints.push(Box::new(set.pattern(start, a, dest)));
dest
}
PathExpr::InverseAttr(attr_id) => {
let a = ctx.next_variable::<GenId>();
let dest = ctx.next_variable::<GenId>();
constraints.push(Box::new(a.is(attr_id.to_inline())));
constraints.push(Box::new(set.pattern(dest, a, start)));
dest
}
PathExpr::Concat(lhs, rhs) => {
let mid = lhs.build_constraint(set, ctx, start, constraints);
rhs.build_constraint(set, ctx, mid, constraints)
}
PathExpr::Union(..)
| PathExpr::Star(..)
| PathExpr::Plus(..)
| PathExpr::Optional(..) => {
unreachable!("closures, unions, and optionals handled at eval_from level")
}
}
}
}
fn invert(expr: PathExpr) -> PathExpr {
match expr {
PathExpr::Attr(a) => PathExpr::InverseAttr(a),
PathExpr::InverseAttr(a) => PathExpr::Attr(a),
PathExpr::Concat(lhs, rhs) => PathExpr::Concat(Box::new(invert(*rhs)), Box::new(invert(*lhs))),
PathExpr::Union(lhs, rhs) => PathExpr::Union(Box::new(invert(*lhs)), Box::new(invert(*rhs))),
PathExpr::Star(body) => PathExpr::Star(Box::new(invert(*body))),
PathExpr::Plus(body) => PathExpr::Plus(Box::new(invert(*body))),
PathExpr::Optional(body) => PathExpr::Optional(Box::new(invert(*body))),
}
}
fn normalize(expr: PathExpr) -> PathExpr {
match expr {
PathExpr::Attr(a) => PathExpr::Attr(a),
PathExpr::InverseAttr(a) => PathExpr::InverseAttr(a),
PathExpr::Concat(lhs, rhs) => {
let l = normalize(*lhs);
let r = normalize(*rhs);
distribute_concat(l, r)
}
PathExpr::Union(lhs, rhs) => {
PathExpr::Union(Box::new(normalize(*lhs)), Box::new(normalize(*rhs)))
}
PathExpr::Star(body) => PathExpr::Star(Box::new(normalize(*body))),
PathExpr::Plus(body) => PathExpr::Plus(Box::new(normalize(*body))),
PathExpr::Optional(body) => PathExpr::Optional(Box::new(normalize(*body))),
}
}
fn distribute_concat(l: PathExpr, r: PathExpr) -> PathExpr {
match (l, r) {
(PathExpr::Union(a, b), c) => PathExpr::Union(
Box::new(distribute_concat(*a, c.clone())),
Box::new(distribute_concat(*b, c)),
),
(a, PathExpr::Union(b, c)) => PathExpr::Union(
Box::new(distribute_concat(a.clone(), *b)),
Box::new(distribute_concat(a, *c)),
),
(PathExpr::Optional(a), c) => PathExpr::Union(
Box::new(c.clone()),
Box::new(distribute_concat(*a, c)),
),
(a, PathExpr::Optional(b)) => PathExpr::Union(
Box::new(a.clone()),
Box::new(distribute_concat(a, *b)),
),
(l, r) => PathExpr::Concat(Box::new(l), Box::new(r)),
}
}
fn build_join(
set: &TribleSet,
expr: &PathExpr,
start: &RawId,
) -> (
IntersectionConstraint<Box<dyn Constraint<'static>>>,
VariableId,
) {
let mut ctx = VariableContext::new();
let start_var = ctx.next_variable::<GenId>();
let mut constraints: Vec<Box<dyn Constraint<'static> + 'static>> = Vec::new();
constraints.push(Box::new(start_var.is(start.to_inline())));
let dest_var = expr.build_constraint(set, &mut ctx, start_var, &mut constraints);
(IntersectionConstraint::new(constraints), dest_var.index)
}
fn eval_attr(set: &TribleSet, attr: &RawId, start: &RawId) -> HashSet<RawId> {
let mut results = HashSet::new();
let mut prefix = [0u8; ID_LEN * 2];
prefix[..ID_LEN].copy_from_slice(start);
prefix[ID_LEN..].copy_from_slice(attr);
set.eav
.infixes::<{ ID_LEN * 2 }, 32, _>(&prefix, |value: &[u8; 32]| {
if value[..ID_LEN] == [0; ID_LEN] {
let dest: RawId = value[ID_LEN..].try_into().unwrap();
results.insert(dest);
}
});
results
}
fn eval_attr_inverse(set: &TribleSet, attr: &RawId, start: &RawId) -> HashSet<RawId> {
let mut results = HashSet::new();
let start_value = id_into_value(start);
let mut prefix = [0u8; 32 + ID_LEN];
prefix[..32].copy_from_slice(&start_value);
prefix[32..].copy_from_slice(attr);
set.vae
.infixes::<{ 32 + ID_LEN }, ID_LEN, _>(&prefix, |entity: &[u8; ID_LEN]| {
results.insert(*entity);
});
results
}
fn has_unbounded_closure(expr: &PathExpr) -> bool {
match expr {
PathExpr::Plus(_) | PathExpr::Star(_) => true,
PathExpr::Attr(_) | PathExpr::InverseAttr(_) => false,
PathExpr::Concat(a, b) | PathExpr::Union(a, b) => {
has_unbounded_closure(a) || has_unbounded_closure(b)
}
PathExpr::Optional(body) => has_unbounded_closure(body),
}
}
fn eval_from(set: &TribleSet, expr: &PathExpr, start: &RawId) -> HashSet<RawId> {
match expr {
PathExpr::Attr(attr) => eval_attr(set, attr, start),
PathExpr::InverseAttr(attr) => eval_attr_inverse(set, attr, start),
PathExpr::Concat(lhs, rhs) => {
if has_unbounded_closure(lhs) || has_unbounded_closure(rhs) {
let mut results = HashSet::new();
for mid in eval_from(set, lhs, start) {
results.extend(eval_from(set, rhs, &mid));
}
return results;
}
let (constraint, dest_idx) = build_join(set, expr, start);
Query::new(constraint, move |binding: &Binding| {
let raw = binding.get(dest_idx)?;
id_from_value(raw)
})
.collect()
}
PathExpr::Union(lhs, rhs) => {
let mut results = eval_from(set, lhs, start);
results.extend(eval_from(set, rhs, start));
results
}
PathExpr::Plus(body) => {
let mut visited: HashSet<RawId> = HashSet::new();
let mut results: HashSet<RawId> = HashSet::new();
let mut frontier: VecDeque<RawId> = VecDeque::new();
frontier.push_back(*start);
visited.insert(*start);
while let Some(node) = frontier.pop_front() {
for dest in eval_from(set, body, &node) {
results.insert(dest);
if visited.insert(dest) {
frontier.push_back(dest);
}
}
}
results
}
PathExpr::Star(body) => {
let mut results = eval_from(set, &PathExpr::Plus(body.clone()), start);
results.insert(*start);
results
}
PathExpr::Optional(body) => {
let mut results = eval_from(set, body, start);
results.insert(*start);
results
}
}
}
fn has_path(set: &TribleSet, expr: &PathExpr, from: &RawId, to: &RawId) -> bool {
match expr {
PathExpr::Attr(attr) => eval_attr(set, attr, from).contains(to),
PathExpr::InverseAttr(attr) => eval_attr_inverse(set, attr, from).contains(to),
PathExpr::Concat(lhs, rhs) if has_unbounded_closure(lhs) || has_unbounded_closure(rhs) => {
for mid in eval_from(set, lhs, from) {
if has_path(set, rhs, &mid, to) {
return true;
}
}
false
}
PathExpr::Concat(_, _) => {
let (constraint, dest_idx) = build_join(set, expr, from);
Query::new(constraint, move |binding: &Binding| {
let raw = binding.get(dest_idx)?;
id_from_value(raw)
})
.any(|dest| dest == *to)
}
PathExpr::Union(lhs, rhs) => has_path(set, lhs, from, to) || has_path(set, rhs, from, to),
PathExpr::Plus(body) => {
let mut visited: HashSet<RawId> = HashSet::new();
let mut frontier: VecDeque<RawId> = VecDeque::new();
frontier.push_back(*from);
visited.insert(*from);
while let Some(node) = frontier.pop_front() {
for dest in eval_from(set, body, &node) {
if dest == *to {
return true;
}
if visited.insert(dest) {
frontier.push_back(dest);
}
}
}
false
}
PathExpr::Star(body) => {
if from == to {
return true;
}
has_path(set, &PathExpr::Plus(body.clone()), from, to)
}
PathExpr::Optional(body) => {
if from == to {
return true;
}
has_path(set, body, from, to)
}
}
}
const RPQ_ESTIMATE_DEPTH: usize = 5;
fn bounded_eval_from(
set: &TribleSet,
expr: &PathExpr,
start: &RawId,
depth: usize,
) -> HashSet<RawId> {
match expr {
PathExpr::Attr(attr) => eval_attr(set, attr, start),
PathExpr::InverseAttr(attr) => eval_attr_inverse(set, attr, start),
PathExpr::Concat(lhs, rhs) => {
let mut results = HashSet::new();
for mid in bounded_eval_from(set, lhs, start, depth) {
results.extend(bounded_eval_from(set, rhs, &mid, depth));
}
results
}
PathExpr::Union(lhs, rhs) => {
let mut results = bounded_eval_from(set, lhs, start, depth);
results.extend(bounded_eval_from(set, rhs, start, depth));
results
}
PathExpr::Plus(body) => {
let mut results: HashSet<RawId> = HashSet::new();
let mut visited: HashSet<RawId> = HashSet::new();
let mut frontier: Vec<RawId> = vec![*start];
visited.insert(*start);
for _ in 0..depth {
let mut next: Vec<RawId> = Vec::new();
for node in &frontier {
for dest in bounded_eval_from(set, body, node, depth) {
results.insert(dest);
if visited.insert(dest) {
next.push(dest);
}
}
}
if next.is_empty() {
break;
}
frontier = next;
}
results
}
PathExpr::Star(body) => {
let mut results = bounded_eval_from(
set,
&PathExpr::Plus(body.clone()),
start,
depth,
);
results.insert(*start);
results
}
PathExpr::Optional(body) => {
let mut results = bounded_eval_from(set, body, start, depth);
results.insert(*start);
results
}
}
}
fn estimate_from(set: &TribleSet, expr: &PathExpr, start: &RawId) -> usize {
let body = match expr {
PathExpr::Star(inner) | PathExpr::Plus(inner) | PathExpr::Optional(inner) => {
inner.as_ref()
}
other => other,
};
match body {
PathExpr::Attr(attr) => {
let mut prefix = [0u8; ID_LEN * 2];
prefix[..ID_LEN].copy_from_slice(start);
prefix[ID_LEN..].copy_from_slice(attr);
set.eav.segmented_len(&prefix) as usize
}
PathExpr::InverseAttr(attr) => {
let start_value = id_into_value(start);
let mut prefix = [0u8; 32 + ID_LEN];
prefix[..32].copy_from_slice(&start_value);
prefix[32..].copy_from_slice(attr);
set.vae.segmented_len(&prefix) as usize
}
PathExpr::Union(lhs, rhs) => {
estimate_from(set, lhs, start) + estimate_from(set, rhs, start)
}
_ if has_unbounded_closure(body) => {
bounded_eval_from(set, body, start, RPQ_ESTIMATE_DEPTH).len()
}
_ => {
let (constraint, dest_idx) = build_join(set, body, start);
let mut binding = Binding::default();
let start_inline: Inline<GenId> = start.to_inline();
binding.set(0, &start_inline.raw);
constraint.estimate(dest_idx, &binding).unwrap_or(0)
}
}
}
pub struct RegularPathConstraint {
start: VariableId,
end: VariableId,
expr: PathExpr,
set: TribleSet,
}
impl RegularPathConstraint {
pub fn new(
set: TribleSet,
start: Variable<GenId>,
end: Variable<GenId>,
ops: &[PathOp],
) -> Self {
let expr = PathExpr::from_postfix(ops);
RegularPathConstraint {
start: start.index,
end: end.index,
expr,
set,
}
}
fn all_nodes(&self) -> Vec<RawInline> {
let mut node_set: HashSet<RawInline> = HashSet::new();
for t in self.set.iter() {
let v = &t.data[32..64];
if v[..ID_LEN] == [0; ID_LEN] {
let dest: RawId = v[ID_LEN..].try_into().unwrap();
node_set.insert(id_into_value(&dest));
let e: RawId = t.data[..ID_LEN].try_into().unwrap();
node_set.insert(id_into_value(&e));
}
}
node_set.into_iter().collect()
}
}
impl<'a> Constraint<'a> for RegularPathConstraint {
fn variables(&self) -> VariableSet {
let mut vars = VariableSet::new_empty();
vars.set(self.start);
vars.set(self.end);
vars
}
fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
if variable == self.end {
if let Some(start_val) = binding.get(self.start) {
if let Some(start_id) = id_from_value(start_val) {
return Some(estimate_from(&self.set, &self.expr, &start_id).max(1));
}
return Some(0);
}
Some(self.set.len())
} else if variable == self.start {
Some(self.set.len())
} else {
None
}
}
fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
if variable == self.end {
if let Some(start_val) = binding.get(self.start) {
if let Some(start_id) = id_from_value(start_val) {
let reachable = eval_from(&self.set, &self.expr, &start_id);
proposals.extend(reachable.iter().map(id_into_value));
}
return;
}
}
if variable == self.start {
if let Some(end_val) = binding.get(self.end) {
if let Some(end_id) = id_from_value(end_val) {
let mut candidates = self.all_nodes();
candidates.push(id_into_value(&end_id));
proposals.extend(candidates.into_iter().filter(|v| {
id_from_value(v)
.map_or(false, |sid| has_path(&self.set, &self.expr, &sid, &end_id))
}));
}
return;
}
}
if variable == self.start || variable == self.end {
proposals.extend(self.all_nodes());
}
}
fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawInline>) {
if variable == self.start {
if let Some(end_val) = binding.get(self.end) {
if let Some(end_id) = id_from_value(end_val) {
proposals.retain(|v| {
id_from_value(v)
.map_or(false, |sid| has_path(&self.set, &self.expr, &sid, &end_id))
});
} else {
proposals.clear();
}
}
} else if variable == self.end {
if let Some(start_val) = binding.get(self.start) {
if let Some(start_id) = id_from_value(start_val) {
proposals.retain(|v| {
id_from_value(v).map_or(false, |eid| {
has_path(&self.set, &self.expr, &start_id, &eid)
})
});
} else {
proposals.clear();
}
}
}
}
}