Skip to main content

triblespace_core/query/
regularpathconstraint.rs

1use std::collections::HashSet;
2use std::collections::VecDeque;
3
4use crate::id::id_from_value;
5use crate::id::id_into_value;
6use crate::id::RawId;
7use crate::id::ID_LEN;
8use crate::query::intersectionconstraint::IntersectionConstraint;
9use crate::query::Binding;
10use crate::query::Constraint;
11use crate::query::Query;
12use crate::query::TriblePattern;
13use crate::query::Variable;
14use crate::query::VariableContext;
15use crate::query::VariableId;
16use crate::query::VariableSet;
17use crate::trible::TribleSet;
18use crate::value::schemas::genid::GenId;
19use crate::value::RawValue;
20use crate::value::ToValue;
21
22// ── Path expression types ────────────────────────────────────────────────
23
24/// Postfix-encoded path operations (used by the [`path!`](crate::path) macro).
25///
26/// The macro compiles a path expression into a sequence of these
27/// operations. [`RegularPathConstraint::new`] converts the postfix
28/// sequence into a tree for evaluation.
29#[derive(Clone)]
30pub enum PathOp {
31    /// Single-attribute hop: traverse the given attribute.
32    Attr(RawId),
33    /// Concatenation: compose the two preceding sub-expressions.
34    Concat,
35    /// Alternation: match either of the two preceding sub-expressions.
36    Union,
37    /// Reflexive-transitive closure (`*`): zero or more repetitions.
38    Star,
39    /// Transitive closure (`+`): one or more repetitions.
40    Plus,
41}
42
43/// Tree-structured path expression for recursive evaluation.
44#[derive(Clone)]
45enum PathExpr {
46    Attr(RawId),
47    Concat(Box<PathExpr>, Box<PathExpr>),
48    Union(Box<PathExpr>, Box<PathExpr>),
49    Star(Box<PathExpr>),
50    Plus(Box<PathExpr>),
51}
52
53impl PathExpr {
54    fn from_postfix(ops: &[PathOp]) -> Self {
55        let mut stack: Vec<PathExpr> = Vec::new();
56        for op in ops {
57            match op {
58                PathOp::Attr(id) => stack.push(PathExpr::Attr(*id)),
59                PathOp::Concat => {
60                    let b = stack.pop().unwrap();
61                    let a = stack.pop().unwrap();
62                    stack.push(PathExpr::Concat(Box::new(a), Box::new(b)));
63                }
64                PathOp::Union => {
65                    let b = stack.pop().unwrap();
66                    let a = stack.pop().unwrap();
67                    stack.push(PathExpr::Union(Box::new(a), Box::new(b)));
68                }
69                PathOp::Star => {
70                    let a = stack.pop().unwrap();
71                    stack.push(PathExpr::Star(Box::new(a)));
72                }
73                PathOp::Plus => {
74                    let a = stack.pop().unwrap();
75                    stack.push(PathExpr::Plus(Box::new(a)));
76                }
77            }
78        }
79        stack.pop().unwrap()
80    }
81
82    /// Build constraints for this expression, returning the destination variable.
83    /// Allocates fresh variables from `ctx` and pushes constraints.
84    fn build_constraint(
85        &self,
86        set: &TribleSet,
87        ctx: &mut VariableContext,
88        start: Variable<GenId>,
89        constraints: &mut Vec<Box<dyn Constraint<'static> + 'static>>,
90    ) -> Variable<GenId> {
91        match self {
92            PathExpr::Attr(attr_id) => {
93                let a = ctx.next_variable::<GenId>();
94                let dest = ctx.next_variable::<GenId>();
95                constraints.push(Box::new(a.is(attr_id.to_value())));
96                constraints.push(Box::new(set.pattern(start, a, dest)));
97                dest
98            }
99            PathExpr::Concat(lhs, rhs) => {
100                let mid = lhs.build_constraint(set, ctx, start, constraints);
101                rhs.build_constraint(set, ctx, mid, constraints)
102            }
103            PathExpr::Union(..) | PathExpr::Star(..) | PathExpr::Plus(..) => {
104                unreachable!("closures and unions handled at eval_from level")
105            }
106        }
107    }
108}
109
110/// Build the WCO join constraint for a non-closure expression with a bound start,
111/// returning the constraint and the destination variable index.
112fn build_join(
113    set: &TribleSet,
114    expr: &PathExpr,
115    start: &RawId,
116) -> (IntersectionConstraint<Box<dyn Constraint<'static>>>, VariableId) {
117    let mut ctx = VariableContext::new();
118    let start_var = ctx.next_variable::<GenId>();
119    let mut constraints: Vec<Box<dyn Constraint<'static> + 'static>> = Vec::new();
120    constraints.push(Box::new(start_var.is(start.to_value())));
121    let dest_var = expr.build_constraint(set, &mut ctx, start_var, &mut constraints);
122    (IntersectionConstraint::new(constraints), dest_var.index)
123}
124
125// ── Recursive path evaluator ─────────────────────────────────────────────
126
127/// Evaluate a path expression from a bound start node, returning all
128/// reachable endpoints. Uses the WCO join engine for Attr/Concat bodies
129/// and BFS for transitive closures.
130/// Single-attribute hop via direct index scan. No query engine overhead.
131fn eval_attr(set: &TribleSet, attr: &RawId, start: &RawId) -> HashSet<RawId> {
132    let mut results = HashSet::new();
133    let mut prefix = [0u8; ID_LEN * 2];
134    prefix[..ID_LEN].copy_from_slice(start);
135    prefix[ID_LEN..].copy_from_slice(attr);
136    set.eav
137        .infixes::<{ ID_LEN * 2 }, 32, _>(&prefix, |value: &[u8; 32]| {
138            if value[..ID_LEN] == [0; ID_LEN] {
139                let dest: RawId = value[ID_LEN..].try_into().unwrap();
140                results.insert(dest);
141            }
142        });
143    results
144}
145
146fn eval_from(set: &TribleSet, expr: &PathExpr, start: &RawId) -> HashSet<RawId> {
147    match expr {
148        PathExpr::Attr(attr) => eval_attr(set, attr, start),
149        PathExpr::Concat(_, _) => {
150            let (constraint, dest_idx) = build_join(set, expr, start);
151            Query::new(constraint, move |binding: &Binding| {
152                let raw = binding.get(dest_idx)?;
153                id_from_value(raw)
154            })
155            .collect()
156        }
157        PathExpr::Union(lhs, rhs) => {
158            let mut results = eval_from(set, lhs, start);
159            results.extend(eval_from(set, rhs, start));
160            results
161        }
162        PathExpr::Plus(body) => {
163            let mut visited: HashSet<RawId> = HashSet::new();
164            let mut results: HashSet<RawId> = HashSet::new();
165            let mut frontier: VecDeque<RawId> = VecDeque::new();
166            frontier.push_back(*start);
167            visited.insert(*start);
168
169            while let Some(node) = frontier.pop_front() {
170                for dest in eval_from(set, body, &node) {
171                    results.insert(dest);
172                    if visited.insert(dest) {
173                        frontier.push_back(dest);
174                    }
175                }
176            }
177            results
178        }
179        PathExpr::Star(body) => {
180            let mut results = eval_from(set, &PathExpr::Plus(body.clone()), start);
181            results.insert(*start);
182            results
183        }
184    }
185}
186
187fn has_path(set: &TribleSet, expr: &PathExpr, from: &RawId, to: &RawId) -> bool {
188    match expr {
189        PathExpr::Attr(attr) => eval_attr(set, attr, from).contains(to),
190        PathExpr::Concat(_, _) => {
191            let (constraint, dest_idx) = build_join(set, expr, from);
192            Query::new(constraint, move |binding: &Binding| {
193                let raw = binding.get(dest_idx)?;
194                id_from_value(raw)
195            })
196            .any(|dest| dest == *to)
197        }
198        PathExpr::Union(lhs, rhs) => {
199            has_path(set, lhs, from, to) || has_path(set, rhs, from, to)
200        }
201        PathExpr::Plus(body) => {
202            let mut visited: HashSet<RawId> = HashSet::new();
203            let mut frontier: VecDeque<RawId> = VecDeque::new();
204            frontier.push_back(*from);
205            visited.insert(*from);
206
207            while let Some(node) = frontier.pop_front() {
208                for dest in eval_from(set, body, &node) {
209                    if dest == *to {
210                        return true;
211                    }
212                    if visited.insert(dest) {
213                        frontier.push_back(dest);
214                    }
215                }
216            }
217            false
218        }
219        PathExpr::Star(body) => {
220            if from == to {
221                return true;
222            }
223            has_path(set, &PathExpr::Plus(body.clone()), from, to)
224        }
225    }
226}
227
228/// Shallow estimate: build the one-step constraint and ask it for the
229/// destination variable's cardinality with the start bound.
230fn estimate_from(set: &TribleSet, expr: &PathExpr, start: &RawId) -> usize {
231    // Unwrap closure to get the body for estimation.
232    let body = match expr {
233        PathExpr::Star(inner) | PathExpr::Plus(inner) => inner.as_ref(),
234        other => other,
235    };
236    match body {
237        PathExpr::Attr(attr) => {
238            let mut prefix = [0u8; ID_LEN * 2];
239            prefix[..ID_LEN].copy_from_slice(start);
240            prefix[ID_LEN..].copy_from_slice(attr);
241            set.eav.segmented_len(&prefix) as usize
242        }
243        PathExpr::Union(lhs, rhs) => {
244            estimate_from(set, lhs, start) + estimate_from(set, rhs, start)
245        }
246        _ => {
247            let (constraint, dest_idx) = build_join(set, body, start);
248            let mut binding = Binding::default();
249            binding.set(0, &start.to_value().raw);
250            constraint.estimate(dest_idx, &binding).unwrap_or(0)
251        }
252    }
253}
254
255// ── Constraint ───────────────────────────────────────────────────────────
256
257/// Constrains two variables to be connected by a regular path expression.
258///
259/// Created by the [`path!`](crate::path) macro. The path expression
260/// supports concatenation, alternation (`|`), transitive closure (`+`),
261/// and reflexive-transitive closure (`*`). Single-attribute hops use
262/// direct index scans; multi-step paths use the WCO join engine for
263/// concatenation and BFS for closures.
264///
265/// When the start variable is bound, propose enumerates all reachable
266/// endpoints. When the end is bound, confirm checks reachability.
267pub struct RegularPathConstraint {
268    start: VariableId,
269    end: VariableId,
270    expr: PathExpr,
271    set: TribleSet,
272}
273
274impl RegularPathConstraint {
275    /// Creates a path constraint from `start` to `end` over the given
276    /// postfix-encoded path operations.
277    pub fn new(
278        set: TribleSet,
279        start: Variable<GenId>,
280        end: Variable<GenId>,
281        ops: &[PathOp],
282    ) -> Self {
283        let expr = PathExpr::from_postfix(ops);
284        RegularPathConstraint {
285            start: start.index,
286            end: end.index,
287            expr,
288            set,
289        }
290    }
291
292    /// Lazily collect all GenId nodes in the TribleSet.
293    /// Only called when neither start nor end is bound.
294    fn all_nodes(&self) -> Vec<RawValue> {
295        let mut node_set: HashSet<RawValue> = HashSet::new();
296        for t in self.set.iter() {
297            let v = &t.data[32..64];
298            if v[..ID_LEN] == [0; ID_LEN] {
299                let dest: RawId = v[ID_LEN..].try_into().unwrap();
300                node_set.insert(id_into_value(&dest));
301                let e: RawId = t.data[..ID_LEN].try_into().unwrap();
302                node_set.insert(id_into_value(&e));
303            }
304        }
305        node_set.into_iter().collect()
306    }
307}
308
309impl<'a> Constraint<'a> for RegularPathConstraint {
310    fn variables(&self) -> VariableSet {
311        let mut vars = VariableSet::new_empty();
312        vars.set(self.start);
313        vars.set(self.end);
314        vars
315    }
316
317    fn estimate(&self, variable: VariableId, binding: &Binding) -> Option<usize> {
318        if variable == self.end {
319            if let Some(start_val) = binding.get(self.start) {
320                if let Some(start_id) = id_from_value(start_val) {
321                    return Some(estimate_from(&self.set, &self.expr, &start_id).max(1));
322                }
323                return Some(0);
324            }
325            Some(self.set.len())
326        } else if variable == self.start {
327            Some(self.set.len())
328        } else {
329            None
330        }
331    }
332
333    fn propose(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawValue>) {
334        if variable == self.end {
335            if let Some(start_val) = binding.get(self.start) {
336                if let Some(start_id) = id_from_value(start_val) {
337                    let reachable = eval_from(&self.set, &self.expr, &start_id);
338                    proposals.extend(reachable.iter().map(id_into_value));
339                }
340                return;
341            }
342        }
343        if variable == self.start || variable == self.end {
344            proposals.extend(self.all_nodes());
345        }
346    }
347
348    fn confirm(&self, variable: VariableId, binding: &Binding, proposals: &mut Vec<RawValue>) {
349        if variable == self.start {
350            if let Some(end_val) = binding.get(self.end) {
351                if let Some(end_id) = id_from_value(end_val) {
352                    proposals.retain(|v| {
353                        id_from_value(v)
354                            .map_or(false, |sid| has_path(&self.set, &self.expr, &sid, &end_id))
355                    });
356                } else {
357                    proposals.clear();
358                }
359            }
360        } else if variable == self.end {
361            if let Some(start_val) = binding.get(self.start) {
362                if let Some(start_id) = id_from_value(start_val) {
363                    proposals.retain(|v| {
364                        id_from_value(v)
365                            .map_or(false, |eid| has_path(&self.set, &self.expr, &start_id, &eid))
366                    });
367                } else {
368                    proposals.clear();
369                }
370            }
371        }
372    }
373}