Skip to main content

shifty_engine/
path.rs

1//! Relational evaluation of the path algebra `⟦π⟧^G` (doc 00 §2, Table 1).
2//!
3//! Two mutually-recursive functions implement the relation by following it
4//! forward ([`succ`]) or backward ([`pred`]); `Inverse` simply swaps between
5//! them. This is the naive reference evaluation — correctness over speed.
6//!
7//! Both are generic over a [`PathBackend`]: the leaf `Path::Pred` arms dispatch
8//! through the trait's two adjacency lookups, so the same evaluator runs over a
9//! linear `oxrdf::Graph` (inference's growing graph) or the dictionary-encoded
10//! [`FrozenIndexedDataset`] (the report and algebra validation paths). Every
11//! other arm (`Id`/`Seq`/`Alt`/`Star`/`Inverse`) is pure combinator logic and is
12//! backend-agnostic.
13
14use crate::frozen::{FrozenIndexedDataset, GraphSel};
15use oxrdf::{Graph, NamedNode, NamedOrBlankNode, Term};
16use shifty_algebra::Path;
17use std::collections::{BTreeSet, HashSet};
18
19/// Storage backend for path evaluation. Exposes the three adjacency primitives
20/// `succ`/`pred`/`sh:closed` need, so path evaluation can dispatch to either a
21/// linear `oxrdf::Graph` or the indexed [`FrozenIndexedDataset`].
22pub trait PathBackend {
23    /// `{ o | (subject, predicate, o) ∈ G }` — forward step.
24    fn objects(&self, subject: &Term, predicate: &NamedNode) -> HashSet<Term>;
25    /// `{ s | (s, predicate, object) ∈ G }` — backward step.
26    fn subjects(&self, predicate: &NamedNode, object: &Term) -> HashSet<Term>;
27    /// Predicates of the outgoing triples of `subject` (used by `sh:closed`).
28    fn out_predicates(&self, subject: &Term) -> BTreeSet<NamedNode>;
29}
30
31/// `{ u | (node, u) ∈ ⟦path⟧ }` — nodes reachable forward from `node`.
32pub fn succ<B: PathBackend + ?Sized>(g: &B, node: &Term, path: &Path) -> HashSet<Term> {
33    match path {
34        Path::Id => once(node.clone()),
35        Path::Pred(q) => g.objects(node, q),
36        Path::Inverse(p) => pred(g, node, p),
37        Path::Seq(ps) => fold_seq(g, node, ps, succ),
38        Path::Alt(ps) => ps.iter().flat_map(|p| succ(g, node, p)).collect(),
39        Path::Star(p) => closure(g, node, |g, n| succ(g, n, p)),
40    }
41}
42
43/// `{ u | (u, node) ∈ ⟦path⟧ }` — nodes that reach `node`.
44pub fn pred<B: PathBackend + ?Sized>(g: &B, node: &Term, path: &Path) -> HashSet<Term> {
45    match path {
46        Path::Id => once(node.clone()),
47        Path::Pred(q) => g.subjects(q, node),
48        Path::Inverse(p) => succ(g, node, p),
49        // (u, node) ∈ ⟦p1·…·pk⟧ : peel predicates from the right.
50        Path::Seq(ps) => {
51            let mut cursor = once(node.clone());
52            for p in ps.iter().rev() {
53                cursor = cursor.iter().flat_map(|x| pred(g, x, p)).collect();
54            }
55            cursor
56        }
57        Path::Alt(ps) => ps.iter().flat_map(|p| pred(g, node, p)).collect(),
58        Path::Star(p) => closure(g, node, |g, n| pred(g, n, p)),
59    }
60}
61
62fn fold_seq<B: PathBackend + ?Sized>(
63    g: &B,
64    node: &Term,
65    ps: &[Path],
66    step: fn(&B, &Term, &Path) -> HashSet<Term>,
67) -> HashSet<Term> {
68    let mut cursor = once(node.clone());
69    for p in ps {
70        cursor = cursor.iter().flat_map(|x| step(g, x, p)).collect();
71    }
72    cursor
73}
74
75/// Reflexive-transitive closure of a one-step function (for `π*`).
76fn closure<B, F>(g: &B, node: &Term, step: F) -> HashSet<Term>
77where
78    B: PathBackend + ?Sized,
79    F: Fn(&B, &Term) -> HashSet<Term>,
80{
81    let mut result = once(node.clone());
82    let mut frontier = vec![node.clone()];
83    while let Some(x) = frontier.pop() {
84        for y in step(g, &x) {
85            if result.insert(y.clone()) {
86                frontier.push(y);
87            }
88        }
89    }
90    result
91}
92
93fn once(t: Term) -> HashSet<Term> {
94    let mut s = HashSet::with_capacity(1);
95    s.insert(t);
96    s
97}
98
99// ── Backends ─────────────────────────────────────────────────────────────────
100
101/// Linear backend over oxrdf's B-tree indexes. Used by inference, whose graph
102/// grows during forward chaining and so cannot share an immutable snapshot.
103impl PathBackend for Graph {
104    fn objects(&self, subject: &Term, predicate: &NamedNode) -> HashSet<Term> {
105        match node_of(subject) {
106            Some(s) => self
107                .objects_for_subject_predicate(&s, predicate.as_ref())
108                .map(|t| t.into_owned())
109                .collect(),
110            None => HashSet::new(),
111        }
112    }
113
114    fn subjects(&self, predicate: &NamedNode, object: &Term) -> HashSet<Term> {
115        self.subjects_for_predicate_object(predicate.as_ref(), object)
116            .map(|s| term_of(s.into_owned()))
117            .collect()
118    }
119
120    fn out_predicates(&self, subject: &Term) -> BTreeSet<NamedNode> {
121        let mut preds = BTreeSet::new();
122        if let Some(n) = node_of(subject) {
123            for t in self.triples_for_subject(&n) {
124                preds.insert(t.predicate.into_owned());
125            }
126        }
127        preds
128    }
129}
130
131/// Indexed backend over the dictionary-encoded post-inference snapshot. Built
132/// once at the inference→validation boundary and shared across focus nodes; the
133/// `u32`-keyed sorted indexes replace per-call term hashing and B-tree walks.
134/// Unknown terms intern to fresh ids that match no stored triple — exactly the
135/// empty-result semantics path evaluation needs.
136impl PathBackend for FrozenIndexedDataset {
137    fn objects(&self, subject: &Term, predicate: &NamedNode) -> HashSet<Term> {
138        let s = self.intern(subject);
139        let p = self.intern(&Term::NamedNode(predicate.clone()));
140        self.scan(Some(s), Some(p), None, GraphSel::Default)
141            .map(|t| self.externalize_id(t[2]))
142            .collect()
143    }
144
145    fn subjects(&self, predicate: &NamedNode, object: &Term) -> HashSet<Term> {
146        let p = self.intern(&Term::NamedNode(predicate.clone()));
147        let o = self.intern(object);
148        self.scan(None, Some(p), Some(o), GraphSel::Default)
149            .map(|t| self.externalize_id(t[0]))
150            .collect()
151    }
152
153    fn out_predicates(&self, subject: &Term) -> BTreeSet<NamedNode> {
154        let s = self.intern(subject);
155        self.scan(Some(s), None, None, GraphSel::Default)
156            .filter_map(|t| match self.externalize_id(t[1]) {
157                Term::NamedNode(p) => Some(p),
158                _ => None,
159            })
160            .collect()
161    }
162}
163
164/// A term usable in subject position, if it is not a literal.
165pub fn node_of(term: &Term) -> Option<NamedOrBlankNode> {
166    match term {
167        Term::NamedNode(n) => Some(NamedOrBlankNode::NamedNode(n.clone())),
168        Term::BlankNode(b) => Some(NamedOrBlankNode::BlankNode(b.clone())),
169        Term::Literal(_) => None,
170    }
171}
172
173pub fn term_of(node: NamedOrBlankNode) -> Term {
174    match node {
175        NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n),
176        NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b),
177    }
178}
179
180#[cfg(test)]
181mod tests {
182    use super::*;
183    use oxrdf::{NamedNode, Triple};
184
185    fn nn(iri: &str) -> NamedNode {
186        NamedNode::new(iri).unwrap()
187    }
188
189    fn t(s: &str, p: &str, o: &str) -> Triple {
190        Triple::new(nn(s), nn(p), nn(o))
191    }
192
193    fn term(iri: &str) -> Term {
194        Term::NamedNode(nn(iri))
195    }
196
197    /// `a -p-> b -p-> c -q-> d`, with a side branch `a -p-> e`.
198    fn sample_graph() -> Graph {
199        let mut g = Graph::new();
200        for tr in [
201            t("http://ex/a", "http://ex/p", "http://ex/b"),
202            t("http://ex/a", "http://ex/p", "http://ex/e"),
203            t("http://ex/b", "http://ex/p", "http://ex/c"),
204            t("http://ex/c", "http://ex/q", "http://ex/d"),
205        ] {
206            g.insert(tr.as_ref());
207        }
208        g
209    }
210
211    fn p(iri: &str) -> Path {
212        Path::Pred(nn(iri))
213    }
214
215    /// The path shapes exercised by the Graph-vs-Frozen differential below.
216    fn sample_paths() -> Vec<Path> {
217        let pp = || p("http://ex/p");
218        let qq = || p("http://ex/q");
219        vec![
220            Path::Id,
221            pp(),
222            Path::Inverse(Box::new(pp())),
223            Path::Seq(vec![pp(), pp()]),
224            Path::Seq(vec![pp(), qq()]),
225            Path::Alt(vec![pp(), qq()]),
226            Path::Star(Box::new(pp())),
227            Path::Seq(vec![Path::Star(Box::new(pp())), qq()]),
228            Path::Inverse(Box::new(Path::Seq(vec![pp(), pp()]))),
229        ]
230    }
231
232    #[test]
233    fn graph_and_frozen_backends_agree() {
234        let g = sample_graph();
235        let frozen = FrozenIndexedDataset::from_graph(&g);
236        let nodes = ["http://ex/a", "http://ex/b", "http://ex/c", "http://ex/d"];
237        for path in sample_paths() {
238            for node in nodes {
239                let n = term(node);
240                assert_eq!(
241                    succ(&g, &n, &path),
242                    succ(&frozen, &n, &path),
243                    "succ mismatch at {node} for {path:?}"
244                );
245                assert_eq!(
246                    pred(&g, &n, &path),
247                    pred(&frozen, &n, &path),
248                    "pred mismatch at {node} for {path:?}"
249                );
250            }
251        }
252    }
253
254    #[test]
255    fn frozen_succ_pred_basics() {
256        let g = sample_graph();
257        let frozen = FrozenIndexedDataset::from_graph(&g);
258        let a = term("http://ex/a");
259        assert_eq!(
260            succ(&frozen, &a, &p("http://ex/p")),
261            HashSet::from([term("http://ex/b"), term("http://ex/e")])
262        );
263        let c = term("http://ex/c");
264        assert_eq!(
265            pred(&frozen, &c, &p("http://ex/p")),
266            HashSet::from([term("http://ex/b")])
267        );
268    }
269
270    #[test]
271    fn star_is_reflexive_transitive_on_both_backends() {
272        let g = sample_graph();
273        let frozen = FrozenIndexedDataset::from_graph(&g);
274        let a = term("http://ex/a");
275        let star = Path::Star(Box::new(p("http://ex/p")));
276        let expected = HashSet::from([
277            term("http://ex/a"),
278            term("http://ex/b"),
279            term("http://ex/e"),
280            term("http://ex/c"),
281        ]);
282        assert_eq!(succ(&g, &a, &star), expected);
283        assert_eq!(succ(&frozen, &a, &star), expected);
284    }
285
286    #[test]
287    fn out_predicates_agree() {
288        let g = sample_graph();
289        let frozen = FrozenIndexedDataset::from_graph(&g);
290        let a = term("http://ex/a");
291        assert_eq!(g.out_predicates(&a), BTreeSet::from([nn("http://ex/p")]));
292        assert_eq!(
293            frozen.out_predicates(&a),
294            BTreeSet::from([nn("http://ex/p")])
295        );
296        // Literal subject → no outgoing predicates on either backend.
297        let lit = Term::Literal(oxrdf::Literal::new_simple_literal("x"));
298        assert!(g.out_predicates(&lit).is_empty());
299        assert!(frozen.out_predicates(&lit).is_empty());
300    }
301}