Skip to main content

shifty_engine/
witness.rs

1//! The witnessing evaluator (`docs/06-repair.md` §5) — the structured, lossless
2//! sibling of [`explain`](crate::validate). For a focus node that fails a
3//! statement it returns a [`Witness`]: the failed sub-DAG of `φ`, pruned to
4//! exactly what did not hold, with the structural gap at each node. Its dual,
5//! [`SatTrace`], records *why* a shape currently holds, so a `Not(φ)` failure can
6//! be repaired by breaking `φ`. The two are mutually recursive through `Not`.
7//!
8//! This is the input to repair synthesis; it makes no repair decisions. It reuses
9//! the [`ShapeEvaluator`] satisfaction oracle (`holds`) and its gfp back-edge
10//! guard verbatim, so witnessing agrees with validation by construction.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, succ};
14use crate::sparql::SparqlExecutor;
15use crate::validate::{NonStratifiable, ShapeEvaluator, focus_nodes_with, uses_shapes_graph};
16use oxrdf::{Graph, NamedNode, Term, Triple};
17use serde::{Deserialize, Serialize};
18use shifty_algebra::{Path, Schema, Shape, ShapeId};
19use shifty_opt::analyze;
20use std::collections::{BTreeSet, HashSet, VecDeque};
21
22/// Why one focus node failed one statement: the failed sub-structure of `φ`,
23/// pruned to exactly the parts that did not hold. The input to repair synthesis.
24#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25pub struct FocusWitness {
26    pub focus: Term,
27    /// Index of the violated `(selector, shape)` statement in the schema.
28    pub statement: usize,
29    pub failure: Witness,
30}
31
32/// The relational (pairwise) leaf constraints — distinct from value-type atoms.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34pub enum RelKind {
35    Eq,
36    Disj,
37    Lt,
38    Le,
39    UniqueLang,
40}
41
42/// The existing triples that make a node a `π`-successor of its parent: the
43/// edges a deletion would cut. The deletion-side dual of path materialization.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub enum PathSupport {
46    /// Reflexive (`Id`): the node reached itself; nothing to cut.
47    Empty,
48    /// A single triple edge whose removal breaks this step.
49    Edge(Triple),
50    /// A chain (`Seq`/`Star` expansion): every edge present; cut any one to break.
51    Chain(Vec<PathSupport>),
52    /// Parallel witnessing chains (`Alt`): cut all to break.
53    Alt(Vec<PathSupport>),
54}
55
56/// The failed sub-structure of `φ` (additive direction: what to *add*).
57#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
58pub enum Witness {
59    /// A value-type leaf failed at `node` (TestConst / TestType / TestKind).
60    /// `produced_by` names the triples that made `node` a value (`Some` for a
61    /// value-scoped atom, `None` for one on the focus itself).
62    Atom {
63        shape: ShapeId,
64        node: Term,
65        reached_by: Path,
66        produced_by: Option<PathSupport>,
67    },
68    /// A relational leaf failed (Eq / Disj / Lt / Le / UniqueLang): `lhs`/`rhs`
69    /// carry the two compared value-sets with their support, `offending` the
70    /// witnessing pairs/members.
71    Relational {
72        shape: ShapeId,
73        node: Term,
74        kind: RelKind,
75        lhs: Vec<(Term, PathSupport)>,
76        rhs: Vec<(Term, PathSupport)>,
77        offending: Vec<(Term, Term)>,
78    },
79    /// `closed(Q)` failed: these (predicate, object) pairs are not allowed.
80    Closed {
81        shape: ShapeId,
82        node: Term,
83        offenders: Vec<(NamedNode, Term)>,
84    },
85    /// `¬φ` failed because `φ` holds at `node` — it must be falsified.
86    Not {
87        shape: ShapeId,
88        node: Term,
89        inner: Box<SatTrace>,
90    },
91    /// Conjunction: every listed child failed and ALL must be repaired.
92    All {
93        shape: ShapeId,
94        node: Term,
95        failed: Vec<Witness>,
96    },
97    /// Disjunction: no branch held; repairing ANY ONE suffices.
98    Any {
99        shape: ShapeId,
100        node: Term,
101        branches: Vec<Witness>,
102    },
103    /// `∃≥min π.q` under-satisfied: `have` values match, `min` required.
104    CountLow {
105        shape: ShapeId,
106        node: Term,
107        path: Path,
108        qualifier: ShapeId,
109        have: u64,
110        min: u64,
111    },
112    /// `∃≤max π.q` over-satisfied. `matched` pairs each counted value with its
113    /// support (so deletion cuts the right edges for `Seq`/`Star` paths).
114    /// `per_value` is populated only for the `∀`-encoding (`∃≤0 π.¬inner`).
115    CountHigh {
116        shape: ShapeId,
117        node: Term,
118        path: Path,
119        qualifier: ShapeId,
120        matched: Vec<(Term, PathSupport)>,
121        max: u64,
122        per_value: Vec<(Term, Witness)>,
123    },
124    /// Opaque SPARQL — no algebraic witness.
125    Opaque { shape: ShapeId, node: Term },
126}
127
128/// Why `φ` currently *holds* at a node (deletive direction: what to *delete*).
129/// The dual of [`Witness`].
130#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
131pub enum SatTrace {
132    /// `⊤` — vacuously true; no graph edit falsifies it.
133    Irrefutable { shape: ShapeId },
134    /// A value-type leaf holds at `node`; `produced_by` names the edges to cut.
135    Atom {
136        shape: ShapeId,
137        node: Term,
138        reached_by: Path,
139        produced_by: PathSupport,
140    },
141    /// Conjunction holds because ALL children hold ⟹ break ANY ONE.
142    AllHeld {
143        shape: ShapeId,
144        node: Term,
145        children: Vec<SatTrace>,
146    },
147    /// Disjunction holds because these branches hold ⟹ break EVERY one.
148    AnyHeld {
149        shape: ShapeId,
150        node: Term,
151        satisfied: Vec<SatTrace>,
152    },
153    /// `∃[min..max] π.q` holds. `matches` carries each counted value with its
154    /// q-support.
155    CountHeld {
156        shape: ShapeId,
157        node: Term,
158        path: Path,
159        qualifier: ShapeId,
160        matches: Vec<(Term, SatTrace)>,
161        min: Option<u64>,
162        max: Option<u64>,
163    },
164    /// `¬φ` holds because `φ` fails ⟹ make `φ` hold. Flips to the additive side.
165    NotHeld {
166        shape: ShapeId,
167        node: Term,
168        inner_fails: Box<Witness>,
169    },
170    /// Holds but cannot be falsified by data deletion in scope (closed / relational
171    /// / opaque SPARQL).
172    Blocked {
173        shape: ShapeId,
174        node: Term,
175        reason: BlockReason,
176    },
177    /// Support reached only through a gfp back-edge: coinductively assumed true,
178    /// with no finite set of facts to delete.
179    Coinductive { shape: ShapeId, node: Term },
180}
181
182/// Why a holding shape admits no data-deletion repair.
183#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
184pub enum BlockReason {
185    OpaqueSparql,
186    /// Falsifying `closed(Q)` would need *adding* a disallowed predicate.
187    ClosedNeedsAdd,
188    /// Relational falsification is not synthesized in this cut.
189    Unsupported,
190}
191
192type Stack = HashSet<(ShapeId, Term)>;
193
194/// Strat-check and build the SPARQL executor shared by every witnessing entry
195/// point. The caller derives `backend`/`ShapeEvaluator` from the returned
196/// executor — those borrow it, so they can't be bundled in here.
197fn prepare(data: &Graph, schema: &Schema) -> Result<SparqlExecutor, NonStratifiable> {
198    let strat = analyze(&schema.arena);
199    if !strat.stratifiable {
200        let components = strat
201            .strata
202            .iter()
203            .filter(|s| !s.stratifiable)
204            .map(|s| s.shapes.clone())
205            .collect();
206        return Err(NonStratifiable { components });
207    }
208
209    let uses_shapes = uses_shapes_graph(&schema.arena);
210    let frozen = if uses_shapes {
211        FrozenIndexedDataset::from_graphs(data, data)
212    } else {
213        FrozenIndexedDataset::from_graph(data)
214    };
215    Ok(SparqlExecutor::from_frozen(frozen, uses_shapes))
216}
217
218/// Witness every `(focus, statement)` that fails, mirroring `validate`'s driver.
219pub fn witness_violations(
220    data: &Graph,
221    schema: &Schema,
222) -> Result<Vec<FocusWitness>, NonStratifiable> {
223    let sparql = prepare(data, schema)?;
224    let backend = sparql
225        .frozen()
226        .expect("witness executor always has a frozen dataset");
227    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
228
229    let mut out = Vec::new();
230    for (i, st) in schema.statements.iter().enumerate() {
231        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
232            let mut stack = Stack::new();
233            if let Some(failure) =
234                witness(&mut evaluator, &v, st.shape, &Path::Id, None, &mut stack)
235            {
236                out.push(FocusWitness {
237                    focus: v,
238                    statement: i,
239                    failure,
240                });
241            }
242        }
243    }
244    Ok(out)
245}
246
247/// The arena slot a named shape IRI refers to, if the schema names one. `iri` is
248/// matched bare (no angle brackets), the form stored in [`Schema::names`].
249pub fn shape_id_for_iri(schema: &Schema, iri: &str) -> Option<ShapeId> {
250    schema
251        .names
252        .iter()
253        .find_map(|(id, name)| (name == iri).then_some(*id))
254}
255
256/// Witness only the `(focus, statement)` violations whose statement targets
257/// `shape` — the shape-scoped sibling of [`witness_violations`]. Returns the
258/// *failing* foci with their [`Witness`] trees; passing foci are the domain of
259/// [`satisfy_shape`]. Use [`shape_id_for_iri`] to resolve an IRI to its
260/// `ShapeId`.
261pub fn witness_shape(
262    data: &Graph,
263    schema: &Schema,
264    shape: ShapeId,
265) -> Result<Vec<FocusWitness>, NonStratifiable> {
266    let sparql = prepare(data, schema)?;
267    let backend = sparql
268        .frozen()
269        .expect("witness executor always has a frozen dataset");
270    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
271
272    let mut out = Vec::new();
273    for (i, st) in schema.statements.iter().enumerate() {
274        if st.shape != shape {
275            continue;
276        }
277        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
278            let mut stack = Stack::new();
279            if let Some(failure) =
280                witness(&mut evaluator, &v, st.shape, &Path::Id, None, &mut stack)
281            {
282                out.push(FocusWitness {
283                    focus: v,
284                    statement: i,
285                    failure,
286                });
287            }
288        }
289    }
290    Ok(out)
291}
292
293/// Why one focus node *satisfies* one statement: the [`SatTrace`] recording why
294/// `φ` holds, including the values matched along each checked path. The
295/// satisfaction-side dual of [`FocusWitness`].
296#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
297pub struct FocusSat {
298    pub focus: Term,
299    /// Index of the satisfied `(selector, shape)` statement in the schema.
300    pub statement: usize,
301    pub trace: SatTrace,
302}
303
304/// Trace every `(focus, statement)` that *holds* for a statement targeting
305/// `shape` — the dual of [`witness_shape`]. Each [`FocusSat`] carries why the
306/// node conforms, down to the values matched along every checked path (see
307/// [`SatTrace`]). Use [`shape_id_for_iri`] to resolve an IRI to its `ShapeId`.
308pub fn satisfy_shape(
309    data: &Graph,
310    schema: &Schema,
311    shape: ShapeId,
312) -> Result<Vec<FocusSat>, NonStratifiable> {
313    let sparql = prepare(data, schema)?;
314    let backend = sparql
315        .frozen()
316        .expect("witness executor always has a frozen dataset");
317    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
318
319    let mut out = Vec::new();
320    for (i, st) in schema.statements.iter().enumerate() {
321        if st.shape != shape {
322            continue;
323        }
324        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
325            let mut stack = Stack::new();
326            if let Some(trace) =
327                sat_trace(&mut evaluator, &v, st.shape, &Path::Id, None, &mut stack)
328            {
329                out.push(FocusSat {
330                    focus: v,
331                    statement: i,
332                    trace,
333                });
334            }
335        }
336    }
337    Ok(out)
338}
339
340/// Witness one specific `node` against one specific `shape` (by id) — the building
341/// block for repairing a `ConformsTo` hole: bind it to a node, then witness that
342/// node against the sub-shape and synthesize its repair. Returns `Ok(None)` when
343/// the node already conforms (nothing to build), `Ok(Some(_))` otherwise.
344///
345/// The `statement` field of the returned [`FocusWitness`] is a sentinel
346/// (`usize::MAX`): this is not a top-level statement, and synthesis ignores it.
347pub fn witness_node(
348    data: &Graph,
349    schema: &Schema,
350    node: &Term,
351    shape: ShapeId,
352) -> Result<Option<FocusWitness>, NonStratifiable> {
353    let sparql = prepare(data, schema)?;
354    let backend = sparql
355        .frozen()
356        .expect("witness executor always has a frozen dataset");
357    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
358
359    let mut stack = Stack::new();
360    Ok(
361        witness(&mut evaluator, node, shape, &Path::Id, None, &mut stack).map(|failure| {
362            FocusWitness {
363                focus: node.clone(),
364                statement: usize::MAX,
365                failure,
366            }
367        }),
368    )
369}
370
371/// Reasons `φ` (slot `id`) fails at `node`. `None` ⟺ it holds (incl. on a gfp
372/// back-edge). `reached_by` is the structured path from the focus; `produced_by`
373/// is how `node` was reached from its parent value (for replace-in-place).
374fn witness(
375    eval: &mut ShapeEvaluator<'_>,
376    node: &Term,
377    id: ShapeId,
378    reached_by: &Path,
379    produced_by: Option<&PathSupport>,
380    stack: &mut Stack,
381) -> Option<Witness> {
382    let key = (id, node.clone());
383    if !stack.insert(key.clone()) {
384        return None; // back-edge ⇒ assume conforming (gfp)
385    }
386    if eval.holds(node, id) {
387        stack.remove(&key);
388        return None; // holds ⇒ nothing to repair
389    }
390    let shape = eval.arena().get(id).clone();
391    let result = match shape {
392        Shape::Top | Shape::Pending => None,
393        // `sh:severity` is transparent to witnessing: repair the wrapped shape.
394        Shape::Annotated { shape, .. } => {
395            let inner = witness(eval, node, shape, reached_by, produced_by, stack);
396            stack.remove(&key);
397            return inner;
398        }
399        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(Witness::Atom {
400            shape: id,
401            node: node.clone(),
402            reached_by: reached_by.clone(),
403            produced_by: produced_by.cloned(),
404        }),
405        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
406            Some(relational_witness(eval, node, id, &shape))
407        }
408        Shape::Closed(ref q) => {
409            let offenders = closed_offenders(eval.backend(), node, q);
410            (!offenders.is_empty()).then(|| Witness::Closed {
411                shape: id,
412                node: node.clone(),
413                offenders,
414            })
415        }
416        Shape::Not(c) => {
417            sat_trace(eval, node, c, reached_by, produced_by, stack).map(|t| Witness::Not {
418                shape: id,
419                node: node.clone(),
420                inner: Box::new(t),
421            })
422        }
423        Shape::And(cs) => {
424            let failed: Vec<Witness> = cs
425                .iter()
426                .filter_map(|c| witness(eval, node, *c, reached_by, produced_by, stack))
427                .collect();
428            (!failed.is_empty()).then(|| Witness::All {
429                shape: id,
430                node: node.clone(),
431                failed,
432            })
433        }
434        Shape::Or(cs) => {
435            // Or failed ⟹ every disjunct failed.
436            let branches: Vec<Witness> = cs
437                .iter()
438                .filter_map(|c| witness(eval, node, *c, reached_by, produced_by, stack))
439                .collect();
440            (!branches.is_empty()).then(|| Witness::Any {
441                shape: id,
442                node: node.clone(),
443                branches,
444            })
445        }
446        Shape::Count {
447            path,
448            min,
449            max,
450            qualifier,
451        } => count_witness(
452            eval, node, id, &path, min, max, qualifier, reached_by, stack,
453        ),
454        Shape::Sparql(_) => Some(Witness::Opaque {
455            shape: id,
456            node: node.clone(),
457        }),
458    };
459    stack.remove(&key);
460    result
461}
462
463#[allow(clippy::too_many_arguments)]
464fn count_witness(
465    eval: &mut ShapeEvaluator<'_>,
466    node: &Term,
467    id: ShapeId,
468    path: &Path,
469    min: Option<u64>,
470    max: Option<u64>,
471    qualifier: ShapeId,
472    reached_by: &Path,
473    stack: &mut Stack,
474) -> Option<Witness> {
475    let values: Vec<Term> = succ(eval.backend(), node, path).into_iter().collect();
476    let matched: Vec<Term> = values
477        .into_iter()
478        .filter(|u| eval.holds(u, qualifier))
479        .collect();
480    let n = matched.len() as u64;
481
482    if let Some(m) = min
483        && n < m
484    {
485        return Some(Witness::CountLow {
486            shape: id,
487            node: node.clone(),
488            path: path.clone(),
489            qualifier,
490            have: n,
491            min: m,
492        });
493    }
494    if let Some(m) = max
495        && n > m
496    {
497        let matched_sup: Vec<(Term, PathSupport)> = matched
498            .iter()
499            .map(|u| {
500                (
501                    u.clone(),
502                    path_support(eval.backend(), node, path, u).unwrap_or(PathSupport::Empty),
503                )
504            })
505            .collect();
506        // ∀-encoding `∃≤0 π.¬inner`: drill into each offending value.
507        let per_value = if m == 0 {
508            if let Shape::Not(inner) = eval.arena().get(qualifier).clone() {
509                let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
510                let mut pv = Vec::new();
511                for u in &matched {
512                    let ps = path_support(eval.backend(), node, path, u);
513                    if let Some(w) = witness(eval, u, inner, &reached, ps.as_ref(), stack) {
514                        pv.push((u.clone(), w));
515                    }
516                }
517                pv
518            } else {
519                Vec::new()
520            }
521        } else {
522            Vec::new()
523        };
524        return Some(Witness::CountHigh {
525            shape: id,
526            node: node.clone(),
527            path: path.clone(),
528            qualifier,
529            matched: matched_sup,
530            max: m,
531            per_value,
532        });
533    }
534    None
535}
536
537fn relational_witness(
538    eval: &ShapeEvaluator<'_>,
539    node: &Term,
540    id: ShapeId,
541    shape: &Shape,
542) -> Witness {
543    let g = eval.backend();
544    let (kind, lpath, rpred) = match shape {
545        Shape::Eq(p, q) => (RelKind::Eq, p.clone(), Some(q.clone())),
546        Shape::Disj(p, q) => (RelKind::Disj, p.clone(), Some(q.clone())),
547        Shape::Lt(p, q) => (RelKind::Lt, p.clone(), Some(q.clone())),
548        Shape::Le(p, q) => (RelKind::Le, p.clone(), Some(q.clone())),
549        Shape::UniqueLang(p) => (RelKind::UniqueLang, p.clone(), None),
550        _ => unreachable!("relational_witness on non-relational shape"),
551    };
552    let with_support = |g: &dyn PathBackend, p: &Path| -> Vec<(Term, PathSupport)> {
553        succ(g, node, p)
554            .into_iter()
555            .map(|v| {
556                let s = path_support(g, node, p, &v).unwrap_or(PathSupport::Empty);
557                (v, s)
558            })
559            .collect()
560    };
561    let lhs = with_support(g, &lpath);
562    let rhs = match &rpred {
563        Some(q) => with_support(g, &Path::Pred(q.clone())),
564        None => Vec::new(),
565    };
566    let offending = offending_pairs(kind, &lhs, &rhs);
567    Witness::Relational {
568        shape: id,
569        node: node.clone(),
570        kind,
571        lhs,
572        rhs,
573        offending,
574    }
575}
576
577/// The witnessing pairs/members for a failed relational constraint.
578fn offending_pairs(
579    kind: RelKind,
580    lhs: &[(Term, PathSupport)],
581    rhs: &[(Term, PathSupport)],
582) -> Vec<(Term, Term)> {
583    let lvals: Vec<&Term> = lhs.iter().map(|(v, _)| v).collect();
584    let rvals: Vec<&Term> = rhs.iter().map(|(v, _)| v).collect();
585    match kind {
586        RelKind::Eq => lvals
587            .iter()
588            .filter(|v| !rvals.contains(v))
589            .chain(rvals.iter().filter(|v| !lvals.contains(v)))
590            .map(|v| ((*v).clone(), (*v).clone()))
591            .collect(),
592        RelKind::Disj => lvals
593            .iter()
594            .filter(|v| rvals.contains(v))
595            .map(|v| ((*v).clone(), (*v).clone()))
596            .collect(),
597        RelKind::Lt | RelKind::Le => {
598            // pairs (a, b) that fail a < b (resp. a ≤ b)
599            let mut bad = Vec::new();
600            for a in &lvals {
601                for b in &rvals {
602                    let ok = match crate::value::compare_terms(a, b) {
603                        Some(std::cmp::Ordering::Less) => true,
604                        Some(std::cmp::Ordering::Equal) => kind == RelKind::Le,
605                        _ => false,
606                    };
607                    if !ok {
608                        bad.push(((*a).clone(), (*b).clone()));
609                    }
610                }
611            }
612            bad
613        }
614        RelKind::UniqueLang => {
615            // pairs of lhs values sharing a language tag
616            let mut bad = Vec::new();
617            for i in 0..lvals.len() {
618                for j in (i + 1)..lvals.len() {
619                    if let (Term::Literal(a), Term::Literal(b)) = (lvals[i], lvals[j])
620                        && let (Some(la), Some(lb)) = (a.language(), b.language())
621                        && la.eq_ignore_ascii_case(lb)
622                    {
623                        bad.push((lvals[i].clone(), lvals[j].clone()));
624                    }
625                }
626            }
627            bad
628        }
629    }
630}
631
632/// Support for why `φ` holds at `node`. `None` ⟺ it fails. The dual of [`witness`].
633fn sat_trace(
634    eval: &mut ShapeEvaluator<'_>,
635    node: &Term,
636    id: ShapeId,
637    reached_by: &Path,
638    produced_by: Option<&PathSupport>,
639    stack: &mut Stack,
640) -> Option<SatTrace> {
641    let key = (id, node.clone());
642    if !stack.insert(key.clone()) {
643        // back-edge: coinductively assumed true, no grounded support to delete.
644        return Some(SatTrace::Coinductive {
645            shape: id,
646            node: node.clone(),
647        });
648    }
649    if !eval.holds(node, id) {
650        stack.remove(&key);
651        return None; // fails ⇒ no satisfaction to break
652    }
653    let shape = eval.arena().get(id).clone();
654    let result = match shape {
655        Shape::Top | Shape::Pending => Some(SatTrace::Irrefutable { shape: id }),
656        // `sh:severity` is transparent: break the satisfaction of the wrapped shape.
657        Shape::Annotated { shape, .. } => {
658            let inner = sat_trace(eval, node, shape, reached_by, produced_by, stack);
659            stack.remove(&key);
660            return inner;
661        }
662        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(SatTrace::Atom {
663            shape: id,
664            node: node.clone(),
665            reached_by: reached_by.clone(),
666            produced_by: produced_by.cloned().unwrap_or(PathSupport::Empty),
667        }),
668        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
669            Some(SatTrace::Blocked {
670                shape: id,
671                node: node.clone(),
672                reason: BlockReason::Unsupported,
673            })
674        }
675        Shape::Closed(_) => Some(SatTrace::Blocked {
676            shape: id,
677            node: node.clone(),
678            reason: BlockReason::ClosedNeedsAdd,
679        }),
680        Shape::Sparql(_) => Some(SatTrace::Blocked {
681            shape: id,
682            node: node.clone(),
683            reason: BlockReason::OpaqueSparql,
684        }),
685        Shape::Not(c) => {
686            witness(eval, node, c, reached_by, produced_by, stack).map(|w| SatTrace::NotHeld {
687                shape: id,
688                node: node.clone(),
689                inner_fails: Box::new(w),
690            })
691        }
692        Shape::And(cs) => {
693            // holds ⟹ all children hold.
694            let children: Vec<SatTrace> = cs
695                .iter()
696                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
697                .collect();
698            Some(SatTrace::AllHeld {
699                shape: id,
700                node: node.clone(),
701                children,
702            })
703        }
704        Shape::Or(cs) => {
705            let satisfied: Vec<SatTrace> = cs
706                .iter()
707                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
708                .collect();
709            Some(SatTrace::AnyHeld {
710                shape: id,
711                node: node.clone(),
712                satisfied,
713            })
714        }
715        Shape::Count {
716            path,
717            min,
718            max,
719            qualifier,
720        } => {
721            let values: Vec<Term> = succ(eval.backend(), node, &path).into_iter().collect();
722            let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
723            let mut matches = Vec::new();
724            for u in values {
725                if eval.holds(&u, qualifier) {
726                    let ps = path_support(eval.backend(), node, &path, &u);
727                    if let Some(t) = sat_trace(eval, &u, qualifier, &reached, ps.as_ref(), stack) {
728                        matches.push((u, t));
729                    }
730                }
731            }
732            Some(SatTrace::CountHeld {
733                shape: id,
734                node: node.clone(),
735                path: path.clone(),
736                qualifier,
737                matches,
738                min,
739                max,
740            })
741        }
742    };
743    stack.remove(&key);
744    result
745}
746
747/// Predicates+objects on `node` not allowed by a closed shape's set `q`.
748fn closed_offenders(
749    g: &dyn PathBackend,
750    node: &Term,
751    q: &BTreeSet<NamedNode>,
752) -> Vec<(NamedNode, Term)> {
753    let allowed: HashSet<&NamedNode> = q.iter().collect();
754    let mut out = Vec::new();
755    for p in g.out_predicates(node) {
756        if !allowed.contains(&p) {
757            for o in g.objects(node, &p) {
758                out.push((p.clone(), o));
759            }
760        }
761    }
762    out
763}
764
765/// The existing triples that make `to` a `π`-successor of `from`, if any. The
766/// edges a deletion would cut to remove `to` from the value set.
767fn path_support(g: &dyn PathBackend, from: &Term, path: &Path, to: &Term) -> Option<PathSupport> {
768    match path {
769        Path::Id => (from == to).then_some(PathSupport::Empty),
770        Path::Pred(q) => {
771            if g.objects(from, q).contains(to) {
772                let s = node_of(from)?;
773                Some(PathSupport::Edge(Triple::new(s, q.clone(), to.clone())))
774            } else {
775                None
776            }
777        }
778        Path::Inverse(p) => path_support(g, to, p, from),
779        Path::Alt(ps) => ps.iter().find_map(|p| path_support(g, from, p, to)),
780        Path::Seq(ps) => seq_support(g, from, ps, to),
781        Path::Star(p) => star_support(g, from, p, to),
782    }
783}
784
785fn seq_support(g: &dyn PathBackend, from: &Term, ps: &[Path], to: &Term) -> Option<PathSupport> {
786    let Some((first, rest)) = ps.split_first() else {
787        return (from == to).then_some(PathSupport::Empty);
788    };
789    for mid in succ(g, from, first) {
790        let Some(head) = path_support(g, from, first, &mid) else {
791            continue;
792        };
793        if let Some(tail) = seq_support(g, &mid, rest, to) {
794            let mut chain = vec![head];
795            match tail {
796                PathSupport::Empty => {}
797                PathSupport::Chain(v) => chain.extend(v),
798                other => chain.push(other),
799            }
800            return Some(PathSupport::Chain(chain));
801        }
802    }
803    None
804}
805
806fn star_support(g: &dyn PathBackend, from: &Term, p: &Path, to: &Term) -> Option<PathSupport> {
807    if from == to {
808        return Some(PathSupport::Empty);
809    }
810    let mut visited: HashSet<Term> = HashSet::from([from.clone()]);
811    let mut queue: VecDeque<(Term, Vec<PathSupport>)> =
812        VecDeque::from([(from.clone(), Vec::new())]);
813    while let Some((cur, chain)) = queue.pop_front() {
814        for next in succ(g, &cur, p) {
815            let Some(edge) = path_support(g, &cur, p, &next) else {
816                continue;
817            };
818            let mut chain2 = chain.clone();
819            chain2.push(edge);
820            if next == *to {
821                return Some(PathSupport::Chain(chain2));
822            }
823            if visited.insert(next.clone()) {
824                queue.push_back((next, chain2));
825            }
826        }
827    }
828    None
829}
830
831#[cfg(test)]
832mod tests {
833    use super::*;
834    use shifty_parse::{load_turtle, parse_turtle};
835
836    const PREFIXES: &str = r#"
837        @prefix sh:  <http://www.w3.org/ns/shacl#> .
838        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
839        @prefix ex:  <http://ex/> .
840        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
841    "#;
842
843    fn run(ttl: &str) -> Vec<FocusWitness> {
844        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
845        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
846        witness_violations(&loaded.graph, &parsed.schema).expect("stratifiable")
847    }
848
849    /// Does any node in the witness tree satisfy `pred`?
850    fn any(w: &Witness, pred: &impl Fn(&Witness) -> bool) -> bool {
851        if pred(w) {
852            return true;
853        }
854        match w {
855            Witness::All { failed, .. } => failed.iter().any(|c| any(c, pred)),
856            Witness::Any { branches, .. } => branches.iter().any(|c| any(c, pred)),
857            Witness::CountHigh { per_value, .. } => per_value.iter().any(|(_, c)| any(c, pred)),
858            _ => false,
859        }
860    }
861
862    #[test]
863    fn conforming_graph_yields_no_witnesses() {
864        let ttl = format!(
865            "{PREFIXES}
866            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
867                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
868            ex:x ex:p ex:y .
869            "
870        );
871        assert!(run(&ttl).is_empty());
872    }
873
874    #[test]
875    fn min_count_violation_is_count_low() {
876        let ttl = format!(
877            "{PREFIXES}
878            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
879                sh:property [ sh:path ex:p ; sh:minCount 2 ] .
880            ex:x ex:p ex:y .
881            "
882        );
883        let ws = run(&ttl);
884        assert_eq!(ws.len(), 1);
885        assert_eq!(ws[0].focus.to_string(), "<http://ex/x>");
886        assert!(any(&ws[0].failure, &|w| matches!(
887            w,
888            Witness::CountLow {
889                have: 1,
890                min: 2,
891                ..
892            }
893        )));
894    }
895
896    #[test]
897    fn datatype_violation_is_an_atom_with_support() {
898        let ttl = format!(
899            "{PREFIXES}
900            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
901                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
902            ex:x ex:p \"hello\" .
903            "
904        );
905        let ws = run(&ttl);
906        assert_eq!(ws.len(), 1);
907        // The bad value is reached via ex:p, so its atom carries a cut edge.
908        assert!(any(&ws[0].failure, &|w| matches!(
909            w,
910            Witness::Atom {
911                produced_by: Some(PathSupport::Edge(_)),
912                ..
913            }
914        )));
915    }
916
917    #[test]
918    fn focus_level_nodekind_atom_has_no_support() {
919        let ttl = format!(
920            "{PREFIXES}
921            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
922                sh:nodeKind sh:IRI .
923            ex:x ex:p \"v\" .
924            ex:y ex:q ex:x .
925            "
926        );
927        // ex:x is an IRI so it conforms; use a literal-targeted shape instead:
928        let _ = ttl;
929        let ttl2 = format!(
930            "{PREFIXES}
931            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
932                sh:nodeKind sh:Literal .
933            ex:x ex:p ex:y .
934            "
935        );
936        let ws = run(&ttl2);
937        assert_eq!(ws.len(), 1);
938        assert!(matches!(
939            ws[0].failure,
940            Witness::Atom {
941                produced_by: None,
942                ..
943            }
944        ));
945    }
946
947    #[test]
948    fn non_stratifiable_schema_is_diagnosed() {
949        let ttl = format!(
950            "{PREFIXES}
951            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
952                sh:not [ sh:path ex:p ; sh:qualifiedValueShape ex:S ; sh:qualifiedMinCount 1 ] .
953            ex:x ex:p ex:y .
954            "
955        );
956        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
957        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
958        assert!(witness_violations(&loaded.graph, &parsed.schema).is_err());
959    }
960
961    #[test]
962    fn witness_shape_and_satisfy_shape_scope_to_one_shape() {
963        // Two targeted shapes; one focus fails ex:S, one passes ex:S, and a third
964        // node fails an unrelated shape ex:T that the ex:S queries must ignore.
965        let ttl = format!(
966            "{PREFIXES}
967            ex:S a sh:NodeShape ; sh:targetClass ex:C ;
968                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
969            ex:T a sh:NodeShape ; sh:targetClass ex:D ;
970                sh:property [ sh:path ex:q ; sh:minCount 1 ] .
971            ex:good a ex:C ; ex:p ex:y .
972            ex:bad  a ex:C .
973            ex:other a ex:D .
974            "
975        );
976        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
977        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
978        let schema = &parsed.schema;
979
980        let s = shape_id_for_iri(schema, "http://ex/S").expect("ex:S is named");
981        assert!(shape_id_for_iri(schema, "http://ex/missing").is_none());
982
983        // Failures: just ex:bad, never the ex:T violation on ex:other.
984        let fails = witness_shape(&loaded.graph, schema, s).expect("stratifiable");
985        assert_eq!(fails.len(), 1);
986        assert_eq!(fails[0].focus.to_string(), "<http://ex/bad>");
987
988        // Satisfactions: just ex:good, with the matched value recorded.
989        let sats = satisfy_shape(&loaded.graph, schema, s).expect("stratifiable");
990        assert_eq!(sats.len(), 1);
991        assert_eq!(sats[0].focus.to_string(), "<http://ex/good>");
992        // ex:good holds because the ex:p count is met by ex:y.
993        assert!(any_sat(&sats[0].trace, &|t| matches!(
994            t,
995            SatTrace::CountHeld { matches, .. } if matches.iter().any(|(v, _)| v.to_string() == "<http://ex/y>")
996        )));
997    }
998
999    /// Does any node in the satisfaction trace satisfy `pred`?
1000    fn any_sat(t: &SatTrace, pred: &impl Fn(&SatTrace) -> bool) -> bool {
1001        if pred(t) {
1002            return true;
1003        }
1004        match t {
1005            SatTrace::AllHeld { children, .. } => children.iter().any(|c| any_sat(c, pred)),
1006            SatTrace::AnyHeld { satisfied, .. } => satisfied.iter().any(|c| any_sat(c, pred)),
1007            SatTrace::CountHeld { matches, .. } => matches.iter().any(|(_, c)| any_sat(c, pred)),
1008            _ => false,
1009        }
1010    }
1011}