Skip to main content

shifty_engine/
witness.rs

1//! The witnessing evaluator (`docs/06-repair.md` §5) — the structured, lossless
2//! sibling of [`explain`](crate::validate). For a focus node that fails a
3//! statement it returns a [`Witness`]: the failed sub-DAG of `φ`, pruned to
4//! exactly what did not hold, with the structural gap at each node. Its dual,
5//! [`SatTrace`], records *why* a shape currently holds, so a `Not(φ)` failure can
6//! be repaired by breaking `φ`. The two are mutually recursive through `Not`.
7//!
8//! This is the input to repair synthesis; it makes no repair decisions. It reuses
9//! the [`ShapeEvaluator`] satisfaction oracle (`holds`) and its gfp back-edge
10//! guard verbatim, so witnessing agrees with validation by construction.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, succ};
14use crate::sparql::SparqlExecutor;
15use crate::validate::{NonStratifiable, ShapeEvaluator, focus_nodes_with, uses_shapes_graph};
16use oxrdf::{Graph, NamedNode, Term, Triple};
17use serde::{Deserialize, Serialize};
18use shifty_algebra::{Path, Schema, Shape, ShapeArena, ShapeId};
19use shifty_opt::analyze;
20use std::collections::{BTreeSet, HashSet, VecDeque};
21
22/// Why one focus node failed one statement: the failed sub-structure of `φ`,
23/// pruned to exactly the parts that did not hold. The input to repair synthesis.
24#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25pub struct FocusWitness {
26    pub focus: Term,
27    /// Index of the violated `(selector, shape)` statement in the schema.
28    pub statement: usize,
29    pub failure: Witness,
30}
31
32/// The relational (pairwise) leaf constraints — distinct from value-type atoms.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34pub enum RelKind {
35    Eq,
36    Disj,
37    Lt,
38    Le,
39    UniqueLang,
40}
41
42/// The existing triples that make a node a `π`-successor of its parent: the
43/// edges a deletion would cut. The deletion-side dual of path materialization.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub enum PathSupport {
46    /// Reflexive (`Id`): the node reached itself; nothing to cut.
47    Empty,
48    /// A single triple edge whose removal breaks this step.
49    Edge(Triple),
50    /// A chain (`Seq`/`Star` expansion): every edge present; cut any one to break.
51    Chain(Vec<PathSupport>),
52    /// Parallel witnessing chains (`Alt`): cut all to break.
53    Alt(Vec<PathSupport>),
54}
55
56/// The failed sub-structure of `φ` (additive direction: what to *add*).
57#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
58pub enum Witness {
59    /// A value-type leaf failed at `node` (TestConst / TestType / TestKind).
60    /// `produced_by` names the triples that made `node` a value (`Some` for a
61    /// value-scoped atom, `None` for one on the focus itself).
62    Atom {
63        shape: ShapeId,
64        node: Term,
65        reached_by: Path,
66        produced_by: Option<PathSupport>,
67    },
68    /// A relational leaf failed (Eq / Disj / Lt / Le / UniqueLang): `lhs`/`rhs`
69    /// carry the two compared value-sets with their support, `offending` the
70    /// witnessing pairs/members.
71    Relational {
72        shape: ShapeId,
73        node: Term,
74        kind: RelKind,
75        lhs: Vec<(Term, PathSupport)>,
76        rhs: Vec<(Term, PathSupport)>,
77        offending: Vec<(Term, Term)>,
78    },
79    /// `closed(Q)` failed: these (predicate, object) pairs are not allowed.
80    Closed {
81        shape: ShapeId,
82        node: Term,
83        offenders: Vec<(NamedNode, Term)>,
84    },
85    /// `¬φ` failed because `φ` holds at `node` — it must be falsified.
86    Not {
87        shape: ShapeId,
88        node: Term,
89        inner: Box<SatTrace>,
90    },
91    /// Conjunction: every listed child failed and ALL must be repaired.
92    All {
93        shape: ShapeId,
94        node: Term,
95        failed: Vec<Witness>,
96    },
97    /// Disjunction: no branch held; repairing ANY ONE suffices.
98    Any {
99        shape: ShapeId,
100        node: Term,
101        branches: Vec<Witness>,
102    },
103    /// `∃≥min π.q` under-satisfied: `have` values match, `min` required.
104    ///
105    /// `sibling_qualifiers` collects the inner shapes of every `∀π.φ` universal
106    /// (encoded as `∃≤0 π.¬φ`) on this same path that is *conjoined above* this
107    /// count — including ones from sibling property shapes, i.e. a different `And`
108    /// node. Those universals are vacuously satisfied when their path is empty, so
109    /// they never witness as failures themselves, yet any value added to satisfy
110    /// this `CountLow` must also conform to each of them.
111    CountLow {
112        shape: ShapeId,
113        node: Term,
114        path: Path,
115        qualifier: ShapeId,
116        have: u64,
117        min: u64,
118        sibling_qualifiers: Vec<ShapeId>,
119    },
120    /// `∃≤max π.q` over-satisfied. `matched` pairs each counted value with its
121    /// support (so deletion cuts the right edges for `Seq`/`Star` paths).
122    /// `per_value` is populated only for the `∀`-encoding (`∃≤0 π.¬inner`).
123    CountHigh {
124        shape: ShapeId,
125        node: Term,
126        path: Path,
127        qualifier: ShapeId,
128        matched: Vec<(Term, PathSupport)>,
129        max: u64,
130        per_value: Vec<(Term, Witness)>,
131    },
132    /// Opaque SPARQL — no algebraic witness.
133    Opaque { shape: ShapeId, node: Term },
134}
135
136/// Why `φ` currently *holds* at a node (deletive direction: what to *delete*).
137/// The dual of [`Witness`].
138#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
139pub enum SatTrace {
140    /// `⊤` — vacuously true; no graph edit falsifies it.
141    Irrefutable { shape: ShapeId },
142    /// A value-type leaf holds at `node`; `produced_by` names the edges to cut.
143    Atom {
144        shape: ShapeId,
145        node: Term,
146        reached_by: Path,
147        produced_by: PathSupport,
148    },
149    /// Conjunction holds because ALL children hold ⟹ break ANY ONE.
150    AllHeld {
151        shape: ShapeId,
152        node: Term,
153        children: Vec<SatTrace>,
154    },
155    /// Disjunction holds because these branches hold ⟹ break EVERY one.
156    AnyHeld {
157        shape: ShapeId,
158        node: Term,
159        satisfied: Vec<SatTrace>,
160    },
161    /// `∃[min..max] π.q` holds. `matches` carries each counted value with its
162    /// q-support.
163    CountHeld {
164        shape: ShapeId,
165        node: Term,
166        path: Path,
167        qualifier: ShapeId,
168        matches: Vec<(Term, SatTrace)>,
169        min: Option<u64>,
170        max: Option<u64>,
171    },
172    /// `¬φ` holds because `φ` fails ⟹ make `φ` hold. Flips to the additive side.
173    NotHeld {
174        shape: ShapeId,
175        node: Term,
176        inner_fails: Box<Witness>,
177    },
178    /// Holds but cannot be falsified by data deletion in scope (closed / relational
179    /// / opaque SPARQL).
180    Blocked {
181        shape: ShapeId,
182        node: Term,
183        reason: BlockReason,
184    },
185    /// Support reached only through a gfp back-edge: coinductively assumed true,
186    /// with no finite set of facts to delete.
187    Coinductive { shape: ShapeId, node: Term },
188}
189
190/// Why a holding shape admits no data-deletion repair.
191#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
192pub enum BlockReason {
193    OpaqueSparql,
194    /// Falsifying `closed(Q)` would need *adding* a disallowed predicate.
195    ClosedNeedsAdd,
196    /// Relational falsification is not synthesized in this cut.
197    Unsupported,
198}
199
200type Stack = HashSet<(ShapeId, Term)>;
201
202/// Strat-check and build the SPARQL executor shared by every witnessing entry
203/// point. The executor's frozen dataset is built over `context` — the graph that
204/// paths, class hierarchy, and SPARQL read; focus discovery is the caller's job
205/// and reads the (possibly narrower) data graph. The caller derives
206/// `backend`/`ShapeEvaluator` from the returned executor — those borrow it, so
207/// they can't be bundled in here.
208fn prepare(context: &Graph, schema: &Schema) -> Result<SparqlExecutor, NonStratifiable> {
209    let strat = analyze(&schema.arena);
210    if !strat.stratifiable {
211        let components = strat
212            .strata
213            .iter()
214            .filter(|s| !s.stratifiable)
215            .map(|s| s.shapes.clone())
216            .collect();
217        return Err(NonStratifiable { components });
218    }
219
220    let uses_shapes = uses_shapes_graph(&schema.arena);
221    let frozen = if uses_shapes {
222        FrozenIndexedDataset::from_graphs(context, context)
223    } else {
224        FrozenIndexedDataset::from_graph(context)
225    };
226    Ok(SparqlExecutor::from_frozen(frozen, uses_shapes))
227}
228
229/// Witness every `(focus, statement)` that fails, mirroring `validate`'s driver.
230/// Focus discovery uses `data`; paths, class hierarchy, and SPARQL are evaluated
231/// against `context`, which should contain `data` (pass `data` again when there
232/// is no separate shapes graph; for split inputs `context = data ∪ shapes`). The
233/// witnessing dual of [`crate::validate_with_context`].
234pub fn witness_violations(
235    data: &Graph,
236    context: &Graph,
237    schema: &Schema,
238) -> Result<Vec<FocusWitness>, NonStratifiable> {
239    let sparql = prepare(context, schema)?;
240    let backend = sparql
241        .frozen()
242        .expect("witness executor always has a frozen dataset");
243    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
244
245    let mut out = Vec::new();
246    for (i, st) in schema.statements.iter().enumerate() {
247        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
248            let mut stack = Stack::new();
249            if let Some(failure) = witness(
250                &mut evaluator,
251                &v,
252                st.shape,
253                &Path::Id,
254                None,
255                &[],
256                &mut stack,
257            ) {
258                out.push(FocusWitness {
259                    focus: v,
260                    statement: i,
261                    failure,
262                });
263            }
264        }
265    }
266    Ok(out)
267}
268
269/// The arena slot a named shape IRI refers to, if the schema names one. `iri` is
270/// matched bare (no angle brackets), the form stored in [`Schema::names`].
271pub fn shape_id_for_iri(schema: &Schema, iri: &str) -> Option<ShapeId> {
272    schema
273        .names
274        .iter()
275        .find_map(|(id, name)| (name == iri).then_some(*id))
276}
277
278/// Witness only the `(focus, statement)` violations whose statement targets
279/// `shape` — the shape-scoped sibling of [`witness_violations`]. Returns the
280/// *failing* foci with their [`Witness`] trees; passing foci are the domain of
281/// [`satisfy_shape`]. Use [`shape_id_for_iri`] to resolve an IRI to its
282/// `ShapeId`.
283pub fn witness_shape(
284    data: &Graph,
285    context: &Graph,
286    schema: &Schema,
287    shape: ShapeId,
288) -> Result<Vec<FocusWitness>, NonStratifiable> {
289    let sparql = prepare(context, schema)?;
290    let backend = sparql
291        .frozen()
292        .expect("witness executor always has a frozen dataset");
293    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
294
295    let mut out = Vec::new();
296    for (i, st) in schema.statements.iter().enumerate() {
297        if st.shape != shape {
298            continue;
299        }
300        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
301            let mut stack = Stack::new();
302            if let Some(failure) = witness(
303                &mut evaluator,
304                &v,
305                st.shape,
306                &Path::Id,
307                None,
308                &[],
309                &mut stack,
310            ) {
311                out.push(FocusWitness {
312                    focus: v,
313                    statement: i,
314                    failure,
315                });
316            }
317        }
318    }
319    Ok(out)
320}
321
322/// Why one focus node *satisfies* one statement: the [`SatTrace`] recording why
323/// `φ` holds, including the values matched along each checked path. The
324/// satisfaction-side dual of [`FocusWitness`].
325#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
326pub struct FocusSat {
327    pub focus: Term,
328    /// Index of the satisfied `(selector, shape)` statement in the schema.
329    pub statement: usize,
330    pub trace: SatTrace,
331}
332
333/// Trace every `(focus, statement)` that *holds* for a statement targeting
334/// `shape` — the dual of [`witness_shape`]. Each [`FocusSat`] carries why the
335/// node conforms, down to the values matched along every checked path (see
336/// [`SatTrace`]). Use [`shape_id_for_iri`] to resolve an IRI to its `ShapeId`.
337pub fn satisfy_shape(
338    data: &Graph,
339    context: &Graph,
340    schema: &Schema,
341    shape: ShapeId,
342) -> Result<Vec<FocusSat>, NonStratifiable> {
343    let sparql = prepare(context, schema)?;
344    let backend = sparql
345        .frozen()
346        .expect("witness executor always has a frozen dataset");
347    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
348
349    let mut out = Vec::new();
350    for (i, st) in schema.statements.iter().enumerate() {
351        if st.shape != shape {
352            continue;
353        }
354        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
355            let mut stack = Stack::new();
356            if let Some(trace) =
357                sat_trace(&mut evaluator, &v, st.shape, &Path::Id, None, &mut stack)
358            {
359                out.push(FocusSat {
360                    focus: v,
361                    statement: i,
362                    trace,
363                });
364            }
365        }
366    }
367    Ok(out)
368}
369
370/// Witness one specific `node` against one specific `shape` (by id) — the building
371/// block for repairing a `ConformsTo` hole: bind it to a node, then witness that
372/// node against the sub-shape and synthesize its repair. Returns `Ok(None)` when
373/// the node already conforms (nothing to build), `Ok(Some(_))` otherwise. Paths
374/// and class hierarchy are evaluated over `context`; there is no focus discovery
375/// here (`node` is given), so unlike the other entry points this takes no
376/// separate data graph.
377///
378/// The `statement` field of the returned [`FocusWitness`] is a sentinel
379/// (`usize::MAX`): this is not a top-level statement, and synthesis ignores it.
380pub fn witness_node(
381    context: &Graph,
382    schema: &Schema,
383    node: &Term,
384    shape: ShapeId,
385) -> Result<Option<FocusWitness>, NonStratifiable> {
386    let sparql = prepare(context, schema)?;
387    let backend = sparql
388        .frozen()
389        .expect("witness executor always has a frozen dataset");
390    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
391
392    let mut stack = Stack::new();
393    Ok(witness(
394        &mut evaluator,
395        node,
396        shape,
397        &Path::Id,
398        None,
399        &[],
400        &mut stack,
401    )
402    .map(|failure| FocusWitness {
403        focus: node.clone(),
404        statement: usize::MAX,
405        failure,
406    }))
407}
408
409/// Reasons `φ` (slot `id`) fails at `node`. `None` ⟺ it holds (incl. on a gfp
410/// back-edge). `reached_by` is the structured path from the focus; `produced_by`
411/// is how `node` was reached from its parent value (for replace-in-place).
412///
413/// `scope` carries the universals `∀π.φ` (encoded `∃≤0 π.¬φ`) conjoined *above*
414/// this point — every `And` ancestor contributes its universal children. They
415/// hold vacuously when their path is empty, so they never witness as failures of
416/// their own; a `CountLow` on the same path attaches them as `sibling_qualifiers`
417/// so a value built to satisfy the count also satisfies them (`∀` and a sibling
418/// count may live in *different* property shapes, hence different `And` nodes).
419fn witness(
420    eval: &mut ShapeEvaluator<'_>,
421    node: &Term,
422    id: ShapeId,
423    reached_by: &Path,
424    produced_by: Option<&PathSupport>,
425    scope: &[(Path, ShapeId)],
426    stack: &mut Stack,
427) -> Option<Witness> {
428    let key = (id, node.clone());
429    if !stack.insert(key.clone()) {
430        return None; // back-edge ⇒ assume conforming (gfp)
431    }
432    if eval.holds(node, id) {
433        stack.remove(&key);
434        return None; // holds ⇒ nothing to repair
435    }
436    let shape = eval.arena().get(id).clone();
437    let result = match shape {
438        Shape::Top | Shape::Pending => None,
439        // `sh:severity` is transparent to witnessing: repair the wrapped shape.
440        Shape::Annotated { shape, .. } => {
441            let inner = witness(eval, node, shape, reached_by, produced_by, scope, stack);
442            stack.remove(&key);
443            return inner;
444        }
445        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(Witness::Atom {
446            shape: id,
447            node: node.clone(),
448            reached_by: reached_by.clone(),
449            produced_by: produced_by.cloned(),
450        }),
451        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
452            Some(relational_witness(eval, node, id, &shape))
453        }
454        Shape::Closed(ref q) => {
455            let offenders = closed_offenders(eval.backend(), node, q);
456            (!offenders.is_empty()).then(|| Witness::Closed {
457                shape: id,
458                node: node.clone(),
459                offenders,
460            })
461        }
462        Shape::Not(c) => {
463            // Crossing to the deletive side: outer universals don't constrain the
464            // value rebuild, so the inner witness starts from an empty scope.
465            sat_trace(eval, node, c, reached_by, produced_by, stack).map(|t| Witness::Not {
466                shape: id,
467                node: node.clone(),
468                inner: Box::new(t),
469            })
470        }
471        Shape::And(cs) => {
472            // This `And`'s universals join the scope its children witness under, so
473            // a count in one conjunct sees the `∀π.φ` siblings in the others.
474            let mut child_scope: Vec<(Path, ShapeId)> = scope.to_vec();
475            child_scope.extend(cs.iter().filter_map(|&c| as_universal(eval.arena(), c)));
476            let failed: Vec<Witness> = cs
477                .iter()
478                .filter_map(|c| {
479                    witness(eval, node, *c, reached_by, produced_by, &child_scope, stack)
480                })
481                .collect();
482            (!failed.is_empty()).then(|| Witness::All {
483                shape: id,
484                node: node.clone(),
485                failed,
486            })
487        }
488        Shape::Or(cs) => {
489            // Or failed ⟹ every disjunct failed. A disjunction does not conjoin its
490            // branches, so the scope passes through unchanged (no branch's
491            // universals constrain another's).
492            let branches: Vec<Witness> = cs
493                .iter()
494                .filter_map(|c| witness(eval, node, *c, reached_by, produced_by, scope, stack))
495                .collect();
496            (!branches.is_empty()).then(|| Witness::Any {
497                shape: id,
498                node: node.clone(),
499                branches,
500            })
501        }
502        Shape::Count {
503            path,
504            min,
505            max,
506            qualifier,
507        } => count_witness(
508            eval, node, id, &path, min, max, qualifier, reached_by, scope, stack,
509        ),
510        Shape::Sparql(_) => Some(Witness::Opaque {
511            shape: id,
512            node: node.clone(),
513        }),
514    };
515    stack.remove(&key);
516    result
517}
518
519/// Peel transparent `sh:severity` (`Annotated`) wrappers to the shape beneath.
520fn peel_annotated(arena: &ShapeArena, mut id: ShapeId) -> ShapeId {
521    while let Shape::Annotated { shape, .. } = arena.get(id) {
522        id = *shape;
523    }
524    id
525}
526
527/// If `id` is a universal `∀π.φ` (encoded `∃≤0 π.¬φ`, modulo `Annotated`
528/// wrappers), its `(π, φ)`. `None` for any other shape. This is what an `And`
529/// contributes to the scope its children witness under: such universals hold
530/// vacuously on an empty path, yet any value later added there must satisfy `φ`.
531fn as_universal(arena: &ShapeArena, id: ShapeId) -> Option<(Path, ShapeId)> {
532    let Shape::Count {
533        path,
534        max: Some(0),
535        qualifier,
536        ..
537    } = arena.get(peel_annotated(arena, id)).clone()
538    else {
539        return None;
540    };
541    match arena.get(peel_annotated(arena, qualifier)) {
542        Shape::Not(inner) => Some((path, *inner)),
543        _ => None,
544    }
545}
546
547#[allow(clippy::too_many_arguments)]
548fn count_witness(
549    eval: &mut ShapeEvaluator<'_>,
550    node: &Term,
551    id: ShapeId,
552    path: &Path,
553    min: Option<u64>,
554    max: Option<u64>,
555    qualifier: ShapeId,
556    reached_by: &Path,
557    scope: &[(Path, ShapeId)],
558    stack: &mut Stack,
559) -> Option<Witness> {
560    let values: Vec<Term> = succ(eval.backend(), node, path).into_iter().collect();
561    let matched: Vec<Term> = values
562        .into_iter()
563        .filter(|u| eval.holds(u, qualifier))
564        .collect();
565    let n = matched.len() as u64;
566
567    if let Some(m) = min
568        && n < m
569    {
570        // Universals conjoined on this same path apply to every value we add.
571        let sibling_qualifiers = scope
572            .iter()
573            .filter(|(p, _)| p == path)
574            .map(|(_, inner)| *inner)
575            .collect();
576        return Some(Witness::CountLow {
577            shape: id,
578            node: node.clone(),
579            path: path.clone(),
580            qualifier,
581            have: n,
582            min: m,
583            sibling_qualifiers,
584        });
585    }
586    if let Some(m) = max
587        && n > m
588    {
589        let matched_sup: Vec<(Term, PathSupport)> = matched
590            .iter()
591            .map(|u| {
592                (
593                    u.clone(),
594                    path_support(eval.backend(), node, path, u).unwrap_or(PathSupport::Empty),
595                )
596            })
597            .collect();
598        // ∀-encoding `∃≤0 π.¬inner`: drill into each offending value.
599        let per_value = if m == 0 {
600            if let Shape::Not(inner) = eval.arena().get(qualifier).clone() {
601                let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
602                let mut pv = Vec::new();
603                for u in &matched {
604                    let ps = path_support(eval.backend(), node, path, u);
605                    // Replace-in-place rebuilds `u` against `inner` from a fresh
606                    // scope; sibling universals for this dual are future work.
607                    if let Some(w) = witness(eval, u, inner, &reached, ps.as_ref(), &[], stack) {
608                        pv.push((u.clone(), w));
609                    }
610                }
611                pv
612            } else {
613                Vec::new()
614            }
615        } else {
616            Vec::new()
617        };
618        return Some(Witness::CountHigh {
619            shape: id,
620            node: node.clone(),
621            path: path.clone(),
622            qualifier,
623            matched: matched_sup,
624            max: m,
625            per_value,
626        });
627    }
628    None
629}
630
631fn relational_witness(
632    eval: &ShapeEvaluator<'_>,
633    node: &Term,
634    id: ShapeId,
635    shape: &Shape,
636) -> Witness {
637    let g = eval.backend();
638    let (kind, lpath, rpred) = match shape {
639        Shape::Eq(p, q) => (RelKind::Eq, p.clone(), Some(q.clone())),
640        Shape::Disj(p, q) => (RelKind::Disj, p.clone(), Some(q.clone())),
641        Shape::Lt(p, q) => (RelKind::Lt, p.clone(), Some(q.clone())),
642        Shape::Le(p, q) => (RelKind::Le, p.clone(), Some(q.clone())),
643        Shape::UniqueLang(p) => (RelKind::UniqueLang, p.clone(), None),
644        _ => unreachable!("relational_witness on non-relational shape"),
645    };
646    let with_support = |g: &dyn PathBackend, p: &Path| -> Vec<(Term, PathSupport)> {
647        succ(g, node, p)
648            .into_iter()
649            .map(|v| {
650                let s = path_support(g, node, p, &v).unwrap_or(PathSupport::Empty);
651                (v, s)
652            })
653            .collect()
654    };
655    let lhs = with_support(g, &lpath);
656    let rhs = match &rpred {
657        Some(q) => with_support(g, &Path::Pred(q.clone())),
658        None => Vec::new(),
659    };
660    let offending = offending_pairs(kind, &lhs, &rhs);
661    Witness::Relational {
662        shape: id,
663        node: node.clone(),
664        kind,
665        lhs,
666        rhs,
667        offending,
668    }
669}
670
671/// The witnessing pairs/members for a failed relational constraint.
672fn offending_pairs(
673    kind: RelKind,
674    lhs: &[(Term, PathSupport)],
675    rhs: &[(Term, PathSupport)],
676) -> Vec<(Term, Term)> {
677    let lvals: Vec<&Term> = lhs.iter().map(|(v, _)| v).collect();
678    let rvals: Vec<&Term> = rhs.iter().map(|(v, _)| v).collect();
679    match kind {
680        RelKind::Eq => lvals
681            .iter()
682            .filter(|v| !rvals.contains(v))
683            .chain(rvals.iter().filter(|v| !lvals.contains(v)))
684            .map(|v| ((*v).clone(), (*v).clone()))
685            .collect(),
686        RelKind::Disj => lvals
687            .iter()
688            .filter(|v| rvals.contains(v))
689            .map(|v| ((*v).clone(), (*v).clone()))
690            .collect(),
691        RelKind::Lt | RelKind::Le => {
692            // pairs (a, b) that fail a < b (resp. a ≤ b)
693            let mut bad = Vec::new();
694            for a in &lvals {
695                for b in &rvals {
696                    let ok = match crate::value::compare_terms(a, b) {
697                        Some(std::cmp::Ordering::Less) => true,
698                        Some(std::cmp::Ordering::Equal) => kind == RelKind::Le,
699                        _ => false,
700                    };
701                    if !ok {
702                        bad.push(((*a).clone(), (*b).clone()));
703                    }
704                }
705            }
706            bad
707        }
708        RelKind::UniqueLang => {
709            // pairs of lhs values sharing a language tag
710            let mut bad = Vec::new();
711            for i in 0..lvals.len() {
712                for j in (i + 1)..lvals.len() {
713                    if let (Term::Literal(a), Term::Literal(b)) = (lvals[i], lvals[j])
714                        && let (Some(la), Some(lb)) = (a.language(), b.language())
715                        && la.eq_ignore_ascii_case(lb)
716                    {
717                        bad.push((lvals[i].clone(), lvals[j].clone()));
718                    }
719                }
720            }
721            bad
722        }
723    }
724}
725
726/// Support for why `φ` holds at `node`. `None` ⟺ it fails. The dual of [`witness`].
727fn sat_trace(
728    eval: &mut ShapeEvaluator<'_>,
729    node: &Term,
730    id: ShapeId,
731    reached_by: &Path,
732    produced_by: Option<&PathSupport>,
733    stack: &mut Stack,
734) -> Option<SatTrace> {
735    let key = (id, node.clone());
736    if !stack.insert(key.clone()) {
737        // back-edge: coinductively assumed true, no grounded support to delete.
738        return Some(SatTrace::Coinductive {
739            shape: id,
740            node: node.clone(),
741        });
742    }
743    if !eval.holds(node, id) {
744        stack.remove(&key);
745        return None; // fails ⇒ no satisfaction to break
746    }
747    let shape = eval.arena().get(id).clone();
748    let result = match shape {
749        Shape::Top | Shape::Pending => Some(SatTrace::Irrefutable { shape: id }),
750        // `sh:severity` is transparent: break the satisfaction of the wrapped shape.
751        Shape::Annotated { shape, .. } => {
752            let inner = sat_trace(eval, node, shape, reached_by, produced_by, stack);
753            stack.remove(&key);
754            return inner;
755        }
756        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(SatTrace::Atom {
757            shape: id,
758            node: node.clone(),
759            reached_by: reached_by.clone(),
760            produced_by: produced_by.cloned().unwrap_or(PathSupport::Empty),
761        }),
762        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
763            Some(SatTrace::Blocked {
764                shape: id,
765                node: node.clone(),
766                reason: BlockReason::Unsupported,
767            })
768        }
769        Shape::Closed(_) => Some(SatTrace::Blocked {
770            shape: id,
771            node: node.clone(),
772            reason: BlockReason::ClosedNeedsAdd,
773        }),
774        Shape::Sparql(_) => Some(SatTrace::Blocked {
775            shape: id,
776            node: node.clone(),
777            reason: BlockReason::OpaqueSparql,
778        }),
779        Shape::Not(c) => {
780            witness(eval, node, c, reached_by, produced_by, &[], stack).map(|w| SatTrace::NotHeld {
781                shape: id,
782                node: node.clone(),
783                inner_fails: Box::new(w),
784            })
785        }
786        Shape::And(cs) => {
787            // holds ⟹ all children hold.
788            let children: Vec<SatTrace> = cs
789                .iter()
790                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
791                .collect();
792            Some(SatTrace::AllHeld {
793                shape: id,
794                node: node.clone(),
795                children,
796            })
797        }
798        Shape::Or(cs) => {
799            let satisfied: Vec<SatTrace> = cs
800                .iter()
801                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
802                .collect();
803            Some(SatTrace::AnyHeld {
804                shape: id,
805                node: node.clone(),
806                satisfied,
807            })
808        }
809        Shape::Count {
810            path,
811            min,
812            max,
813            qualifier,
814        } => {
815            let values: Vec<Term> = succ(eval.backend(), node, &path).into_iter().collect();
816            let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
817            let mut matches = Vec::new();
818            for u in values {
819                if eval.holds(&u, qualifier) {
820                    let ps = path_support(eval.backend(), node, &path, &u);
821                    if let Some(t) = sat_trace(eval, &u, qualifier, &reached, ps.as_ref(), stack) {
822                        matches.push((u, t));
823                    }
824                }
825            }
826            Some(SatTrace::CountHeld {
827                shape: id,
828                node: node.clone(),
829                path: path.clone(),
830                qualifier,
831                matches,
832                min,
833                max,
834            })
835        }
836    };
837    stack.remove(&key);
838    result
839}
840
841/// Predicates+objects on `node` not allowed by a closed shape's set `q`.
842fn closed_offenders(
843    g: &dyn PathBackend,
844    node: &Term,
845    q: &BTreeSet<NamedNode>,
846) -> Vec<(NamedNode, Term)> {
847    let allowed: HashSet<&NamedNode> = q.iter().collect();
848    let mut out = Vec::new();
849    for p in g.out_predicates(node) {
850        if !allowed.contains(&p) {
851            for o in g.objects(node, &p) {
852                out.push((p.clone(), o));
853            }
854        }
855    }
856    out
857}
858
859/// The existing triples that make `to` a `π`-successor of `from`, if any. The
860/// edges a deletion would cut to remove `to` from the value set.
861fn path_support(g: &dyn PathBackend, from: &Term, path: &Path, to: &Term) -> Option<PathSupport> {
862    match path {
863        Path::Id => (from == to).then_some(PathSupport::Empty),
864        Path::Pred(q) => {
865            if g.objects(from, q).contains(to) {
866                let s = node_of(from)?;
867                Some(PathSupport::Edge(Triple::new(s, q.clone(), to.clone())))
868            } else {
869                None
870            }
871        }
872        Path::Inverse(p) => path_support(g, to, p, from),
873        Path::Alt(ps) => ps.iter().find_map(|p| path_support(g, from, p, to)),
874        Path::Seq(ps) => seq_support(g, from, ps, to),
875        Path::Star(p) => star_support(g, from, p, to),
876    }
877}
878
879fn seq_support(g: &dyn PathBackend, from: &Term, ps: &[Path], to: &Term) -> Option<PathSupport> {
880    let Some((first, rest)) = ps.split_first() else {
881        return (from == to).then_some(PathSupport::Empty);
882    };
883    for mid in succ(g, from, first) {
884        let Some(head) = path_support(g, from, first, &mid) else {
885            continue;
886        };
887        if let Some(tail) = seq_support(g, &mid, rest, to) {
888            let mut chain = vec![head];
889            match tail {
890                PathSupport::Empty => {}
891                PathSupport::Chain(v) => chain.extend(v),
892                other => chain.push(other),
893            }
894            return Some(PathSupport::Chain(chain));
895        }
896    }
897    None
898}
899
900fn star_support(g: &dyn PathBackend, from: &Term, p: &Path, to: &Term) -> Option<PathSupport> {
901    if from == to {
902        return Some(PathSupport::Empty);
903    }
904    let mut visited: HashSet<Term> = HashSet::from([from.clone()]);
905    let mut queue: VecDeque<(Term, Vec<PathSupport>)> =
906        VecDeque::from([(from.clone(), Vec::new())]);
907    while let Some((cur, chain)) = queue.pop_front() {
908        for next in succ(g, &cur, p) {
909            let Some(edge) = path_support(g, &cur, p, &next) else {
910                continue;
911            };
912            let mut chain2 = chain.clone();
913            chain2.push(edge);
914            if next == *to {
915                return Some(PathSupport::Chain(chain2));
916            }
917            if visited.insert(next.clone()) {
918                queue.push_back((next, chain2));
919            }
920        }
921    }
922    None
923}
924
925#[cfg(test)]
926mod tests {
927    use super::*;
928    use shifty_parse::{load_turtle, parse_turtle};
929
930    const PREFIXES: &str = r#"
931        @prefix sh:  <http://www.w3.org/ns/shacl#> .
932        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
933        @prefix ex:  <http://ex/> .
934        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
935    "#;
936
937    fn run(ttl: &str) -> Vec<FocusWitness> {
938        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
939        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
940        witness_violations(&loaded.graph, &loaded.graph, &parsed.schema).expect("stratifiable")
941    }
942
943    /// Does any node in the witness tree satisfy `pred`?
944    fn any(w: &Witness, pred: &impl Fn(&Witness) -> bool) -> bool {
945        if pred(w) {
946            return true;
947        }
948        match w {
949            Witness::All { failed, .. } => failed.iter().any(|c| any(c, pred)),
950            Witness::Any { branches, .. } => branches.iter().any(|c| any(c, pred)),
951            Witness::CountHigh { per_value, .. } => per_value.iter().any(|(_, c)| any(c, pred)),
952            _ => false,
953        }
954    }
955
956    #[test]
957    fn conforming_graph_yields_no_witnesses() {
958        let ttl = format!(
959            "{PREFIXES}
960            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
961                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
962            ex:x ex:p ex:y .
963            "
964        );
965        assert!(run(&ttl).is_empty());
966    }
967
968    #[test]
969    fn min_count_violation_is_count_low() {
970        let ttl = format!(
971            "{PREFIXES}
972            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
973                sh:property [ sh:path ex:p ; sh:minCount 2 ] .
974            ex:x ex:p ex:y .
975            "
976        );
977        let ws = run(&ttl);
978        assert_eq!(ws.len(), 1);
979        assert_eq!(ws[0].focus.to_string(), "<http://ex/x>");
980        assert!(any(&ws[0].failure, &|w| matches!(
981            w,
982            Witness::CountLow {
983                have: 1,
984                min: 2,
985                ..
986            }
987        )));
988    }
989
990    /// Every `CountLow`'s `sibling_qualifiers`, in pre-order.
991    fn count_low_siblings(w: &Witness) -> Vec<Vec<ShapeId>> {
992        fn go(w: &Witness, out: &mut Vec<Vec<ShapeId>>) {
993            match w {
994                Witness::CountLow {
995                    sibling_qualifiers, ..
996                } => out.push(sibling_qualifiers.clone()),
997                Witness::All { failed, .. } => failed.iter().for_each(|c| go(c, out)),
998                Witness::Any { branches, .. } => branches.iter().for_each(|c| go(c, out)),
999                Witness::CountHigh { per_value, .. } => {
1000                    per_value.iter().for_each(|(_, c)| go(c, out))
1001                }
1002                _ => {}
1003            }
1004        }
1005        let mut out = Vec::new();
1006        go(w, &mut out);
1007        out
1008    }
1009
1010    #[test]
1011    fn cross_property_universals_attach_to_count_low() {
1012        // The min-count and the `sh:class` live in *separate* property shapes on the
1013        // same path — different `And` conjuncts. The class universal holds vacuously
1014        // (no values), so it never witnesses as a failure, yet a value added for the
1015        // count must still satisfy it: it must reach the `CountLow` across `And`s.
1016        let ttl = format!(
1017            "{PREFIXES}
1018            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1019                sh:property [ sh:path ex:p ; sh:minCount 1 ] ;
1020                sh:property [ sh:path ex:p ; sh:class ex:C ] .
1021            ex:x a ex:Thing .
1022            "
1023        );
1024        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1025        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1026        let ws =
1027            witness_violations(&loaded.graph, &loaded.graph, &parsed.schema).expect("stratifiable");
1028        assert_eq!(ws.len(), 1);
1029        let sibs = count_low_siblings(&ws[0].failure);
1030        assert_eq!(sibs.len(), 1, "one CountLow");
1031        assert_eq!(sibs[0].len(), 1, "the class universal is attached");
1032        assert_eq!(
1033            class_of(sibs[0][0], &parsed.schema.arena),
1034            Some("http://ex/C".to_string()),
1035        );
1036    }
1037
1038    #[test]
1039    fn disjoint_or_branch_universal_does_not_attach() {
1040        // The `sh:class` sits in a *different* `sh:or` branch than the count, so the
1041        // two are not conjoined — a value satisfying the count branch need not be a
1042        // class. Scope must not cross the disjunction: the CountLow carries no
1043        // sibling. (Both branches fail here, so the Or genuinely witnesses.)
1044        let ttl = format!(
1045            "{PREFIXES}
1046            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1047                sh:or (
1048                    [ sh:path ex:p ; sh:minCount 2 ]
1049                    [ sh:path ex:p ; sh:class ex:C ]
1050                ) .
1051            ex:x a ex:Thing ; ex:p ex:y .
1052            "
1053        );
1054        let ws = run(&ttl);
1055        assert_eq!(ws.len(), 1);
1056        let sibs = count_low_siblings(&ws[0].failure);
1057        assert!(!sibs.is_empty(), "the count branch yields a CountLow");
1058        assert!(
1059            sibs.iter().all(|s| s.is_empty()),
1060            "no universal leaks across the disjunction: {sibs:?}",
1061        );
1062    }
1063
1064    /// The class IRI of a `∃≥1 (rdf:type/subClassOf*).test(C)` shape, as a string.
1065    fn class_of(id: ShapeId, arena: &ShapeArena) -> Option<String> {
1066        match shifty_algebra::render::class_target_shape(id, arena)? {
1067            Term::NamedNode(n) => Some(n.as_str().to_string()),
1068            _ => None,
1069        }
1070    }
1071
1072    #[test]
1073    fn datatype_violation_is_an_atom_with_support() {
1074        let ttl = format!(
1075            "{PREFIXES}
1076            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1077                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
1078            ex:x ex:p \"hello\" .
1079            "
1080        );
1081        let ws = run(&ttl);
1082        assert_eq!(ws.len(), 1);
1083        // The bad value is reached via ex:p, so its atom carries a cut edge.
1084        assert!(any(&ws[0].failure, &|w| matches!(
1085            w,
1086            Witness::Atom {
1087                produced_by: Some(PathSupport::Edge(_)),
1088                ..
1089            }
1090        )));
1091    }
1092
1093    #[test]
1094    fn focus_level_nodekind_atom_has_no_support() {
1095        let ttl = format!(
1096            "{PREFIXES}
1097            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1098                sh:nodeKind sh:IRI .
1099            ex:x ex:p \"v\" .
1100            ex:y ex:q ex:x .
1101            "
1102        );
1103        // ex:x is an IRI so it conforms; use a literal-targeted shape instead:
1104        let _ = ttl;
1105        let ttl2 = format!(
1106            "{PREFIXES}
1107            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1108                sh:nodeKind sh:Literal .
1109            ex:x ex:p ex:y .
1110            "
1111        );
1112        let ws = run(&ttl2);
1113        assert_eq!(ws.len(), 1);
1114        assert!(matches!(
1115            ws[0].failure,
1116            Witness::Atom {
1117                produced_by: None,
1118                ..
1119            }
1120        ));
1121    }
1122
1123    #[test]
1124    fn non_stratifiable_schema_is_diagnosed() {
1125        let ttl = format!(
1126            "{PREFIXES}
1127            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1128                sh:not [ sh:path ex:p ; sh:qualifiedValueShape ex:S ; sh:qualifiedMinCount 1 ] .
1129            ex:x ex:p ex:y .
1130            "
1131        );
1132        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1133        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1134        assert!(witness_violations(&loaded.graph, &loaded.graph, &parsed.schema).is_err());
1135    }
1136
1137    #[test]
1138    fn witness_shape_and_satisfy_shape_scope_to_one_shape() {
1139        // Two targeted shapes; one focus fails ex:S, one passes ex:S, and a third
1140        // node fails an unrelated shape ex:T that the ex:S queries must ignore.
1141        let ttl = format!(
1142            "{PREFIXES}
1143            ex:S a sh:NodeShape ; sh:targetClass ex:C ;
1144                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
1145            ex:T a sh:NodeShape ; sh:targetClass ex:D ;
1146                sh:property [ sh:path ex:q ; sh:minCount 1 ] .
1147            ex:good a ex:C ; ex:p ex:y .
1148            ex:bad  a ex:C .
1149            ex:other a ex:D .
1150            "
1151        );
1152        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1153        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1154        let schema = &parsed.schema;
1155
1156        let s = shape_id_for_iri(schema, "http://ex/S").expect("ex:S is named");
1157        assert!(shape_id_for_iri(schema, "http://ex/missing").is_none());
1158
1159        // Failures: just ex:bad, never the ex:T violation on ex:other.
1160        let fails = witness_shape(&loaded.graph, &loaded.graph, schema, s).expect("stratifiable");
1161        assert_eq!(fails.len(), 1);
1162        assert_eq!(fails[0].focus.to_string(), "<http://ex/bad>");
1163
1164        // Satisfactions: just ex:good, with the matched value recorded.
1165        let sats = satisfy_shape(&loaded.graph, &loaded.graph, schema, s).expect("stratifiable");
1166        assert_eq!(sats.len(), 1);
1167        assert_eq!(sats[0].focus.to_string(), "<http://ex/good>");
1168        // ex:good holds because the ex:p count is met by ex:y.
1169        assert!(any_sat(&sats[0].trace, &|t| matches!(
1170            t,
1171            SatTrace::CountHeld { matches, .. } if matches.iter().any(|(v, _)| v.to_string() == "<http://ex/y>")
1172        )));
1173    }
1174
1175    /// Does any node in the satisfaction trace satisfy `pred`?
1176    fn any_sat(t: &SatTrace, pred: &impl Fn(&SatTrace) -> bool) -> bool {
1177        if pred(t) {
1178            return true;
1179        }
1180        match t {
1181            SatTrace::AllHeld { children, .. } => children.iter().any(|c| any_sat(c, pred)),
1182            SatTrace::AnyHeld { satisfied, .. } => satisfied.iter().any(|c| any_sat(c, pred)),
1183            SatTrace::CountHeld { matches, .. } => matches.iter().any(|(_, c)| any_sat(c, pred)),
1184            _ => false,
1185        }
1186    }
1187}