Skip to main content

shifty_engine/
witness.rs

1//! The witnessing evaluator (`docs/06-repair.md` §5) — the structured, lossless
2//! sibling of [`explain`](crate::validate). For a focus node that fails a
3//! statement it returns a [`Witness`]: the failed sub-DAG of `φ`, pruned to
4//! exactly what did not hold, with the structural gap at each node. Its dual,
5//! [`SatTrace`], records *why* a shape currently holds, so a `Not(φ)` failure can
6//! be repaired by breaking `φ`. The two are mutually recursive through `Not`.
7//!
8//! This is the input to repair synthesis; it makes no repair decisions. It reuses
9//! the [`ShapeEvaluator`] satisfaction oracle (`holds`) and its gfp back-edge
10//! guard verbatim, so witnessing agrees with validation by construction.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, succ};
14use crate::sparql::SparqlExecutor;
15use crate::validate::{NonStratifiable, ShapeEvaluator, focus_nodes_with, uses_shapes_graph};
16use oxrdf::{Graph, NamedNode, Term, Triple};
17use serde::{Deserialize, Serialize};
18use shifty_algebra::{Path, Schema, Shape, ShapeArena, ShapeId};
19use shifty_opt::analyze;
20use std::collections::{BTreeSet, HashSet, VecDeque};
21
22/// Why one focus node failed one statement: the failed sub-structure of `φ`,
23/// pruned to exactly the parts that did not hold. The input to repair synthesis.
24#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
25pub struct FocusWitness {
26    pub focus: Term,
27    /// Index of the violated `(selector, shape)` statement in the schema.
28    pub statement: usize,
29    pub failure: Witness,
30}
31
32/// The relational (pairwise) leaf constraints — distinct from value-type atoms.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34pub enum RelKind {
35    Eq,
36    Disj,
37    Lt,
38    Le,
39    UniqueLang,
40}
41
42/// The existing triples that make a node a `π`-successor of its parent: the
43/// edges a deletion would cut. The deletion-side dual of path materialization.
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub enum PathSupport {
46    /// Reflexive (`Id`): the node reached itself; nothing to cut.
47    Empty,
48    /// A single triple edge whose removal breaks this step.
49    Edge(Triple),
50    /// A chain (`Seq`/`Star` expansion): every edge present; cut any one to break.
51    Chain(Vec<PathSupport>),
52    /// Parallel witnessing chains (`Alt`): cut all to break.
53    Alt(Vec<PathSupport>),
54}
55
56/// The failed sub-structure of `φ` (additive direction: what to *add*).
57#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
58pub enum Witness {
59    /// A value-type leaf failed at `node` (TestConst / TestType / TestKind).
60    /// `produced_by` names the triples that made `node` a value (`Some` for a
61    /// value-scoped atom, `None` for one on the focus itself).
62    Atom {
63        shape: ShapeId,
64        node: Term,
65        reached_by: Path,
66        produced_by: Option<PathSupport>,
67    },
68    /// A relational leaf failed (Eq / Disj / Lt / Le / UniqueLang): `lhs`/`rhs`
69    /// carry the two compared value-sets with their support, `offending` the
70    /// witnessing pairs/members.
71    Relational {
72        shape: ShapeId,
73        node: Term,
74        kind: RelKind,
75        lhs: Vec<(Term, PathSupport)>,
76        rhs: Vec<(Term, PathSupport)>,
77        offending: Vec<(Term, Term)>,
78    },
79    /// `closed(Q)` failed: these (predicate, object) pairs are not allowed.
80    Closed {
81        shape: ShapeId,
82        node: Term,
83        offenders: Vec<(NamedNode, Term)>,
84    },
85    /// `¬φ` failed because `φ` holds at `node` — it must be falsified.
86    Not {
87        shape: ShapeId,
88        node: Term,
89        inner: Box<SatTrace>,
90    },
91    /// Conjunction: every listed child failed and ALL must be repaired.
92    All {
93        shape: ShapeId,
94        node: Term,
95        failed: Vec<Witness>,
96    },
97    /// Disjunction: no branch held; repairing ANY ONE suffices.
98    Any {
99        shape: ShapeId,
100        node: Term,
101        branches: Vec<Witness>,
102    },
103    /// `∃≥min π.q` under-satisfied: `have` values match, `min` required.
104    ///
105    /// `sibling_qualifiers` collects the inner shapes of every `∀π.φ` universal
106    /// (encoded as `∃≤0 π.¬φ`) on this same path that is *conjoined above* this
107    /// count — including ones from sibling property shapes, i.e. a different `And`
108    /// node. Those universals are vacuously satisfied when their path is empty, so
109    /// they never witness as failures themselves, yet any value added to satisfy
110    /// this `CountLow` must also conform to each of them.
111    CountLow {
112        shape: ShapeId,
113        node: Term,
114        path: Path,
115        qualifier: ShapeId,
116        have: u64,
117        min: u64,
118        sibling_qualifiers: Vec<ShapeId>,
119    },
120    /// `∃≤max π.q` over-satisfied. `matched` pairs each counted value with its
121    /// support (so deletion cuts the right edges for `Seq`/`Star` paths).
122    /// `per_value` is populated only for the `∀`-encoding (`∃≤0 π.¬inner`).
123    CountHigh {
124        shape: ShapeId,
125        node: Term,
126        path: Path,
127        qualifier: ShapeId,
128        matched: Vec<(Term, PathSupport)>,
129        max: u64,
130        per_value: Vec<(Term, Witness)>,
131    },
132    /// Opaque SPARQL — no algebraic witness.
133    Opaque { shape: ShapeId, node: Term },
134}
135
136/// Why `φ` currently *holds* at a node (deletive direction: what to *delete*).
137/// The dual of [`Witness`].
138#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
139pub enum SatTrace {
140    /// `⊤` — vacuously true; no graph edit falsifies it.
141    Irrefutable { shape: ShapeId },
142    /// A value-type leaf holds at `node`; `produced_by` names the edges to cut.
143    Atom {
144        shape: ShapeId,
145        node: Term,
146        reached_by: Path,
147        produced_by: PathSupport,
148    },
149    /// Conjunction holds because ALL children hold ⟹ break ANY ONE.
150    AllHeld {
151        shape: ShapeId,
152        node: Term,
153        children: Vec<SatTrace>,
154    },
155    /// Disjunction holds because these branches hold ⟹ break EVERY one.
156    AnyHeld {
157        shape: ShapeId,
158        node: Term,
159        satisfied: Vec<SatTrace>,
160    },
161    /// `∃[min..max] π.q` holds. `matches` carries each counted value with its
162    /// q-support.
163    CountHeld {
164        shape: ShapeId,
165        node: Term,
166        path: Path,
167        qualifier: ShapeId,
168        matches: Vec<(Term, SatTrace)>,
169        min: Option<u64>,
170        max: Option<u64>,
171    },
172    /// `¬φ` holds because `φ` fails ⟹ make `φ` hold. Flips to the additive side.
173    NotHeld {
174        shape: ShapeId,
175        node: Term,
176        inner_fails: Box<Witness>,
177    },
178    /// Holds but cannot be falsified by data deletion in scope (closed / relational
179    /// / opaque SPARQL).
180    Blocked {
181        shape: ShapeId,
182        node: Term,
183        reason: BlockReason,
184    },
185    /// Support reached only through a gfp back-edge: coinductively assumed true,
186    /// with no finite set of facts to delete.
187    Coinductive { shape: ShapeId, node: Term },
188}
189
190/// Why a holding shape admits no data-deletion repair.
191#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
192pub enum BlockReason {
193    OpaqueSparql,
194    /// Falsifying `closed(Q)` would need *adding* a disallowed predicate.
195    ClosedNeedsAdd,
196    /// Relational falsification is not synthesized in this cut.
197    Unsupported,
198}
199
200type Stack = HashSet<(ShapeId, Term)>;
201
202/// Strat-check and build the SPARQL executor shared by every witnessing entry
203/// point. The caller derives `backend`/`ShapeEvaluator` from the returned
204/// executor — those borrow it, so they can't be bundled in here.
205fn prepare(data: &Graph, schema: &Schema) -> Result<SparqlExecutor, NonStratifiable> {
206    let strat = analyze(&schema.arena);
207    if !strat.stratifiable {
208        let components = strat
209            .strata
210            .iter()
211            .filter(|s| !s.stratifiable)
212            .map(|s| s.shapes.clone())
213            .collect();
214        return Err(NonStratifiable { components });
215    }
216
217    let uses_shapes = uses_shapes_graph(&schema.arena);
218    let frozen = if uses_shapes {
219        FrozenIndexedDataset::from_graphs(data, data)
220    } else {
221        FrozenIndexedDataset::from_graph(data)
222    };
223    Ok(SparqlExecutor::from_frozen(frozen, uses_shapes))
224}
225
226/// Witness every `(focus, statement)` that fails, mirroring `validate`'s driver.
227pub fn witness_violations(
228    data: &Graph,
229    schema: &Schema,
230) -> Result<Vec<FocusWitness>, NonStratifiable> {
231    let sparql = prepare(data, schema)?;
232    let backend = sparql
233        .frozen()
234        .expect("witness executor always has a frozen dataset");
235    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
236
237    let mut out = Vec::new();
238    for (i, st) in schema.statements.iter().enumerate() {
239        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
240            let mut stack = Stack::new();
241            if let Some(failure) = witness(
242                &mut evaluator,
243                &v,
244                st.shape,
245                &Path::Id,
246                None,
247                &[],
248                &mut stack,
249            ) {
250                out.push(FocusWitness {
251                    focus: v,
252                    statement: i,
253                    failure,
254                });
255            }
256        }
257    }
258    Ok(out)
259}
260
261/// The arena slot a named shape IRI refers to, if the schema names one. `iri` is
262/// matched bare (no angle brackets), the form stored in [`Schema::names`].
263pub fn shape_id_for_iri(schema: &Schema, iri: &str) -> Option<ShapeId> {
264    schema
265        .names
266        .iter()
267        .find_map(|(id, name)| (name == iri).then_some(*id))
268}
269
270/// Witness only the `(focus, statement)` violations whose statement targets
271/// `shape` — the shape-scoped sibling of [`witness_violations`]. Returns the
272/// *failing* foci with their [`Witness`] trees; passing foci are the domain of
273/// [`satisfy_shape`]. Use [`shape_id_for_iri`] to resolve an IRI to its
274/// `ShapeId`.
275pub fn witness_shape(
276    data: &Graph,
277    schema: &Schema,
278    shape: ShapeId,
279) -> Result<Vec<FocusWitness>, NonStratifiable> {
280    let sparql = prepare(data, schema)?;
281    let backend = sparql
282        .frozen()
283        .expect("witness executor always has a frozen dataset");
284    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
285
286    let mut out = Vec::new();
287    for (i, st) in schema.statements.iter().enumerate() {
288        if st.shape != shape {
289            continue;
290        }
291        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
292            let mut stack = Stack::new();
293            if let Some(failure) = witness(
294                &mut evaluator,
295                &v,
296                st.shape,
297                &Path::Id,
298                None,
299                &[],
300                &mut stack,
301            ) {
302                out.push(FocusWitness {
303                    focus: v,
304                    statement: i,
305                    failure,
306                });
307            }
308        }
309    }
310    Ok(out)
311}
312
313/// Why one focus node *satisfies* one statement: the [`SatTrace`] recording why
314/// `φ` holds, including the values matched along each checked path. The
315/// satisfaction-side dual of [`FocusWitness`].
316#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
317pub struct FocusSat {
318    pub focus: Term,
319    /// Index of the satisfied `(selector, shape)` statement in the schema.
320    pub statement: usize,
321    pub trace: SatTrace,
322}
323
324/// Trace every `(focus, statement)` that *holds* for a statement targeting
325/// `shape` — the dual of [`witness_shape`]. Each [`FocusSat`] carries why the
326/// node conforms, down to the values matched along every checked path (see
327/// [`SatTrace`]). Use [`shape_id_for_iri`] to resolve an IRI to its `ShapeId`.
328pub fn satisfy_shape(
329    data: &Graph,
330    schema: &Schema,
331    shape: ShapeId,
332) -> Result<Vec<FocusSat>, NonStratifiable> {
333    let sparql = prepare(data, schema)?;
334    let backend = sparql
335        .frozen()
336        .expect("witness executor always has a frozen dataset");
337    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
338
339    let mut out = Vec::new();
340    for (i, st) in schema.statements.iter().enumerate() {
341        if st.shape != shape {
342            continue;
343        }
344        for v in focus_nodes_with(data, backend, &st.selector, &schema.arena, &sparql) {
345            let mut stack = Stack::new();
346            if let Some(trace) =
347                sat_trace(&mut evaluator, &v, st.shape, &Path::Id, None, &mut stack)
348            {
349                out.push(FocusSat {
350                    focus: v,
351                    statement: i,
352                    trace,
353                });
354            }
355        }
356    }
357    Ok(out)
358}
359
360/// Witness one specific `node` against one specific `shape` (by id) — the building
361/// block for repairing a `ConformsTo` hole: bind it to a node, then witness that
362/// node against the sub-shape and synthesize its repair. Returns `Ok(None)` when
363/// the node already conforms (nothing to build), `Ok(Some(_))` otherwise.
364///
365/// The `statement` field of the returned [`FocusWitness`] is a sentinel
366/// (`usize::MAX`): this is not a top-level statement, and synthesis ignores it.
367pub fn witness_node(
368    data: &Graph,
369    schema: &Schema,
370    node: &Term,
371    shape: ShapeId,
372) -> Result<Option<FocusWitness>, NonStratifiable> {
373    let sparql = prepare(data, schema)?;
374    let backend = sparql
375        .frozen()
376        .expect("witness executor always has a frozen dataset");
377    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
378
379    let mut stack = Stack::new();
380    Ok(witness(
381        &mut evaluator,
382        node,
383        shape,
384        &Path::Id,
385        None,
386        &[],
387        &mut stack,
388    )
389    .map(|failure| FocusWitness {
390        focus: node.clone(),
391        statement: usize::MAX,
392        failure,
393    }))
394}
395
396/// Reasons `φ` (slot `id`) fails at `node`. `None` ⟺ it holds (incl. on a gfp
397/// back-edge). `reached_by` is the structured path from the focus; `produced_by`
398/// is how `node` was reached from its parent value (for replace-in-place).
399///
400/// `scope` carries the universals `∀π.φ` (encoded `∃≤0 π.¬φ`) conjoined *above*
401/// this point — every `And` ancestor contributes its universal children. They
402/// hold vacuously when their path is empty, so they never witness as failures of
403/// their own; a `CountLow` on the same path attaches them as `sibling_qualifiers`
404/// so a value built to satisfy the count also satisfies them (`∀` and a sibling
405/// count may live in *different* property shapes, hence different `And` nodes).
406fn witness(
407    eval: &mut ShapeEvaluator<'_>,
408    node: &Term,
409    id: ShapeId,
410    reached_by: &Path,
411    produced_by: Option<&PathSupport>,
412    scope: &[(Path, ShapeId)],
413    stack: &mut Stack,
414) -> Option<Witness> {
415    let key = (id, node.clone());
416    if !stack.insert(key.clone()) {
417        return None; // back-edge ⇒ assume conforming (gfp)
418    }
419    if eval.holds(node, id) {
420        stack.remove(&key);
421        return None; // holds ⇒ nothing to repair
422    }
423    let shape = eval.arena().get(id).clone();
424    let result = match shape {
425        Shape::Top | Shape::Pending => None,
426        // `sh:severity` is transparent to witnessing: repair the wrapped shape.
427        Shape::Annotated { shape, .. } => {
428            let inner = witness(eval, node, shape, reached_by, produced_by, scope, stack);
429            stack.remove(&key);
430            return inner;
431        }
432        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(Witness::Atom {
433            shape: id,
434            node: node.clone(),
435            reached_by: reached_by.clone(),
436            produced_by: produced_by.cloned(),
437        }),
438        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
439            Some(relational_witness(eval, node, id, &shape))
440        }
441        Shape::Closed(ref q) => {
442            let offenders = closed_offenders(eval.backend(), node, q);
443            (!offenders.is_empty()).then(|| Witness::Closed {
444                shape: id,
445                node: node.clone(),
446                offenders,
447            })
448        }
449        Shape::Not(c) => {
450            // Crossing to the deletive side: outer universals don't constrain the
451            // value rebuild, so the inner witness starts from an empty scope.
452            sat_trace(eval, node, c, reached_by, produced_by, stack).map(|t| Witness::Not {
453                shape: id,
454                node: node.clone(),
455                inner: Box::new(t),
456            })
457        }
458        Shape::And(cs) => {
459            // This `And`'s universals join the scope its children witness under, so
460            // a count in one conjunct sees the `∀π.φ` siblings in the others.
461            let mut child_scope: Vec<(Path, ShapeId)> = scope.to_vec();
462            child_scope.extend(cs.iter().filter_map(|&c| as_universal(eval.arena(), c)));
463            let failed: Vec<Witness> = cs
464                .iter()
465                .filter_map(|c| {
466                    witness(eval, node, *c, reached_by, produced_by, &child_scope, stack)
467                })
468                .collect();
469            (!failed.is_empty()).then(|| Witness::All {
470                shape: id,
471                node: node.clone(),
472                failed,
473            })
474        }
475        Shape::Or(cs) => {
476            // Or failed ⟹ every disjunct failed. A disjunction does not conjoin its
477            // branches, so the scope passes through unchanged (no branch's
478            // universals constrain another's).
479            let branches: Vec<Witness> = cs
480                .iter()
481                .filter_map(|c| witness(eval, node, *c, reached_by, produced_by, scope, stack))
482                .collect();
483            (!branches.is_empty()).then(|| Witness::Any {
484                shape: id,
485                node: node.clone(),
486                branches,
487            })
488        }
489        Shape::Count {
490            path,
491            min,
492            max,
493            qualifier,
494        } => count_witness(
495            eval, node, id, &path, min, max, qualifier, reached_by, scope, stack,
496        ),
497        Shape::Sparql(_) => Some(Witness::Opaque {
498            shape: id,
499            node: node.clone(),
500        }),
501    };
502    stack.remove(&key);
503    result
504}
505
506/// Peel transparent `sh:severity` (`Annotated`) wrappers to the shape beneath.
507fn peel_annotated(arena: &ShapeArena, mut id: ShapeId) -> ShapeId {
508    while let Shape::Annotated { shape, .. } = arena.get(id) {
509        id = *shape;
510    }
511    id
512}
513
514/// If `id` is a universal `∀π.φ` (encoded `∃≤0 π.¬φ`, modulo `Annotated`
515/// wrappers), its `(π, φ)`. `None` for any other shape. This is what an `And`
516/// contributes to the scope its children witness under: such universals hold
517/// vacuously on an empty path, yet any value later added there must satisfy `φ`.
518fn as_universal(arena: &ShapeArena, id: ShapeId) -> Option<(Path, ShapeId)> {
519    let Shape::Count {
520        path,
521        max: Some(0),
522        qualifier,
523        ..
524    } = arena.get(peel_annotated(arena, id)).clone()
525    else {
526        return None;
527    };
528    match arena.get(peel_annotated(arena, qualifier)) {
529        Shape::Not(inner) => Some((path, *inner)),
530        _ => None,
531    }
532}
533
534#[allow(clippy::too_many_arguments)]
535fn count_witness(
536    eval: &mut ShapeEvaluator<'_>,
537    node: &Term,
538    id: ShapeId,
539    path: &Path,
540    min: Option<u64>,
541    max: Option<u64>,
542    qualifier: ShapeId,
543    reached_by: &Path,
544    scope: &[(Path, ShapeId)],
545    stack: &mut Stack,
546) -> Option<Witness> {
547    let values: Vec<Term> = succ(eval.backend(), node, path).into_iter().collect();
548    let matched: Vec<Term> = values
549        .into_iter()
550        .filter(|u| eval.holds(u, qualifier))
551        .collect();
552    let n = matched.len() as u64;
553
554    if let Some(m) = min
555        && n < m
556    {
557        // Universals conjoined on this same path apply to every value we add.
558        let sibling_qualifiers = scope
559            .iter()
560            .filter(|(p, _)| p == path)
561            .map(|(_, inner)| *inner)
562            .collect();
563        return Some(Witness::CountLow {
564            shape: id,
565            node: node.clone(),
566            path: path.clone(),
567            qualifier,
568            have: n,
569            min: m,
570            sibling_qualifiers,
571        });
572    }
573    if let Some(m) = max
574        && n > m
575    {
576        let matched_sup: Vec<(Term, PathSupport)> = matched
577            .iter()
578            .map(|u| {
579                (
580                    u.clone(),
581                    path_support(eval.backend(), node, path, u).unwrap_or(PathSupport::Empty),
582                )
583            })
584            .collect();
585        // ∀-encoding `∃≤0 π.¬inner`: drill into each offending value.
586        let per_value = if m == 0 {
587            if let Shape::Not(inner) = eval.arena().get(qualifier).clone() {
588                let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
589                let mut pv = Vec::new();
590                for u in &matched {
591                    let ps = path_support(eval.backend(), node, path, u);
592                    // Replace-in-place rebuilds `u` against `inner` from a fresh
593                    // scope; sibling universals for this dual are future work.
594                    if let Some(w) = witness(eval, u, inner, &reached, ps.as_ref(), &[], stack) {
595                        pv.push((u.clone(), w));
596                    }
597                }
598                pv
599            } else {
600                Vec::new()
601            }
602        } else {
603            Vec::new()
604        };
605        return Some(Witness::CountHigh {
606            shape: id,
607            node: node.clone(),
608            path: path.clone(),
609            qualifier,
610            matched: matched_sup,
611            max: m,
612            per_value,
613        });
614    }
615    None
616}
617
618fn relational_witness(
619    eval: &ShapeEvaluator<'_>,
620    node: &Term,
621    id: ShapeId,
622    shape: &Shape,
623) -> Witness {
624    let g = eval.backend();
625    let (kind, lpath, rpred) = match shape {
626        Shape::Eq(p, q) => (RelKind::Eq, p.clone(), Some(q.clone())),
627        Shape::Disj(p, q) => (RelKind::Disj, p.clone(), Some(q.clone())),
628        Shape::Lt(p, q) => (RelKind::Lt, p.clone(), Some(q.clone())),
629        Shape::Le(p, q) => (RelKind::Le, p.clone(), Some(q.clone())),
630        Shape::UniqueLang(p) => (RelKind::UniqueLang, p.clone(), None),
631        _ => unreachable!("relational_witness on non-relational shape"),
632    };
633    let with_support = |g: &dyn PathBackend, p: &Path| -> Vec<(Term, PathSupport)> {
634        succ(g, node, p)
635            .into_iter()
636            .map(|v| {
637                let s = path_support(g, node, p, &v).unwrap_or(PathSupport::Empty);
638                (v, s)
639            })
640            .collect()
641    };
642    let lhs = with_support(g, &lpath);
643    let rhs = match &rpred {
644        Some(q) => with_support(g, &Path::Pred(q.clone())),
645        None => Vec::new(),
646    };
647    let offending = offending_pairs(kind, &lhs, &rhs);
648    Witness::Relational {
649        shape: id,
650        node: node.clone(),
651        kind,
652        lhs,
653        rhs,
654        offending,
655    }
656}
657
658/// The witnessing pairs/members for a failed relational constraint.
659fn offending_pairs(
660    kind: RelKind,
661    lhs: &[(Term, PathSupport)],
662    rhs: &[(Term, PathSupport)],
663) -> Vec<(Term, Term)> {
664    let lvals: Vec<&Term> = lhs.iter().map(|(v, _)| v).collect();
665    let rvals: Vec<&Term> = rhs.iter().map(|(v, _)| v).collect();
666    match kind {
667        RelKind::Eq => lvals
668            .iter()
669            .filter(|v| !rvals.contains(v))
670            .chain(rvals.iter().filter(|v| !lvals.contains(v)))
671            .map(|v| ((*v).clone(), (*v).clone()))
672            .collect(),
673        RelKind::Disj => lvals
674            .iter()
675            .filter(|v| rvals.contains(v))
676            .map(|v| ((*v).clone(), (*v).clone()))
677            .collect(),
678        RelKind::Lt | RelKind::Le => {
679            // pairs (a, b) that fail a < b (resp. a ≤ b)
680            let mut bad = Vec::new();
681            for a in &lvals {
682                for b in &rvals {
683                    let ok = match crate::value::compare_terms(a, b) {
684                        Some(std::cmp::Ordering::Less) => true,
685                        Some(std::cmp::Ordering::Equal) => kind == RelKind::Le,
686                        _ => false,
687                    };
688                    if !ok {
689                        bad.push(((*a).clone(), (*b).clone()));
690                    }
691                }
692            }
693            bad
694        }
695        RelKind::UniqueLang => {
696            // pairs of lhs values sharing a language tag
697            let mut bad = Vec::new();
698            for i in 0..lvals.len() {
699                for j in (i + 1)..lvals.len() {
700                    if let (Term::Literal(a), Term::Literal(b)) = (lvals[i], lvals[j])
701                        && let (Some(la), Some(lb)) = (a.language(), b.language())
702                        && la.eq_ignore_ascii_case(lb)
703                    {
704                        bad.push((lvals[i].clone(), lvals[j].clone()));
705                    }
706                }
707            }
708            bad
709        }
710    }
711}
712
713/// Support for why `φ` holds at `node`. `None` ⟺ it fails. The dual of [`witness`].
714fn sat_trace(
715    eval: &mut ShapeEvaluator<'_>,
716    node: &Term,
717    id: ShapeId,
718    reached_by: &Path,
719    produced_by: Option<&PathSupport>,
720    stack: &mut Stack,
721) -> Option<SatTrace> {
722    let key = (id, node.clone());
723    if !stack.insert(key.clone()) {
724        // back-edge: coinductively assumed true, no grounded support to delete.
725        return Some(SatTrace::Coinductive {
726            shape: id,
727            node: node.clone(),
728        });
729    }
730    if !eval.holds(node, id) {
731        stack.remove(&key);
732        return None; // fails ⇒ no satisfaction to break
733    }
734    let shape = eval.arena().get(id).clone();
735    let result = match shape {
736        Shape::Top | Shape::Pending => Some(SatTrace::Irrefutable { shape: id }),
737        // `sh:severity` is transparent: break the satisfaction of the wrapped shape.
738        Shape::Annotated { shape, .. } => {
739            let inner = sat_trace(eval, node, shape, reached_by, produced_by, stack);
740            stack.remove(&key);
741            return inner;
742        }
743        Shape::TestConst(_) | Shape::TestType(_) | Shape::TestKind(_) => Some(SatTrace::Atom {
744            shape: id,
745            node: node.clone(),
746            reached_by: reached_by.clone(),
747            produced_by: produced_by.cloned().unwrap_or(PathSupport::Empty),
748        }),
749        Shape::Eq(..) | Shape::Disj(..) | Shape::Lt(..) | Shape::Le(..) | Shape::UniqueLang(_) => {
750            Some(SatTrace::Blocked {
751                shape: id,
752                node: node.clone(),
753                reason: BlockReason::Unsupported,
754            })
755        }
756        Shape::Closed(_) => Some(SatTrace::Blocked {
757            shape: id,
758            node: node.clone(),
759            reason: BlockReason::ClosedNeedsAdd,
760        }),
761        Shape::Sparql(_) => Some(SatTrace::Blocked {
762            shape: id,
763            node: node.clone(),
764            reason: BlockReason::OpaqueSparql,
765        }),
766        Shape::Not(c) => {
767            witness(eval, node, c, reached_by, produced_by, &[], stack).map(|w| SatTrace::NotHeld {
768                shape: id,
769                node: node.clone(),
770                inner_fails: Box::new(w),
771            })
772        }
773        Shape::And(cs) => {
774            // holds ⟹ all children hold.
775            let children: Vec<SatTrace> = cs
776                .iter()
777                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
778                .collect();
779            Some(SatTrace::AllHeld {
780                shape: id,
781                node: node.clone(),
782                children,
783            })
784        }
785        Shape::Or(cs) => {
786            let satisfied: Vec<SatTrace> = cs
787                .iter()
788                .filter_map(|c| sat_trace(eval, node, *c, reached_by, produced_by, stack))
789                .collect();
790            Some(SatTrace::AnyHeld {
791                shape: id,
792                node: node.clone(),
793                satisfied,
794            })
795        }
796        Shape::Count {
797            path,
798            min,
799            max,
800            qualifier,
801        } => {
802            let values: Vec<Term> = succ(eval.backend(), node, &path).into_iter().collect();
803            let reached = Path::seq(vec![reached_by.clone(), path.clone()]);
804            let mut matches = Vec::new();
805            for u in values {
806                if eval.holds(&u, qualifier) {
807                    let ps = path_support(eval.backend(), node, &path, &u);
808                    if let Some(t) = sat_trace(eval, &u, qualifier, &reached, ps.as_ref(), stack) {
809                        matches.push((u, t));
810                    }
811                }
812            }
813            Some(SatTrace::CountHeld {
814                shape: id,
815                node: node.clone(),
816                path: path.clone(),
817                qualifier,
818                matches,
819                min,
820                max,
821            })
822        }
823    };
824    stack.remove(&key);
825    result
826}
827
828/// Predicates+objects on `node` not allowed by a closed shape's set `q`.
829fn closed_offenders(
830    g: &dyn PathBackend,
831    node: &Term,
832    q: &BTreeSet<NamedNode>,
833) -> Vec<(NamedNode, Term)> {
834    let allowed: HashSet<&NamedNode> = q.iter().collect();
835    let mut out = Vec::new();
836    for p in g.out_predicates(node) {
837        if !allowed.contains(&p) {
838            for o in g.objects(node, &p) {
839                out.push((p.clone(), o));
840            }
841        }
842    }
843    out
844}
845
846/// The existing triples that make `to` a `π`-successor of `from`, if any. The
847/// edges a deletion would cut to remove `to` from the value set.
848fn path_support(g: &dyn PathBackend, from: &Term, path: &Path, to: &Term) -> Option<PathSupport> {
849    match path {
850        Path::Id => (from == to).then_some(PathSupport::Empty),
851        Path::Pred(q) => {
852            if g.objects(from, q).contains(to) {
853                let s = node_of(from)?;
854                Some(PathSupport::Edge(Triple::new(s, q.clone(), to.clone())))
855            } else {
856                None
857            }
858        }
859        Path::Inverse(p) => path_support(g, to, p, from),
860        Path::Alt(ps) => ps.iter().find_map(|p| path_support(g, from, p, to)),
861        Path::Seq(ps) => seq_support(g, from, ps, to),
862        Path::Star(p) => star_support(g, from, p, to),
863    }
864}
865
866fn seq_support(g: &dyn PathBackend, from: &Term, ps: &[Path], to: &Term) -> Option<PathSupport> {
867    let Some((first, rest)) = ps.split_first() else {
868        return (from == to).then_some(PathSupport::Empty);
869    };
870    for mid in succ(g, from, first) {
871        let Some(head) = path_support(g, from, first, &mid) else {
872            continue;
873        };
874        if let Some(tail) = seq_support(g, &mid, rest, to) {
875            let mut chain = vec![head];
876            match tail {
877                PathSupport::Empty => {}
878                PathSupport::Chain(v) => chain.extend(v),
879                other => chain.push(other),
880            }
881            return Some(PathSupport::Chain(chain));
882        }
883    }
884    None
885}
886
887fn star_support(g: &dyn PathBackend, from: &Term, p: &Path, to: &Term) -> Option<PathSupport> {
888    if from == to {
889        return Some(PathSupport::Empty);
890    }
891    let mut visited: HashSet<Term> = HashSet::from([from.clone()]);
892    let mut queue: VecDeque<(Term, Vec<PathSupport>)> =
893        VecDeque::from([(from.clone(), Vec::new())]);
894    while let Some((cur, chain)) = queue.pop_front() {
895        for next in succ(g, &cur, p) {
896            let Some(edge) = path_support(g, &cur, p, &next) else {
897                continue;
898            };
899            let mut chain2 = chain.clone();
900            chain2.push(edge);
901            if next == *to {
902                return Some(PathSupport::Chain(chain2));
903            }
904            if visited.insert(next.clone()) {
905                queue.push_back((next, chain2));
906            }
907        }
908    }
909    None
910}
911
912#[cfg(test)]
913mod tests {
914    use super::*;
915    use shifty_parse::{load_turtle, parse_turtle};
916
917    const PREFIXES: &str = r#"
918        @prefix sh:  <http://www.w3.org/ns/shacl#> .
919        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
920        @prefix ex:  <http://ex/> .
921        @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
922    "#;
923
924    fn run(ttl: &str) -> Vec<FocusWitness> {
925        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
926        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
927        witness_violations(&loaded.graph, &parsed.schema).expect("stratifiable")
928    }
929
930    /// Does any node in the witness tree satisfy `pred`?
931    fn any(w: &Witness, pred: &impl Fn(&Witness) -> bool) -> bool {
932        if pred(w) {
933            return true;
934        }
935        match w {
936            Witness::All { failed, .. } => failed.iter().any(|c| any(c, pred)),
937            Witness::Any { branches, .. } => branches.iter().any(|c| any(c, pred)),
938            Witness::CountHigh { per_value, .. } => per_value.iter().any(|(_, c)| any(c, pred)),
939            _ => false,
940        }
941    }
942
943    #[test]
944    fn conforming_graph_yields_no_witnesses() {
945        let ttl = format!(
946            "{PREFIXES}
947            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
948                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
949            ex:x ex:p ex:y .
950            "
951        );
952        assert!(run(&ttl).is_empty());
953    }
954
955    #[test]
956    fn min_count_violation_is_count_low() {
957        let ttl = format!(
958            "{PREFIXES}
959            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
960                sh:property [ sh:path ex:p ; sh:minCount 2 ] .
961            ex:x ex:p ex:y .
962            "
963        );
964        let ws = run(&ttl);
965        assert_eq!(ws.len(), 1);
966        assert_eq!(ws[0].focus.to_string(), "<http://ex/x>");
967        assert!(any(&ws[0].failure, &|w| matches!(
968            w,
969            Witness::CountLow {
970                have: 1,
971                min: 2,
972                ..
973            }
974        )));
975    }
976
977    /// Every `CountLow`'s `sibling_qualifiers`, in pre-order.
978    fn count_low_siblings(w: &Witness) -> Vec<Vec<ShapeId>> {
979        fn go(w: &Witness, out: &mut Vec<Vec<ShapeId>>) {
980            match w {
981                Witness::CountLow {
982                    sibling_qualifiers, ..
983                } => out.push(sibling_qualifiers.clone()),
984                Witness::All { failed, .. } => failed.iter().for_each(|c| go(c, out)),
985                Witness::Any { branches, .. } => branches.iter().for_each(|c| go(c, out)),
986                Witness::CountHigh { per_value, .. } => {
987                    per_value.iter().for_each(|(_, c)| go(c, out))
988                }
989                _ => {}
990            }
991        }
992        let mut out = Vec::new();
993        go(w, &mut out);
994        out
995    }
996
997    #[test]
998    fn cross_property_universals_attach_to_count_low() {
999        // The min-count and the `sh:class` live in *separate* property shapes on the
1000        // same path — different `And` conjuncts. The class universal holds vacuously
1001        // (no values), so it never witnesses as a failure, yet a value added for the
1002        // count must still satisfy it: it must reach the `CountLow` across `And`s.
1003        let ttl = format!(
1004            "{PREFIXES}
1005            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1006                sh:property [ sh:path ex:p ; sh:minCount 1 ] ;
1007                sh:property [ sh:path ex:p ; sh:class ex:C ] .
1008            ex:x a ex:Thing .
1009            "
1010        );
1011        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1012        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1013        let ws = witness_violations(&loaded.graph, &parsed.schema).expect("stratifiable");
1014        assert_eq!(ws.len(), 1);
1015        let sibs = count_low_siblings(&ws[0].failure);
1016        assert_eq!(sibs.len(), 1, "one CountLow");
1017        assert_eq!(sibs[0].len(), 1, "the class universal is attached");
1018        assert_eq!(
1019            class_of(sibs[0][0], &parsed.schema.arena),
1020            Some("http://ex/C".to_string()),
1021        );
1022    }
1023
1024    #[test]
1025    fn disjoint_or_branch_universal_does_not_attach() {
1026        // The `sh:class` sits in a *different* `sh:or` branch than the count, so the
1027        // two are not conjoined — a value satisfying the count branch need not be a
1028        // class. Scope must not cross the disjunction: the CountLow carries no
1029        // sibling. (Both branches fail here, so the Or genuinely witnesses.)
1030        let ttl = format!(
1031            "{PREFIXES}
1032            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1033                sh:or (
1034                    [ sh:path ex:p ; sh:minCount 2 ]
1035                    [ sh:path ex:p ; sh:class ex:C ]
1036                ) .
1037            ex:x a ex:Thing ; ex:p ex:y .
1038            "
1039        );
1040        let ws = run(&ttl);
1041        assert_eq!(ws.len(), 1);
1042        let sibs = count_low_siblings(&ws[0].failure);
1043        assert!(!sibs.is_empty(), "the count branch yields a CountLow");
1044        assert!(
1045            sibs.iter().all(|s| s.is_empty()),
1046            "no universal leaks across the disjunction: {sibs:?}",
1047        );
1048    }
1049
1050    /// The class IRI of a `∃≥1 (rdf:type/subClassOf*).test(C)` shape, as a string.
1051    fn class_of(id: ShapeId, arena: &ShapeArena) -> Option<String> {
1052        match shifty_algebra::render::class_target_shape(id, arena)? {
1053            Term::NamedNode(n) => Some(n.as_str().to_string()),
1054            _ => None,
1055        }
1056    }
1057
1058    #[test]
1059    fn datatype_violation_is_an_atom_with_support() {
1060        let ttl = format!(
1061            "{PREFIXES}
1062            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1063                sh:property [ sh:path ex:p ; sh:datatype xsd:integer ] .
1064            ex:x ex:p \"hello\" .
1065            "
1066        );
1067        let ws = run(&ttl);
1068        assert_eq!(ws.len(), 1);
1069        // The bad value is reached via ex:p, so its atom carries a cut edge.
1070        assert!(any(&ws[0].failure, &|w| matches!(
1071            w,
1072            Witness::Atom {
1073                produced_by: Some(PathSupport::Edge(_)),
1074                ..
1075            }
1076        )));
1077    }
1078
1079    #[test]
1080    fn focus_level_nodekind_atom_has_no_support() {
1081        let ttl = format!(
1082            "{PREFIXES}
1083            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1084                sh:nodeKind sh:IRI .
1085            ex:x ex:p \"v\" .
1086            ex:y ex:q ex:x .
1087            "
1088        );
1089        // ex:x is an IRI so it conforms; use a literal-targeted shape instead:
1090        let _ = ttl;
1091        let ttl2 = format!(
1092            "{PREFIXES}
1093            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1094                sh:nodeKind sh:Literal .
1095            ex:x ex:p ex:y .
1096            "
1097        );
1098        let ws = run(&ttl2);
1099        assert_eq!(ws.len(), 1);
1100        assert!(matches!(
1101            ws[0].failure,
1102            Witness::Atom {
1103                produced_by: None,
1104                ..
1105            }
1106        ));
1107    }
1108
1109    #[test]
1110    fn non_stratifiable_schema_is_diagnosed() {
1111        let ttl = format!(
1112            "{PREFIXES}
1113            ex:S a sh:NodeShape ; sh:targetNode ex:x ;
1114                sh:not [ sh:path ex:p ; sh:qualifiedValueShape ex:S ; sh:qualifiedMinCount 1 ] .
1115            ex:x ex:p ex:y .
1116            "
1117        );
1118        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1119        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1120        assert!(witness_violations(&loaded.graph, &parsed.schema).is_err());
1121    }
1122
1123    #[test]
1124    fn witness_shape_and_satisfy_shape_scope_to_one_shape() {
1125        // Two targeted shapes; one focus fails ex:S, one passes ex:S, and a third
1126        // node fails an unrelated shape ex:T that the ex:S queries must ignore.
1127        let ttl = format!(
1128            "{PREFIXES}
1129            ex:S a sh:NodeShape ; sh:targetClass ex:C ;
1130                sh:property [ sh:path ex:p ; sh:minCount 1 ] .
1131            ex:T a sh:NodeShape ; sh:targetClass ex:D ;
1132                sh:property [ sh:path ex:q ; sh:minCount 1 ] .
1133            ex:good a ex:C ; ex:p ex:y .
1134            ex:bad  a ex:C .
1135            ex:other a ex:D .
1136            "
1137        );
1138        let parsed = parse_turtle(ttl.as_bytes(), None).unwrap();
1139        let loaded = load_turtle(ttl.as_bytes(), None).unwrap();
1140        let schema = &parsed.schema;
1141
1142        let s = shape_id_for_iri(schema, "http://ex/S").expect("ex:S is named");
1143        assert!(shape_id_for_iri(schema, "http://ex/missing").is_none());
1144
1145        // Failures: just ex:bad, never the ex:T violation on ex:other.
1146        let fails = witness_shape(&loaded.graph, schema, s).expect("stratifiable");
1147        assert_eq!(fails.len(), 1);
1148        assert_eq!(fails[0].focus.to_string(), "<http://ex/bad>");
1149
1150        // Satisfactions: just ex:good, with the matched value recorded.
1151        let sats = satisfy_shape(&loaded.graph, schema, s).expect("stratifiable");
1152        assert_eq!(sats.len(), 1);
1153        assert_eq!(sats[0].focus.to_string(), "<http://ex/good>");
1154        // ex:good holds because the ex:p count is met by ex:y.
1155        assert!(any_sat(&sats[0].trace, &|t| matches!(
1156            t,
1157            SatTrace::CountHeld { matches, .. } if matches.iter().any(|(v, _)| v.to_string() == "<http://ex/y>")
1158        )));
1159    }
1160
1161    /// Does any node in the satisfaction trace satisfy `pred`?
1162    fn any_sat(t: &SatTrace, pred: &impl Fn(&SatTrace) -> bool) -> bool {
1163        if pred(t) {
1164            return true;
1165        }
1166        match t {
1167            SatTrace::AllHeld { children, .. } => children.iter().any(|c| any_sat(c, pred)),
1168            SatTrace::AnyHeld { satisfied, .. } => satisfied.iter().any(|c| any_sat(c, pred)),
1169            SatTrace::CountHeld { matches, .. } => matches.iter().any(|(_, c)| any_sat(c, pred)),
1170            _ => false,
1171        }
1172    }
1173}