Skip to main content

shifty_engine/
validate.rs

1//! Reference shape satisfaction `G, v ⊨ φ` and schema validation `G ⊨ S`
2//! (doc 00 §3–§4, Table 2). This is the conformance *oracle*: the optimized
3//! engines in later layers must agree with it.
4//!
5//! Two evaluators share the logic: [`holds`] returns a bare bool (used for
6//! target selection and counting), while [`explain`] returns the specific
7//! atomic constraints that failed, with the value node and path at which they
8//! failed — enough for per-constraint reporting. The `∀π = ∃≤0 π.¬φ` encoding
9//! lets a failed universal drill straight into the offending value node's inner
10//! constraint.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{
22    Path, Schema, Selector, Severity, Shape, ShapeArena, ShapeId, SparqlConstraint,
23};
24use shifty_opt::{FocusSource, PhysicalPlan, analyze};
25use std::cmp::Ordering;
26use std::collections::{BTreeSet, HashMap, HashSet};
27use std::fmt;
28use std::sync::OnceLock;
29
30#[derive(Debug, Clone, Copy)]
31struct EvalResult {
32    holds: bool,
33    cacheable: bool,
34}
35
36#[derive(Default)]
37struct EvalState {
38    memo: HashMap<(ShapeId, Term), bool>,
39    active: HashSet<(ShapeId, Term)>,
40    telemetry: Option<ShapeCacheSample>,
41}
42
43/// Per-graph-snapshot shape evaluator.
44///
45/// Completed checks are shared across statements and focus nodes. Results that
46/// depend on the coinductive recursion back-edge are deliberately not cached:
47/// their provisional `true` may only be valid in the active call context.
48pub(crate) struct ShapeEvaluator<'a> {
49    g: &'a dyn PathBackend,
50    arena: &'a ShapeArena,
51    sparql: &'a SparqlExecutor,
52    state: EvalState,
53}
54
55impl<'a> ShapeEvaluator<'a> {
56    pub(crate) fn new(
57        g: &'a dyn PathBackend,
58        arena: &'a ShapeArena,
59        sparql: &'a SparqlExecutor,
60    ) -> Self {
61        Self {
62            g,
63            arena,
64            sparql,
65            state: EvalState {
66                telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
67                ..EvalState::default()
68            },
69        }
70    }
71
72    pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
73        holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
74    }
75
76    pub(crate) fn sparql(&self) -> &SparqlExecutor {
77        self.sparql
78    }
79
80    /// The path-evaluation backend, for sibling folds (e.g. witnessing).
81    pub(crate) fn backend(&self) -> &dyn PathBackend {
82        self.g
83    }
84
85    /// The shape arena, for sibling folds (e.g. witnessing).
86    pub(crate) fn arena(&self) -> &ShapeArena {
87        self.arena
88    }
89}
90
91impl Drop for ShapeEvaluator<'_> {
92    fn drop(&mut self) {
93        let Some(mut sample) = self.state.telemetry else {
94            return;
95        };
96        sample.entries = self.state.memo.len();
97        sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
98        crate::profile::record_shape_cache(sample);
99    }
100}
101
102/// A single failed atomic constraint.
103#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct Reason {
105    /// The node at which the constraint failed (a value node, or the focus).
106    pub value: Term,
107    /// The path from the enclosing focus to `value`, if the failure is
108    /// value-scoped (rendered in `π` notation).
109    pub path: Option<String>,
110    /// The failing constraint's arena slot (cross-references the algebra dump).
111    pub shape: ShapeId,
112    /// `sh:severity` on the source shape, defaulting to `sh:Violation`.
113    #[serde(default)]
114    pub severity: Severity,
115    pub message: String,
116    /// Non-empty when this reason is an `sh:or` group: one entry per OR branch
117    /// that failed, so the caller can tell "fix any one of these."
118    #[serde(default, skip_serializing_if = "Vec::is_empty")]
119    pub sub_reasons: Vec<Reason>,
120}
121
122/// One focus node that failed its statement's shape, with the reasons why.
123#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct Violation {
125    pub focus: Term,
126    /// Index of the violated `(selector, shape)` statement in the schema.
127    pub statement: usize,
128    /// The most severe top-level reason in this grouped finding.
129    pub severity: Severity,
130    pub reasons: Vec<Reason>,
131}
132
133/// The outcome of validating a data graph against a schema.
134#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
135pub struct ValidationOutcome {
136    pub conforms: bool,
137    pub violations: Vec<Violation>,
138}
139
140/// Controls which retained findings make a validation outcome non-conforming.
141#[derive(Debug, Clone, PartialEq, Eq)]
142pub struct ValidationOptions {
143    /// Lowest severity that makes `conforms` false. Defaults to `sh:Info`, so
144    /// all findings fail validation as they did before severity was retained.
145    pub minimum_severity: Severity,
146    /// Whether to sort violations by severity, focus node, and statement.
147    /// Defaults to `true` for deterministic output.
148    pub sort_results: bool,
149}
150
151impl Default for ValidationOptions {
152    fn default() -> Self {
153        Self {
154            minimum_severity: Severity::Info,
155            sort_results: true,
156        }
157    }
158}
159
160fn most_severe(reasons: &[Reason]) -> Severity {
161    reasons
162        .iter()
163        .max_by_key(|reason| reason.severity.rank())
164        .map(|reason| reason.severity.clone())
165        .unwrap_or(Severity::Violation)
166}
167
168fn conforms_at_threshold(violations: &[Violation], minimum: &Severity) -> bool {
169    !violations
170        .iter()
171        .flat_map(|violation| &violation.reasons)
172        .any(|reason| reason.severity.meets(minimum))
173}
174
175fn sort_violations(violations: &mut [Violation], enabled: bool) {
176    if enabled {
177        violations.sort_by(|left, right| {
178            right
179                .severity
180                .rank()
181                .cmp(&left.severity.rank())
182                .then_with(|| left.focus.to_string().cmp(&right.focus.to_string()))
183                .then_with(|| left.statement.cmp(&right.statement))
184        });
185    }
186}
187
188/// Which RDF graph(s) validation uses for focus discovery and evaluation.
189#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
190pub enum ValidationGraphMode {
191    /// Focus nodes and evaluation both use only the data graph.
192    Data,
193    /// Focus nodes come from data; paths, class hierarchy, and SPARQL use the
194    /// union of data and shapes. This is the default for split graphs.
195    #[default]
196    Union,
197    /// Focus discovery and evaluation both use the full data/shapes union.
198    UnionAll,
199}
200
201/// The schema is not stratifiable: it recurses through genuine negation, so it
202/// has no defined 2-valued semantics (`docs/03-recursion-semantics.md`). We
203/// diagnose rather than guess. Carries the offending shape components.
204#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct NonStratifiable {
206    pub components: Vec<Vec<ShapeId>>,
207}
208
209impl fmt::Display for NonStratifiable {
210    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211        write!(f, "non-stratifiable schema (recursion through negation): ")?;
212        for (i, c) in self.components.iter().enumerate() {
213            if i > 0 {
214                write!(f, "; ")?;
215            }
216            let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
217            write!(f, "{{{}}}", ids.join(" "))?;
218        }
219        Ok(())
220    }
221}
222
223impl std::error::Error for NonStratifiable {}
224
225/// Validate `data` against `schema`.
226///
227/// Honors the decided recursion semantics (`docs/03-recursion-semantics.md`):
228/// the schema must be **stratifiable** (no recursion through net negation), else
229/// we return [`NonStratifiable`]. For a stratifiable schema all recursion is
230/// net-positive (monotone), and [`explain`]/[`holds`]'s "assume conforming on a
231/// back-edge" cycle guard computes exactly the **greatest fixpoint** — the
232/// coinductive validation reading we chose.
233pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
234    validate_with_options(data, schema, &ValidationOptions::default())
235}
236
237/// Validate `data` against `schema` using an explicit severity policy.
238pub fn validate_with_options(
239    data: &Graph,
240    schema: &Schema,
241    options: &ValidationOptions,
242) -> Result<ValidationOutcome, NonStratifiable> {
243    validate_with_context_and_options(data, data, schema, options)
244}
245
246/// Validate split data and shapes graphs using the selected graph mode.
247pub fn validate_graphs(
248    data: &Graph,
249    shapes: &Graph,
250    schema: &Schema,
251) -> Result<ValidationOutcome, NonStratifiable> {
252    validate_graphs_with_mode_and_options(
253        data,
254        shapes,
255        schema,
256        ValidationGraphMode::default(),
257        &ValidationOptions::default(),
258    )
259}
260
261/// Validate split data and shapes graphs using an explicit graph mode.
262pub fn validate_graphs_with_mode(
263    data: &Graph,
264    shapes: &Graph,
265    schema: &Schema,
266    mode: ValidationGraphMode,
267) -> Result<ValidationOutcome, NonStratifiable> {
268    validate_graphs_with_mode_and_options(data, shapes, schema, mode, &ValidationOptions::default())
269}
270
271/// Validate split graphs using an explicit graph mode and severity policy.
272pub fn validate_graphs_with_mode_and_options(
273    data: &Graph,
274    shapes: &Graph,
275    schema: &Schema,
276    mode: ValidationGraphMode,
277    options: &ValidationOptions,
278) -> Result<ValidationOutcome, NonStratifiable> {
279    match mode {
280        ValidationGraphMode::Data => {
281            let uses_shapes = uses_shapes_graph(&schema.arena);
282            let frozen = if uses_shapes {
283                FrozenIndexedDataset::from_graphs(data, shapes)
284            } else {
285                FrozenIndexedDataset::from_graph(data)
286            };
287            validate_with_frozen(data, schema, frozen, uses_shapes, options)
288        }
289        ValidationGraphMode::Union => {
290            let uses_shapes = uses_shapes_graph(&schema.arena);
291            let frozen = if uses_shapes {
292                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
293            } else {
294                FrozenIndexedDataset::from_graph_union(data, shapes)
295            };
296            validate_with_frozen(data, schema, frozen, uses_shapes, options)
297        }
298        ValidationGraphMode::UnionAll => {
299            let union = graph_union(data, shapes);
300            validate_with_context_and_options(&union, &union, schema, options)
301        }
302    }
303}
304
305/// Validate focus nodes from `data` while evaluating paths, class hierarchy,
306/// and SPARQL against `context`. For split data/shapes inputs, `context` should
307/// be their RDF union.
308pub fn validate_with_context(
309    data: &Graph,
310    context: &Graph,
311    schema: &Schema,
312) -> Result<ValidationOutcome, NonStratifiable> {
313    validate_with_context_and_options(data, context, schema, &ValidationOptions::default())
314}
315
316/// Validate with separate focus/context graphs and an explicit severity policy.
317pub fn validate_with_context_and_options(
318    data: &Graph,
319    context: &Graph,
320    schema: &Schema,
321    options: &ValidationOptions,
322) -> Result<ValidationOutcome, NonStratifiable> {
323    let uses_shapes = uses_shapes_graph(&schema.arena);
324    let frozen = if uses_shapes {
325        FrozenIndexedDataset::from_graphs(context, context)
326    } else {
327        FrozenIndexedDataset::from_graph(context)
328    };
329    validate_with_frozen(data, schema, frozen, uses_shapes, options)
330}
331
332fn validate_with_frozen(
333    data: &Graph,
334    schema: &Schema,
335    frozen: FrozenIndexedDataset,
336    has_shapes_graph: bool,
337    options: &ValidationOptions,
338) -> Result<ValidationOutcome, NonStratifiable> {
339    let strat = analyze(&schema.arena);
340    if !strat.stratifiable {
341        let components = strat
342            .strata
343            .iter()
344            .filter(|s| !s.stratifiable)
345            .map(|s| s.shapes.clone())
346            .collect();
347        return Err(NonStratifiable { components });
348    }
349
350    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
351    let backend = sparql
352        .frozen()
353        .expect("validation executor always has a frozen dataset");
354    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
355    let mut violations = Vec::new();
356    for (i, st) in schema.statements.iter().enumerate() {
357        let label = schema
358            .names
359            .get(&st.shape)
360            .cloned()
361            .unwrap_or_else(|| format!("@{}", st.shape.0));
362        let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
363        prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
364        for v in foci {
365            let t = std::time::Instant::now();
366            let mut stack = HashSet::new();
367            let mut reasons = explain(
368                &mut evaluator,
369                &v,
370                st.shape,
371                None,
372                &Severity::Violation,
373                &mut stack,
374            );
375            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
376            dedup_reasons(&mut reasons);
377            if !reasons.is_empty() {
378                let severity = most_severe(&reasons);
379                violations.push(Violation {
380                    focus: v,
381                    statement: i,
382                    severity,
383                    reasons,
384                });
385            }
386        }
387    }
388    sort_violations(&mut violations, options.sort_results);
389    Ok(ValidationOutcome {
390        conforms: conforms_at_threshold(&violations, &options.minimum_severity),
391        violations,
392    })
393}
394
395/// Whether any `sh:sparql` constraint references `$shapesGraph`, requiring the
396/// shapes graph to be mirrored into a named graph for evaluation.
397pub(crate) fn uses_shapes_graph(arena: &ShapeArena) -> bool {
398    (0..arena.len()).any(|i| {
399        matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
400    })
401}
402
403/// The RDF merge of two graphs (`left ∪ right`). The standard way to build the
404/// `context` graph the `*_with_context` repair entry points expect: the union of
405/// a data graph and a shapes/ontology graph.
406pub fn graph_union(left: &Graph, right: &Graph) -> Graph {
407    let mut union = left.clone();
408    for triple in right.iter() {
409        union.insert(triple);
410    }
411    union
412}
413
414/// Validate using a [`PhysicalPlan`] (Layer 5): focus nodes come from compiled
415/// [`FocusSource`]s (so class targets seed backward from the constant instead of
416/// scanning every node) and checks run over the plan's cost-ordered arena. The
417/// result is identical to [`validate`] on the same schema — the W3C harness
418/// cross-checks this.
419pub fn validate_plan(
420    data: &Graph,
421    plan: &PhysicalPlan,
422) -> Result<ValidationOutcome, NonStratifiable> {
423    validate_plan_with_options(data, plan, &ValidationOptions::default())
424}
425
426/// Validate a physical plan using an explicit severity policy.
427pub fn validate_plan_with_options(
428    data: &Graph,
429    plan: &PhysicalPlan,
430    options: &ValidationOptions,
431) -> Result<ValidationOutcome, NonStratifiable> {
432    validate_plan_with_context_and_options(data, data, plan, options)
433}
434
435/// Validate a physical plan over split graphs using the default graph mode.
436pub fn validate_plan_graphs(
437    data: &Graph,
438    shapes: &Graph,
439    plan: &PhysicalPlan,
440) -> Result<ValidationOutcome, NonStratifiable> {
441    validate_plan_graphs_with_mode_and_options(
442        data,
443        shapes,
444        plan,
445        ValidationGraphMode::default(),
446        &ValidationOptions::default(),
447    )
448}
449
450/// Validate a physical plan over split graphs using an explicit graph mode.
451pub fn validate_plan_graphs_with_mode(
452    data: &Graph,
453    shapes: &Graph,
454    plan: &PhysicalPlan,
455    mode: ValidationGraphMode,
456) -> Result<ValidationOutcome, NonStratifiable> {
457    validate_plan_graphs_with_mode_and_options(
458        data,
459        shapes,
460        plan,
461        mode,
462        &ValidationOptions::default(),
463    )
464}
465
466/// Validate a physical plan over split graphs with an explicit severity policy.
467pub fn validate_plan_graphs_with_mode_and_options(
468    data: &Graph,
469    shapes: &Graph,
470    plan: &PhysicalPlan,
471    mode: ValidationGraphMode,
472    options: &ValidationOptions,
473) -> Result<ValidationOutcome, NonStratifiable> {
474    match mode {
475        ValidationGraphMode::Data => {
476            let uses_shapes = uses_shapes_graph(&plan.arena);
477            let frozen = if uses_shapes {
478                FrozenIndexedDataset::from_graphs(data, shapes)
479            } else {
480                FrozenIndexedDataset::from_graph(data)
481            };
482            validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
483        }
484        ValidationGraphMode::Union => {
485            let uses_shapes = uses_shapes_graph(&plan.arena);
486            let frozen = if uses_shapes {
487                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
488            } else {
489                FrozenIndexedDataset::from_graph_union(data, shapes)
490            };
491            validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
492        }
493        ValidationGraphMode::UnionAll => {
494            let union = graph_union(data, shapes);
495            validate_plan_with_context_and_options(&union, &union, plan, options)
496        }
497    }
498}
499
500/// Validate plan focus nodes from `data` against the supplied execution context.
501pub fn validate_plan_with_context(
502    data: &Graph,
503    context: &Graph,
504    plan: &PhysicalPlan,
505) -> Result<ValidationOutcome, NonStratifiable> {
506    validate_plan_with_context_and_options(data, context, plan, &ValidationOptions::default())
507}
508
509/// Validate plan focus nodes against a context with a severity policy.
510pub fn validate_plan_with_context_and_options(
511    data: &Graph,
512    context: &Graph,
513    plan: &PhysicalPlan,
514    options: &ValidationOptions,
515) -> Result<ValidationOutcome, NonStratifiable> {
516    let uses_shapes = uses_shapes_graph(&plan.arena);
517    let frozen = if uses_shapes {
518        FrozenIndexedDataset::from_graphs(context, context)
519    } else {
520        FrozenIndexedDataset::from_graph(context)
521    };
522    validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
523}
524
525fn validate_plan_with_frozen(
526    data: &Graph,
527    plan: &PhysicalPlan,
528    frozen: FrozenIndexedDataset,
529    has_shapes_graph: bool,
530    options: &ValidationOptions,
531) -> Result<ValidationOutcome, NonStratifiable> {
532    let strat = analyze(&plan.arena);
533    if !strat.stratifiable {
534        let components = strat
535            .strata
536            .iter()
537            .filter(|s| !s.stratifiable)
538            .map(|s| s.shapes.clone())
539            .collect();
540        return Err(NonStratifiable { components });
541    }
542
543    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
544    let backend = sparql
545        .frozen()
546        .expect("validation executor always has a frozen dataset");
547    let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
548    let mut violations = Vec::new();
549    for (i, sp) in plan.statements.iter().enumerate() {
550        let label = plan
551            .names
552            .get(&sp.shape)
553            .cloned()
554            .unwrap_or_else(|| format!("@{}", sp.shape.0));
555        let foci = focus_for_source(data, &sp.source, &mut evaluator);
556        prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
557        for v in foci {
558            let t = std::time::Instant::now();
559            let mut stack = HashSet::new();
560            let mut reasons = explain(
561                &mut evaluator,
562                &v,
563                sp.shape,
564                None,
565                &Severity::Violation,
566                &mut stack,
567            );
568            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
569            dedup_reasons(&mut reasons);
570            if !reasons.is_empty() {
571                let severity = most_severe(&reasons);
572                violations.push(Violation {
573                    focus: v,
574                    statement: i,
575                    severity,
576                    reasons,
577                });
578            }
579        }
580    }
581    sort_violations(&mut violations, options.sort_results);
582    Ok(ValidationOutcome {
583        conforms: conforms_at_threshold(&violations, &options.minimum_severity),
584        violations,
585    })
586}
587
588/// Focus nodes for a compiled [`FocusSource`].
589fn focus_for_source(
590    data: &Graph,
591    source: &FocusSource,
592    evaluator: &mut ShapeEvaluator<'_>,
593) -> Vec<Term> {
594    match source {
595        FocusSource::SubjectsOf(p) => subjects_of(data, p),
596        FocusSource::ObjectsOf(p) => objects_of(data, p),
597        FocusSource::Node(c) => vec![c.clone()],
598        // the optimization: seed backward from the constant, no full scan
599        FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
600            .into_iter()
601            .filter(|node| graph_contains_term(data, node))
602            .collect(),
603        FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
604            .into_iter()
605            .filter(|v| {
606                succ(evaluator.g, v, path)
607                    .iter()
608                    .any(|u| evaluator.holds(u, *qualifier))
609            })
610            .collect(),
611        FocusSource::Sparql(target) => {
612            let candidates = all_nodes(data);
613            evaluator
614                .sparql
615                .target_nodes(&target.query)
616                .unwrap_or_default()
617                .into_iter()
618                .filter(|node| candidates.contains(node))
619                .collect()
620        }
621    }
622}
623
624/// The focus nodes selected by a selector.
625pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
626    let sparql =
627        SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
628    let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
629    focus_nodes_with_evaluator(data, sel, &mut evaluator)
630}
631
632pub(crate) fn focus_nodes_with(
633    data: &Graph,
634    backend: &dyn PathBackend,
635    sel: &Selector,
636    arena: &ShapeArena,
637    sparql: &SparqlExecutor,
638) -> Vec<Term> {
639    let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
640    focus_nodes_with_evaluator(data, sel, &mut evaluator)
641}
642
643fn focus_nodes_with_evaluator(
644    data: &Graph,
645    sel: &Selector,
646    evaluator: &mut ShapeEvaluator<'_>,
647) -> Vec<Term> {
648    match sel {
649        Selector::HasOut(q) => subjects_of(data, q),
650        Selector::HasIn(q) => objects_of(data, q),
651        Selector::IsConst(c) => vec![c.clone()],
652        Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
653            // Class targets are lowered to
654            // rdf:type/rdfs:subClassOf* ending at a constant. Searching
655            // backward from that constant avoids traversing the hierarchy once
656            // for every node in the data graph.
657            Shape::TestConst(target) => pred(evaluator.g, target, path)
658                .into_iter()
659                .filter(|node| graph_contains_term(data, node))
660                .collect(),
661            _ => all_nodes(data)
662                .into_iter()
663                .filter(|v| {
664                    succ(evaluator.g, v, path)
665                        .iter()
666                        .any(|u| evaluator.holds(u, *qual))
667                })
668                .collect(),
669        },
670        Selector::Sparql(target) => {
671            let candidates = all_nodes(data);
672            evaluator
673                .sparql
674                .target_nodes(&target.query)
675                .unwrap_or_default()
676                .into_iter()
677                .filter(|node| candidates.contains(node))
678                .collect()
679        }
680    }
681}
682
683fn holds_memoized(
684    g: &dyn PathBackend,
685    arena: &ShapeArena,
686    v: &Term,
687    id: ShapeId,
688    sparql: &SparqlExecutor,
689    state: &mut EvalState,
690) -> EvalResult {
691    let key = (id, v.clone());
692    if let Some(&holds) = state.memo.get(&key) {
693        if let Some(telemetry) = state.telemetry.as_mut() {
694            telemetry.hits += 1;
695        }
696        return EvalResult {
697            holds,
698            cacheable: true,
699        };
700    }
701    if let Some(telemetry) = state.telemetry.as_mut() {
702        telemetry.misses += 1;
703    }
704    if !state.active.insert(key.clone()) {
705        if let Some(telemetry) = state.telemetry.as_mut() {
706            telemetry.recursion_back_edges += 1;
707        }
708        return EvalResult {
709            holds: true,
710            cacheable: false,
711        }; // back-edge ⇒ assume conforming: the gfp choice (doc 03)
712    }
713    let result = match arena.get(id) {
714        Shape::Annotated { shape, .. } => holds_memoized(g, arena, v, *shape, sparql, state),
715        Shape::Top | Shape::Pending => EvalResult {
716            holds: true,
717            cacheable: true,
718        },
719        Shape::Sparql(constraint) => EvalResult {
720            holds: sparql
721                .constraint_violations(constraint, v)
722                .is_ok_and(|violations| violations.is_empty()),
723            cacheable: true,
724        },
725        Shape::TestConst(c) => EvalResult {
726            holds: v == c,
727            cacheable: true,
728        },
729        Shape::TestType(t) => EvalResult {
730            holds: value_type_holds(t, v),
731            cacheable: true,
732        },
733        Shape::TestKind(k) => EvalResult {
734            holds: k.matches(v),
735            cacheable: true,
736        },
737        Shape::Closed(q) => EvalResult {
738            holds: closed_offenders(g, v, q).is_empty(),
739            cacheable: true,
740        },
741        Shape::Eq(path, p) => EvalResult {
742            holds: succ(g, v, path) == objects(g, v, p),
743            cacheable: true,
744        },
745        Shape::Disj(path, p) => EvalResult {
746            holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
747            cacheable: true,
748        },
749        Shape::Lt(path, p) => EvalResult {
750            holds: all_pairs_ordered(g, v, path, p, false),
751            cacheable: true,
752        },
753        Shape::Le(path, p) => EvalResult {
754            holds: all_pairs_ordered(g, v, path, p, true),
755            cacheable: true,
756        },
757        Shape::UniqueLang(path) => EvalResult {
758            holds: unique_lang(&succ(g, v, path)),
759            cacheable: true,
760        },
761        Shape::Not(c) => {
762            let child = holds_memoized(g, arena, v, *c, sparql, state);
763            EvalResult {
764                holds: !child.holds,
765                cacheable: child.cacheable,
766            }
767        }
768        Shape::And(cs) => {
769            let mut result = EvalResult {
770                holds: true,
771                cacheable: true,
772            };
773            for child in cs {
774                let child = holds_memoized(g, arena, v, *child, sparql, state);
775                result.cacheable &= child.cacheable;
776                if !child.holds {
777                    result.holds = false;
778                    break;
779                }
780            }
781            result
782        }
783        Shape::Or(cs) => {
784            let mut result = EvalResult {
785                holds: false,
786                cacheable: true,
787            };
788            for child in cs {
789                let child = holds_memoized(g, arena, v, *child, sparql, state);
790                result.cacheable &= child.cacheable;
791                if child.holds {
792                    result.holds = true;
793                    break;
794                }
795            }
796            result
797        }
798        Shape::Count {
799            path,
800            min,
801            max,
802            qualifier,
803        } => {
804            let mut n = 0;
805            let mut cacheable = true;
806            for value in succ(g, v, path) {
807                let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
808                cacheable &= qualified.cacheable;
809                n += u64::from(qualified.holds);
810            }
811            EvalResult {
812                holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
813                cacheable,
814            }
815        }
816    };
817    state.active.remove(&key);
818    if result.cacheable {
819        state.memo.insert(key, result.holds);
820        if let Some(telemetry) = state.telemetry.as_mut() {
821            telemetry.insertions += 1;
822        }
823    } else if let Some(telemetry) = state.telemetry.as_mut() {
824        telemetry.non_cacheable_results += 1;
825    }
826    result
827}
828
829fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
830    const CONTROL_BYTE_ESTIMATE: usize = 1;
831    let bucket_bytes =
832        memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
833    bucket_bytes
834        + memo
835            .keys()
836            .map(|(_, term)| estimated_term_heap_bytes(term))
837            .sum::<usize>()
838}
839
840fn estimated_term_heap_bytes(term: &Term) -> usize {
841    match term {
842        Term::NamedNode(node) => node.as_str().len(),
843        Term::BlankNode(node) => node.as_str().len(),
844        Term::Literal(literal) => {
845            literal.value().len()
846                + literal.language().map_or_else(
847                    || {
848                        let datatype = literal.datatype();
849                        if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
850                            0
851                        } else {
852                            datatype.as_str().len()
853                        }
854                    },
855                    str::len,
856                )
857        }
858    }
859}
860
861/// Batch-evaluate the SPARQL constraints reachable from `root` at the focus set
862/// before the per-node walk, so their fallback queries run once for the whole
863/// focus set instead of once per focus (doc §189). Only constraints reached through
864/// focus-preserving operators (`∧`/`∨`/`¬`) are evaluated at `foci`; constraints
865/// under a `Count` path apply to value nodes, not the statement focus, so they
866/// are skipped here and fall back to per-focus execution. Prefetching is a pure
867/// memo (constraint violations depend only on focus + immutable dataset), so it
868/// is sound regardless of the operator context the constraint is reached in.
869fn prefetch_sparql_constraints(
870    arena: &ShapeArena,
871    root: ShapeId,
872    foci: &[Term],
873    sparql: &SparqlExecutor,
874) {
875    if foci.len() < 2 {
876        return;
877    }
878    let mut constraints = Vec::new();
879    let mut seen = HashSet::new();
880    collect_focus_sparql(arena, root, &mut seen, &mut constraints);
881    for constraint in constraints {
882        let _ = sparql.prefetch_constraint(constraint, foci);
883    }
884}
885
886fn collect_focus_sparql<'a>(
887    arena: &'a ShapeArena,
888    id: ShapeId,
889    seen: &mut HashSet<ShapeId>,
890    out: &mut Vec<&'a SparqlConstraint>,
891) {
892    if !seen.insert(id) {
893        return; // cyclic (recursive) shape: stop at the back-edge
894    }
895    match arena.get(id) {
896        Shape::Annotated { shape, .. } => collect_focus_sparql(arena, *shape, seen, out),
897        Shape::Sparql(constraint) => out.push(constraint),
898        Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
899        Shape::And(ids) | Shape::Or(ids) => {
900            for &child in ids {
901                collect_focus_sparql(arena, child, seen, out);
902            }
903        }
904        // `Count` crosses a path (different focus); all other variants are leaves.
905        _ => {}
906    }
907}
908
909/// The reasons `φ` (slot `id`) fails at `node`. Empty iff it holds. `path_ctx`
910/// is the rendered path by which `node` was reached from the enclosing focus.
911fn explain(
912    evaluator: &mut ShapeEvaluator<'_>,
913    node: &Term,
914    id: ShapeId,
915    path_ctx: Option<&str>,
916    severity: &Severity,
917    stack: &mut HashSet<(ShapeId, Term)>,
918) -> Vec<Reason> {
919    let key = (id, node.clone());
920    if !stack.insert(key.clone()) {
921        return Vec::new(); // back-edge ⇒ assume conforming (gfp, doc 03)
922    }
923    if evaluator.holds(node, id) {
924        stack.remove(&key);
925        return Vec::new();
926    }
927    let reasons = match evaluator.arena.get(id).clone() {
928        Shape::Annotated {
929            severity: source_severity,
930            shape,
931        } => explain(evaluator, node, shape, path_ctx, &source_severity, stack),
932        Shape::Top | Shape::Pending => Vec::new(),
933        Shape::Sparql(constraint) => {
934            match evaluator.sparql.constraint_violations(&constraint, node) {
935                Ok(violations) => violations
936                    .into_iter()
937                    .map(|violation| {
938                        // Compute the message before the value/path fields are moved
939                        // out of `violation`.
940                        let message = sparql_violation_message(&violation, &constraint, node);
941                        Reason {
942                            value: violation.value.unwrap_or_else(|| node.clone()),
943                            path: violation
944                                .path
945                                .map(|path| path.to_string())
946                                .or_else(|| path_ctx.map(str::to_string))
947                                .or_else(|| constraint.path.as_ref().map(path_to_string)),
948                            message,
949                            shape: id,
950                            severity: severity.clone(),
951                            sub_reasons: Vec::new(),
952                        }
953                    })
954                    .collect(),
955                Err(error) => vec![Reason {
956                    value: node.clone(),
957                    path: path_ctx.map(str::to_string),
958                    shape: id,
959                    severity: severity.clone(),
960                    message: format!("SPARQL constraint evaluation failed: {error}"),
961                    sub_reasons: Vec::new(),
962                }],
963            }
964        }
965        Shape::TestConst(_)
966        | Shape::TestType(_)
967        | Shape::TestKind(_)
968        | Shape::Eq(..)
969        | Shape::Disj(..)
970        | Shape::Lt(..)
971        | Shape::Le(..)
972        | Shape::UniqueLang(_) => leaf(
973            evaluator.holds(node, id),
974            node,
975            id,
976            path_ctx,
977            severity,
978            format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
979        ),
980        Shape::Closed(q) => {
981            let bad = closed_offenders(evaluator.g, node, &q);
982            if bad.is_empty() {
983                Vec::new()
984            } else {
985                let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
986                vec![Reason {
987                    value: node.clone(),
988                    path: path_ctx.map(str::to_string),
989                    shape: id,
990                    severity: severity.clone(),
991                    message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
992                    sub_reasons: Vec::new(),
993                }]
994            }
995        }
996        Shape::Not(c) => {
997            if explain(evaluator, node, c, path_ctx, severity, stack).is_empty() {
998                vec![Reason {
999                    value: node.clone(),
1000                    path: path_ctx.map(str::to_string),
1001                    shape: id,
1002                    severity: severity.clone(),
1003                    message: "negated shape unexpectedly held".to_string(),
1004                    sub_reasons: Vec::new(),
1005                }]
1006            } else {
1007                Vec::new()
1008            }
1009        }
1010        Shape::And(cs) => cs
1011            .iter()
1012            .flat_map(|c| explain(evaluator, node, *c, path_ctx, severity, stack))
1013            .collect(),
1014        Shape::Or(cs) => {
1015            let mut sub_reasons = Vec::new();
1016            let mut satisfied = false;
1017            for c in &cs {
1018                let sub = explain(evaluator, node, *c, path_ctx, severity, stack);
1019                if sub.is_empty() {
1020                    satisfied = true;
1021                    break;
1022                }
1023                sub_reasons.extend(sub);
1024            }
1025            if satisfied {
1026                Vec::new()
1027            } else {
1028                vec![Reason {
1029                    value: node.clone(),
1030                    path: path_ctx.map(str::to_string),
1031                    shape: id,
1032                    severity: severity.clone(),
1033                    message: format!("none of {} alternative(s) satisfied", cs.len()),
1034                    sub_reasons,
1035                }]
1036            }
1037        }
1038        Shape::Count {
1039            path,
1040            min,
1041            max,
1042            qualifier,
1043        } => explain_count(
1044            evaluator, node, id, &path, min, max, qualifier, severity, stack,
1045        ),
1046    };
1047    stack.remove(&key);
1048    reasons
1049}
1050
1051#[allow(clippy::too_many_arguments)]
1052fn explain_count(
1053    evaluator: &mut ShapeEvaluator<'_>,
1054    node: &Term,
1055    id: ShapeId,
1056    path: &Path,
1057    min: Option<u64>,
1058    max: Option<u64>,
1059    qualifier: ShapeId,
1060    severity: &Severity,
1061    stack: &mut HashSet<(ShapeId, Term)>,
1062) -> Vec<Reason> {
1063    let path_str = path_to_string(path);
1064    let matched: Vec<Term> = succ(evaluator.g, node, path)
1065        .into_iter()
1066        .filter(|u| evaluator.holds(u, qualifier))
1067        .collect();
1068    let n = matched.len() as u64;
1069    let mut reasons = Vec::new();
1070
1071    if let Some(mx) = max
1072        && n > mx
1073    {
1074        match evaluator.arena.get(qualifier).clone() {
1075            // ∀path.inner encoded as ∃≤0 path.¬inner: drill into the offenders.
1076            Shape::Not(inner) if mx == 0 => {
1077                for u in &matched {
1078                    reasons.extend(explain(
1079                        evaluator,
1080                        u,
1081                        inner,
1082                        Some(&path_str),
1083                        severity,
1084                        stack,
1085                    ));
1086                }
1087            }
1088            _ => reasons.push(Reason {
1089                value: node.clone(),
1090                path: Some(path_str.clone()),
1091                shape: id,
1092                severity: severity.clone(),
1093                message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
1094                sub_reasons: Vec::new(),
1095            }),
1096        }
1097    }
1098
1099    if let Some(mn) = min
1100        && n < mn
1101    {
1102        reasons.push(Reason {
1103            value: node.clone(),
1104            path: Some(path_str.clone()),
1105            shape: id,
1106            severity: severity.clone(),
1107            message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
1108            sub_reasons: Vec::new(),
1109        });
1110    }
1111
1112    reasons
1113}
1114
1115/// The message for a `sh:sparql` violation, by SHACL §5.2.1 precedence: the
1116/// result's own `?message` binding, then the constraint's (or shape's)
1117/// `sh:message` (with `{$this}`/`{?var}` substitution), then a constructed
1118/// description naming the shape and value.
1119fn sparql_violation_message(
1120    violation: &SparqlViolation,
1121    constraint: &SparqlConstraint,
1122    node: &Term,
1123) -> String {
1124    if let Some(message) = &violation.message {
1125        return term_text(message);
1126    }
1127    if !constraint.messages.is_empty() {
1128        return constraint
1129            .messages
1130            .iter()
1131            .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
1132            .collect::<Vec<_>>()
1133            .join("; ");
1134    }
1135    let mut message = match &constraint.shape {
1136        Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
1137        None => "SPARQL constraint not satisfied".to_string(),
1138    };
1139    if let Some(value) = &violation.value {
1140        message.push_str(&format!(" (value: {value})"));
1141    }
1142    message
1143}
1144
1145/// Substitute `{$varName}` / `{?varName}` placeholders in a message template.
1146///
1147/// `$this` resolves to `focus`; all other names are looked up in `bindings`
1148/// (keyed without the `$`/`?` sigil). Unresolved placeholders are left as-is.
1149pub(crate) fn apply_message_template(
1150    template: &str,
1151    focus: &Term,
1152    bindings: &HashMap<String, Term>,
1153) -> String {
1154    static RE: OnceLock<Regex> = OnceLock::new();
1155    let re = RE
1156        .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
1157    re.replace_all(template, |caps: &regex::Captures| {
1158        let placeholder = &caps[1];
1159        let name = &placeholder[1..]; // strip leading `$` or `?`
1160        let term = if name == "this" {
1161            Some(focus)
1162        } else {
1163            bindings.get(name)
1164        };
1165        term.map(|t| match t {
1166            Term::NamedNode(n) => format!("<{}>", n.as_str()),
1167            Term::BlankNode(b) => format!("_:{}", b.as_str()),
1168            Term::Literal(l) => l.value().to_string(),
1169        })
1170        .unwrap_or_else(|| placeholder.to_string())
1171    })
1172    .to_string()
1173}
1174
1175/// A term's human-facing text: a literal's lexical value, otherwise its RDF
1176/// rendering (`<iri>` / `_:id`).
1177fn term_text(term: &Term) -> String {
1178    match term {
1179        Term::Literal(literal) => literal.value().to_string(),
1180        other => other.to_string(),
1181    }
1182}
1183
1184fn leaf(
1185    ok: bool,
1186    node: &Term,
1187    id: ShapeId,
1188    path_ctx: Option<&str>,
1189    severity: &Severity,
1190    message: String,
1191) -> Vec<Reason> {
1192    if ok {
1193        Vec::new()
1194    } else {
1195        vec![Reason {
1196            value: node.clone(),
1197            path: path_ctx.map(str::to_string),
1198            shape: id,
1199            severity: severity.clone(),
1200            message,
1201            sub_reasons: Vec::new(),
1202        }]
1203    }
1204}
1205
1206fn all_pairs_ordered(
1207    g: &dyn PathBackend,
1208    v: &Term,
1209    path: &Path,
1210    p: &NamedNode,
1211    allow_eq: bool,
1212) -> bool {
1213    let lhs = succ(g, v, path);
1214    let rhs = objects(g, v, p);
1215    for a in &lhs {
1216        for b in &rhs {
1217            match compare_terms(a, b) {
1218                Some(Ordering::Less) => {}
1219                Some(Ordering::Equal) if allow_eq => {}
1220                _ => return false,
1221            }
1222        }
1223    }
1224    true
1225}
1226
1227fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1228    succ(g, v, &Path::Pred(p.clone()))
1229}
1230
1231/// Predicates on `node` not allowed by a closed shape's set `q`.
1232fn closed_offenders(
1233    g: &dyn PathBackend,
1234    node: &Term,
1235    q: &BTreeSet<NamedNode>,
1236) -> BTreeSet<NamedNode> {
1237    g.out_predicates(node)
1238        .into_iter()
1239        .filter(|p| !q.contains(p))
1240        .collect()
1241}
1242
1243fn unique_lang(values: &HashSet<Term>) -> bool {
1244    let mut seen = HashSet::new();
1245    for term in values {
1246        if let Term::Literal(l) = term
1247            && let Some(lang) = l.language()
1248            && !seen.insert(lang.to_ascii_lowercase())
1249        {
1250            return false;
1251        }
1252    }
1253    true
1254}
1255
1256fn dedup_reasons(reasons: &mut Vec<Reason>) {
1257    let mut seen = HashSet::new();
1258    reasons.retain(|r| {
1259        seen.insert((
1260            r.value.to_string(),
1261            r.message.clone(),
1262            r.severity.as_str().to_string(),
1263        ))
1264    });
1265}
1266
1267fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1268    crate::path::term_of(s.into_owned())
1269}
1270
1271/// Distinct subjects of triples with predicate `p`.
1272fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1273    let mut seen = HashSet::new();
1274    data.triples_for_predicate(p.as_ref())
1275        .filter_map(|t| {
1276            let term = subject_term(t.subject);
1277            seen.insert(term.clone()).then_some(term)
1278        })
1279        .collect()
1280}
1281
1282/// Distinct objects of triples with predicate `p`.
1283fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1284    let mut seen = HashSet::new();
1285    data.triples_for_predicate(p.as_ref())
1286        .filter_map(|t| {
1287            let term = t.object.into_owned();
1288            seen.insert(term.clone()).then_some(term)
1289        })
1290        .collect()
1291}
1292
1293/// All distinct terms appearing as a subject or object in the graph.
1294fn all_nodes(g: &Graph) -> HashSet<Term> {
1295    let mut nodes = HashSet::new();
1296    for t in g.iter() {
1297        nodes.insert(subject_term(t.subject));
1298        nodes.insert(t.object.into_owned());
1299    }
1300    nodes
1301}
1302
1303/// Whether `term` appears in the graph's node domain.
1304fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1305    node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1306        || g.triples_for_object(term).next().is_some()
1307}
1308
1309#[cfg(test)]
1310mod tests {
1311    use super::*;
1312    use oxrdf::{NamedNode, Triple};
1313
1314    fn iri(local: &str) -> NamedNode {
1315        NamedNode::new(format!("http://ex/{local}")).unwrap()
1316    }
1317
1318    fn term(local: &str) -> Term {
1319        Term::NamedNode(iri(local))
1320    }
1321
1322    #[test]
1323    fn memoizes_shared_value_checks_across_focus_nodes() {
1324        let p = iri("p");
1325        let shared = term("shared");
1326        let mut graph = Graph::new();
1327        graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1328        graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1329
1330        let mut arena = ShapeArena::new();
1331        let qualifier = arena.insert(Shape::TestConst(shared));
1332        let root = arena.insert(Shape::Count {
1333            path: Path::Pred(p),
1334            min: Some(1),
1335            max: None,
1336            qualifier,
1337        });
1338        let sparql = SparqlExecutor::new(&graph).unwrap();
1339        crate::profile::enable();
1340        {
1341            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1342            assert!(evaluator.holds(&term("a"), root));
1343            assert!(evaluator.holds(&term("b"), root));
1344        }
1345        let profile = crate::profile::take().unwrap();
1346        let cache = profile.shape_cache();
1347        assert_eq!(cache.evaluators, 1);
1348        assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1349        assert_eq!(cache.peak_entries, 3);
1350        assert!(cache.estimated_peak_bytes > 0);
1351    }
1352
1353    #[test]
1354    fn does_not_cache_cycle_dependent_results() {
1355        // A := B ∧ false; B := A. While evaluating A, the B result is
1356        // provisionally true through the A back-edge, but the gfp solution is
1357        // A=false, B=false. Caching that provisional B=true would be unsound.
1358        let mut arena = ShapeArena::new();
1359        let a = arena.reserve();
1360        let b = arena.reserve();
1361        let bottom = arena.insert(Shape::Or(Vec::new()));
1362        arena.set(a, Shape::And(vec![b, bottom]));
1363        arena.set(b, Shape::And(vec![a]));
1364
1365        let graph = Graph::new();
1366        let sparql = SparqlExecutor::new(&graph).unwrap();
1367        let node = term("x");
1368
1369        crate::profile::enable();
1370        {
1371            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1372            assert!(!evaluator.holds(&node, a));
1373            assert!(!evaluator.holds(&node, b));
1374        }
1375        let profile = crate::profile::take().unwrap();
1376        let cache = profile.shape_cache();
1377        assert!(cache.recursion_back_edges > 0);
1378        assert!(cache.non_cacheable_results > 0);
1379    }
1380}