Skip to main content

shifty_engine/
validate.rs

1//! Reference shape satisfaction `G, v ⊨ φ` and schema validation `G ⊨ S`
2//! (doc 00 §3–§4, Table 2). This is the conformance *oracle*: the optimized
3//! engines in later layers must agree with it.
4//!
5//! Two evaluators share the logic: [`holds`] returns a bare bool (used for
6//! target selection and counting), while [`explain`] returns the specific
7//! atomic constraints that failed, with the value node and path at which they
8//! failed — enough for per-constraint reporting. The `∀π = ∃≤0 π.¬φ` encoding
9//! lets a failed universal drill straight into the offending value node's inner
10//! constraint.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{
22    Path, Schema, Selector, Severity, Shape, ShapeArena, ShapeId, SparqlConstraint,
23};
24use shifty_opt::{FocusSource, PhysicalPlan, analyze};
25use std::cmp::Ordering;
26use std::collections::{BTreeSet, HashMap, HashSet};
27use std::fmt;
28use std::sync::OnceLock;
29
30#[derive(Debug, Clone, Copy)]
31struct EvalResult {
32    holds: bool,
33    cacheable: bool,
34}
35
36#[derive(Default)]
37struct EvalState {
38    memo: HashMap<(ShapeId, Term), bool>,
39    active: HashSet<(ShapeId, Term)>,
40    telemetry: Option<ShapeCacheSample>,
41}
42
43/// Per-graph-snapshot shape evaluator.
44///
45/// Completed checks are shared across statements and focus nodes. Results that
46/// depend on the coinductive recursion back-edge are deliberately not cached:
47/// their provisional `true` may only be valid in the active call context.
48pub(crate) struct ShapeEvaluator<'a> {
49    g: &'a dyn PathBackend,
50    arena: &'a ShapeArena,
51    sparql: &'a SparqlExecutor,
52    state: EvalState,
53}
54
55impl<'a> ShapeEvaluator<'a> {
56    pub(crate) fn new(
57        g: &'a dyn PathBackend,
58        arena: &'a ShapeArena,
59        sparql: &'a SparqlExecutor,
60    ) -> Self {
61        Self {
62            g,
63            arena,
64            sparql,
65            state: EvalState {
66                telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
67                ..EvalState::default()
68            },
69        }
70    }
71
72    pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
73        holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
74    }
75
76    pub(crate) fn sparql(&self) -> &SparqlExecutor {
77        self.sparql
78    }
79
80    /// The path-evaluation backend, for sibling folds (e.g. witnessing).
81    pub(crate) fn backend(&self) -> &dyn PathBackend {
82        self.g
83    }
84
85    /// The shape arena, for sibling folds (e.g. witnessing).
86    pub(crate) fn arena(&self) -> &ShapeArena {
87        self.arena
88    }
89}
90
91impl Drop for ShapeEvaluator<'_> {
92    fn drop(&mut self) {
93        let Some(mut sample) = self.state.telemetry else {
94            return;
95        };
96        sample.entries = self.state.memo.len();
97        sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
98        crate::profile::record_shape_cache(sample);
99    }
100}
101
102/// A single failed atomic constraint.
103#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct Reason {
105    /// The node at which the constraint failed (a value node, or the focus).
106    pub value: Term,
107    /// The path from the enclosing focus to `value`, if the failure is
108    /// value-scoped (rendered in `π` notation).
109    pub path: Option<String>,
110    /// The failing constraint's arena slot (cross-references the algebra dump).
111    pub shape: ShapeId,
112    /// `sh:severity` on the source shape, defaulting to `sh:Violation`.
113    #[serde(default)]
114    pub severity: Severity,
115    pub message: String,
116    /// Non-empty when this reason is an `sh:or` group: one entry per OR branch
117    /// that failed, so the caller can tell "fix any one of these."
118    #[serde(default, skip_serializing_if = "Vec::is_empty")]
119    pub sub_reasons: Vec<Reason>,
120}
121
122/// One focus node that failed its statement's shape, with the reasons why.
123#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct Violation {
125    pub focus: Term,
126    /// Index of the violated `(selector, shape)` statement in the schema.
127    pub statement: usize,
128    /// The most severe top-level reason in this grouped finding.
129    pub severity: Severity,
130    pub reasons: Vec<Reason>,
131}
132
133/// The outcome of validating a data graph against a schema.
134#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
135pub struct ValidationOutcome {
136    pub conforms: bool,
137    pub violations: Vec<Violation>,
138}
139
140/// Controls which retained findings make a validation outcome non-conforming.
141#[derive(Debug, Clone, PartialEq, Eq)]
142pub struct ValidationOptions {
143    /// Lowest severity that makes `conforms` false. Defaults to `sh:Info`, so
144    /// all findings fail validation as they did before severity was retained.
145    pub minimum_severity: Severity,
146    /// Whether to sort violations by severity, focus node, and statement.
147    /// Defaults to `true` for deterministic output.
148    pub sort_results: bool,
149}
150
151impl Default for ValidationOptions {
152    fn default() -> Self {
153        Self {
154            minimum_severity: Severity::Info,
155            sort_results: true,
156        }
157    }
158}
159
160fn most_severe(reasons: &[Reason]) -> Severity {
161    reasons
162        .iter()
163        .max_by_key(|reason| reason.severity.rank())
164        .map(|reason| reason.severity.clone())
165        .unwrap_or(Severity::Violation)
166}
167
168fn conforms_at_threshold(violations: &[Violation], minimum: &Severity) -> bool {
169    !violations
170        .iter()
171        .flat_map(|violation| &violation.reasons)
172        .any(|reason| reason.severity.meets(minimum))
173}
174
175fn sort_violations(violations: &mut [Violation], enabled: bool) {
176    if enabled {
177        violations.sort_by(|left, right| {
178            right
179                .severity
180                .rank()
181                .cmp(&left.severity.rank())
182                .then_with(|| left.focus.to_string().cmp(&right.focus.to_string()))
183                .then_with(|| left.statement.cmp(&right.statement))
184        });
185    }
186}
187
188/// Which RDF graph(s) validation uses for focus discovery and evaluation.
189#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
190pub enum ValidationGraphMode {
191    /// Focus nodes and evaluation both use only the data graph.
192    Data,
193    /// Focus nodes come from data; paths, class hierarchy, and SPARQL use the
194    /// union of data and shapes. This is the default for split graphs.
195    #[default]
196    Union,
197    /// Focus discovery and evaluation both use the full data/shapes union.
198    UnionAll,
199}
200
201/// The schema is not stratifiable: it recurses through genuine negation, so it
202/// has no defined 2-valued semantics (`docs/03-recursion-semantics.md`). We
203/// diagnose rather than guess. Carries the offending shape components.
204#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct NonStratifiable {
206    pub components: Vec<Vec<ShapeId>>,
207}
208
209impl fmt::Display for NonStratifiable {
210    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211        write!(f, "non-stratifiable schema (recursion through negation): ")?;
212        for (i, c) in self.components.iter().enumerate() {
213            if i > 0 {
214                write!(f, "; ")?;
215            }
216            let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
217            write!(f, "{{{}}}", ids.join(" "))?;
218        }
219        Ok(())
220    }
221}
222
223impl std::error::Error for NonStratifiable {}
224
225/// Validate `data` against `schema`.
226///
227/// Honors the decided recursion semantics (`docs/03-recursion-semantics.md`):
228/// the schema must be **stratifiable** (no recursion through net negation), else
229/// we return [`NonStratifiable`]. For a stratifiable schema all recursion is
230/// net-positive (monotone), and [`explain`]/[`holds`]'s "assume conforming on a
231/// back-edge" cycle guard computes exactly the **greatest fixpoint** — the
232/// coinductive validation reading we chose.
233pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
234    validate_with_options(data, schema, &ValidationOptions::default())
235}
236
237/// Validate `data` against `schema` using an explicit severity policy.
238pub fn validate_with_options(
239    data: &Graph,
240    schema: &Schema,
241    options: &ValidationOptions,
242) -> Result<ValidationOutcome, NonStratifiable> {
243    validate_with_context_and_options(data, data, schema, options)
244}
245
246/// Validate split data and shapes graphs using the selected graph mode.
247pub fn validate_graphs(
248    data: &Graph,
249    shapes: &Graph,
250    schema: &Schema,
251) -> Result<ValidationOutcome, NonStratifiable> {
252    validate_graphs_with_mode_and_options(
253        data,
254        shapes,
255        schema,
256        ValidationGraphMode::default(),
257        &ValidationOptions::default(),
258    )
259}
260
261/// Validate split data and shapes graphs using an explicit graph mode.
262pub fn validate_graphs_with_mode(
263    data: &Graph,
264    shapes: &Graph,
265    schema: &Schema,
266    mode: ValidationGraphMode,
267) -> Result<ValidationOutcome, NonStratifiable> {
268    validate_graphs_with_mode_and_options(data, shapes, schema, mode, &ValidationOptions::default())
269}
270
271/// Validate split graphs using an explicit graph mode and severity policy.
272pub fn validate_graphs_with_mode_and_options(
273    data: &Graph,
274    shapes: &Graph,
275    schema: &Schema,
276    mode: ValidationGraphMode,
277    options: &ValidationOptions,
278) -> Result<ValidationOutcome, NonStratifiable> {
279    match mode {
280        ValidationGraphMode::Data => {
281            let uses_shapes = uses_shapes_graph(&schema.arena);
282            let frozen = if uses_shapes {
283                FrozenIndexedDataset::from_graphs(data, shapes)
284            } else {
285                FrozenIndexedDataset::from_graph(data)
286            };
287            validate_with_frozen(data, schema, frozen, uses_shapes, options)
288        }
289        ValidationGraphMode::Union => {
290            let uses_shapes = uses_shapes_graph(&schema.arena);
291            let frozen = if uses_shapes {
292                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
293            } else {
294                FrozenIndexedDataset::from_graph_union(data, shapes)
295            };
296            validate_with_frozen(data, schema, frozen, uses_shapes, options)
297        }
298        ValidationGraphMode::UnionAll => {
299            let union = graph_union(data, shapes);
300            validate_with_context_and_options(&union, &union, schema, options)
301        }
302    }
303}
304
305/// Validate focus nodes from `data` while evaluating paths, class hierarchy,
306/// and SPARQL against `context`. For split data/shapes inputs, `context` should
307/// be their RDF union.
308pub fn validate_with_context(
309    data: &Graph,
310    context: &Graph,
311    schema: &Schema,
312) -> Result<ValidationOutcome, NonStratifiable> {
313    validate_with_context_and_options(data, context, schema, &ValidationOptions::default())
314}
315
316/// Validate with separate focus/context graphs and an explicit severity policy.
317pub fn validate_with_context_and_options(
318    data: &Graph,
319    context: &Graph,
320    schema: &Schema,
321    options: &ValidationOptions,
322) -> Result<ValidationOutcome, NonStratifiable> {
323    let uses_shapes = uses_shapes_graph(&schema.arena);
324    let frozen = if uses_shapes {
325        FrozenIndexedDataset::from_graphs(context, context)
326    } else {
327        FrozenIndexedDataset::from_graph(context)
328    };
329    validate_with_frozen(data, schema, frozen, uses_shapes, options)
330}
331
332fn validate_with_frozen(
333    data: &Graph,
334    schema: &Schema,
335    frozen: FrozenIndexedDataset,
336    has_shapes_graph: bool,
337    options: &ValidationOptions,
338) -> Result<ValidationOutcome, NonStratifiable> {
339    let strat = analyze(&schema.arena);
340    if !strat.stratifiable {
341        let components = strat
342            .strata
343            .iter()
344            .filter(|s| !s.stratifiable)
345            .map(|s| s.shapes.clone())
346            .collect();
347        return Err(NonStratifiable { components });
348    }
349
350    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
351    let backend = sparql
352        .frozen()
353        .expect("validation executor always has a frozen dataset");
354    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
355    let mut violations = Vec::new();
356    for (i, st) in schema.statements.iter().enumerate() {
357        let label = schema
358            .names
359            .get(&st.shape)
360            .cloned()
361            .unwrap_or_else(|| format!("@{}", st.shape.0));
362        let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
363        prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
364        for v in foci {
365            let t = std::time::Instant::now();
366            let mut stack = HashSet::new();
367            let mut reasons = explain(
368                &mut evaluator,
369                &v,
370                st.shape,
371                None,
372                &Severity::Violation,
373                &mut stack,
374            );
375            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
376            dedup_reasons(&mut reasons);
377            if !reasons.is_empty() {
378                let severity = most_severe(&reasons);
379                violations.push(Violation {
380                    focus: v,
381                    statement: i,
382                    severity,
383                    reasons,
384                });
385            }
386        }
387    }
388    sort_violations(&mut violations, options.sort_results);
389    Ok(ValidationOutcome {
390        conforms: conforms_at_threshold(&violations, &options.minimum_severity),
391        violations,
392    })
393}
394
395/// Whether any `sh:sparql` constraint references `$shapesGraph`, requiring the
396/// shapes graph to be mirrored into a named graph for evaluation.
397pub(crate) fn uses_shapes_graph(arena: &ShapeArena) -> bool {
398    (0..arena.len()).any(|i| {
399        matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
400    })
401}
402
403pub(crate) fn graph_union(left: &Graph, right: &Graph) -> Graph {
404    let mut union = left.clone();
405    for triple in right.iter() {
406        union.insert(triple);
407    }
408    union
409}
410
411/// Validate using a [`PhysicalPlan`] (Layer 5): focus nodes come from compiled
412/// [`FocusSource`]s (so class targets seed backward from the constant instead of
413/// scanning every node) and checks run over the plan's cost-ordered arena. The
414/// result is identical to [`validate`] on the same schema — the W3C harness
415/// cross-checks this.
416pub fn validate_plan(
417    data: &Graph,
418    plan: &PhysicalPlan,
419) -> Result<ValidationOutcome, NonStratifiable> {
420    validate_plan_with_options(data, plan, &ValidationOptions::default())
421}
422
423/// Validate a physical plan using an explicit severity policy.
424pub fn validate_plan_with_options(
425    data: &Graph,
426    plan: &PhysicalPlan,
427    options: &ValidationOptions,
428) -> Result<ValidationOutcome, NonStratifiable> {
429    validate_plan_with_context_and_options(data, data, plan, options)
430}
431
432/// Validate a physical plan over split graphs using the default graph mode.
433pub fn validate_plan_graphs(
434    data: &Graph,
435    shapes: &Graph,
436    plan: &PhysicalPlan,
437) -> Result<ValidationOutcome, NonStratifiable> {
438    validate_plan_graphs_with_mode_and_options(
439        data,
440        shapes,
441        plan,
442        ValidationGraphMode::default(),
443        &ValidationOptions::default(),
444    )
445}
446
447/// Validate a physical plan over split graphs using an explicit graph mode.
448pub fn validate_plan_graphs_with_mode(
449    data: &Graph,
450    shapes: &Graph,
451    plan: &PhysicalPlan,
452    mode: ValidationGraphMode,
453) -> Result<ValidationOutcome, NonStratifiable> {
454    validate_plan_graphs_with_mode_and_options(
455        data,
456        shapes,
457        plan,
458        mode,
459        &ValidationOptions::default(),
460    )
461}
462
463/// Validate a physical plan over split graphs with an explicit severity policy.
464pub fn validate_plan_graphs_with_mode_and_options(
465    data: &Graph,
466    shapes: &Graph,
467    plan: &PhysicalPlan,
468    mode: ValidationGraphMode,
469    options: &ValidationOptions,
470) -> Result<ValidationOutcome, NonStratifiable> {
471    match mode {
472        ValidationGraphMode::Data => {
473            let uses_shapes = uses_shapes_graph(&plan.arena);
474            let frozen = if uses_shapes {
475                FrozenIndexedDataset::from_graphs(data, shapes)
476            } else {
477                FrozenIndexedDataset::from_graph(data)
478            };
479            validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
480        }
481        ValidationGraphMode::Union => {
482            let uses_shapes = uses_shapes_graph(&plan.arena);
483            let frozen = if uses_shapes {
484                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
485            } else {
486                FrozenIndexedDataset::from_graph_union(data, shapes)
487            };
488            validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
489        }
490        ValidationGraphMode::UnionAll => {
491            let union = graph_union(data, shapes);
492            validate_plan_with_context_and_options(&union, &union, plan, options)
493        }
494    }
495}
496
497/// Validate plan focus nodes from `data` against the supplied execution context.
498pub fn validate_plan_with_context(
499    data: &Graph,
500    context: &Graph,
501    plan: &PhysicalPlan,
502) -> Result<ValidationOutcome, NonStratifiable> {
503    validate_plan_with_context_and_options(data, context, plan, &ValidationOptions::default())
504}
505
506/// Validate plan focus nodes against a context with a severity policy.
507pub fn validate_plan_with_context_and_options(
508    data: &Graph,
509    context: &Graph,
510    plan: &PhysicalPlan,
511    options: &ValidationOptions,
512) -> Result<ValidationOutcome, NonStratifiable> {
513    let uses_shapes = uses_shapes_graph(&plan.arena);
514    let frozen = if uses_shapes {
515        FrozenIndexedDataset::from_graphs(context, context)
516    } else {
517        FrozenIndexedDataset::from_graph(context)
518    };
519    validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
520}
521
522fn validate_plan_with_frozen(
523    data: &Graph,
524    plan: &PhysicalPlan,
525    frozen: FrozenIndexedDataset,
526    has_shapes_graph: bool,
527    options: &ValidationOptions,
528) -> Result<ValidationOutcome, NonStratifiable> {
529    let strat = analyze(&plan.arena);
530    if !strat.stratifiable {
531        let components = strat
532            .strata
533            .iter()
534            .filter(|s| !s.stratifiable)
535            .map(|s| s.shapes.clone())
536            .collect();
537        return Err(NonStratifiable { components });
538    }
539
540    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
541    let backend = sparql
542        .frozen()
543        .expect("validation executor always has a frozen dataset");
544    let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
545    let mut violations = Vec::new();
546    for (i, sp) in plan.statements.iter().enumerate() {
547        let label = plan
548            .names
549            .get(&sp.shape)
550            .cloned()
551            .unwrap_or_else(|| format!("@{}", sp.shape.0));
552        let foci = focus_for_source(data, &sp.source, &mut evaluator);
553        prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
554        for v in foci {
555            let t = std::time::Instant::now();
556            let mut stack = HashSet::new();
557            let mut reasons = explain(
558                &mut evaluator,
559                &v,
560                sp.shape,
561                None,
562                &Severity::Violation,
563                &mut stack,
564            );
565            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
566            dedup_reasons(&mut reasons);
567            if !reasons.is_empty() {
568                let severity = most_severe(&reasons);
569                violations.push(Violation {
570                    focus: v,
571                    statement: i,
572                    severity,
573                    reasons,
574                });
575            }
576        }
577    }
578    sort_violations(&mut violations, options.sort_results);
579    Ok(ValidationOutcome {
580        conforms: conforms_at_threshold(&violations, &options.minimum_severity),
581        violations,
582    })
583}
584
585/// Focus nodes for a compiled [`FocusSource`].
586fn focus_for_source(
587    data: &Graph,
588    source: &FocusSource,
589    evaluator: &mut ShapeEvaluator<'_>,
590) -> Vec<Term> {
591    match source {
592        FocusSource::SubjectsOf(p) => subjects_of(data, p),
593        FocusSource::ObjectsOf(p) => objects_of(data, p),
594        FocusSource::Node(c) => vec![c.clone()],
595        // the optimization: seed backward from the constant, no full scan
596        FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
597            .into_iter()
598            .filter(|node| graph_contains_term(data, node))
599            .collect(),
600        FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
601            .into_iter()
602            .filter(|v| {
603                succ(evaluator.g, v, path)
604                    .iter()
605                    .any(|u| evaluator.holds(u, *qualifier))
606            })
607            .collect(),
608        FocusSource::Sparql(target) => {
609            let candidates = all_nodes(data);
610            evaluator
611                .sparql
612                .target_nodes(&target.query)
613                .unwrap_or_default()
614                .into_iter()
615                .filter(|node| candidates.contains(node))
616                .collect()
617        }
618    }
619}
620
621/// The focus nodes selected by a selector.
622pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
623    let sparql =
624        SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
625    let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
626    focus_nodes_with_evaluator(data, sel, &mut evaluator)
627}
628
629pub(crate) fn focus_nodes_with(
630    data: &Graph,
631    backend: &dyn PathBackend,
632    sel: &Selector,
633    arena: &ShapeArena,
634    sparql: &SparqlExecutor,
635) -> Vec<Term> {
636    let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
637    focus_nodes_with_evaluator(data, sel, &mut evaluator)
638}
639
640fn focus_nodes_with_evaluator(
641    data: &Graph,
642    sel: &Selector,
643    evaluator: &mut ShapeEvaluator<'_>,
644) -> Vec<Term> {
645    match sel {
646        Selector::HasOut(q) => subjects_of(data, q),
647        Selector::HasIn(q) => objects_of(data, q),
648        Selector::IsConst(c) => vec![c.clone()],
649        Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
650            // Class targets are lowered to
651            // rdf:type/rdfs:subClassOf* ending at a constant. Searching
652            // backward from that constant avoids traversing the hierarchy once
653            // for every node in the data graph.
654            Shape::TestConst(target) => pred(evaluator.g, target, path)
655                .into_iter()
656                .filter(|node| graph_contains_term(data, node))
657                .collect(),
658            _ => all_nodes(data)
659                .into_iter()
660                .filter(|v| {
661                    succ(evaluator.g, v, path)
662                        .iter()
663                        .any(|u| evaluator.holds(u, *qual))
664                })
665                .collect(),
666        },
667        Selector::Sparql(target) => {
668            let candidates = all_nodes(data);
669            evaluator
670                .sparql
671                .target_nodes(&target.query)
672                .unwrap_or_default()
673                .into_iter()
674                .filter(|node| candidates.contains(node))
675                .collect()
676        }
677    }
678}
679
680fn holds_memoized(
681    g: &dyn PathBackend,
682    arena: &ShapeArena,
683    v: &Term,
684    id: ShapeId,
685    sparql: &SparqlExecutor,
686    state: &mut EvalState,
687) -> EvalResult {
688    let key = (id, v.clone());
689    if let Some(&holds) = state.memo.get(&key) {
690        if let Some(telemetry) = state.telemetry.as_mut() {
691            telemetry.hits += 1;
692        }
693        return EvalResult {
694            holds,
695            cacheable: true,
696        };
697    }
698    if let Some(telemetry) = state.telemetry.as_mut() {
699        telemetry.misses += 1;
700    }
701    if !state.active.insert(key.clone()) {
702        if let Some(telemetry) = state.telemetry.as_mut() {
703            telemetry.recursion_back_edges += 1;
704        }
705        return EvalResult {
706            holds: true,
707            cacheable: false,
708        }; // back-edge ⇒ assume conforming: the gfp choice (doc 03)
709    }
710    let result = match arena.get(id) {
711        Shape::Annotated { shape, .. } => holds_memoized(g, arena, v, *shape, sparql, state),
712        Shape::Top | Shape::Pending => EvalResult {
713            holds: true,
714            cacheable: true,
715        },
716        Shape::Sparql(constraint) => EvalResult {
717            holds: sparql
718                .constraint_violations(constraint, v)
719                .is_ok_and(|violations| violations.is_empty()),
720            cacheable: true,
721        },
722        Shape::TestConst(c) => EvalResult {
723            holds: v == c,
724            cacheable: true,
725        },
726        Shape::TestType(t) => EvalResult {
727            holds: value_type_holds(t, v),
728            cacheable: true,
729        },
730        Shape::TestKind(k) => EvalResult {
731            holds: k.matches(v),
732            cacheable: true,
733        },
734        Shape::Closed(q) => EvalResult {
735            holds: closed_offenders(g, v, q).is_empty(),
736            cacheable: true,
737        },
738        Shape::Eq(path, p) => EvalResult {
739            holds: succ(g, v, path) == objects(g, v, p),
740            cacheable: true,
741        },
742        Shape::Disj(path, p) => EvalResult {
743            holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
744            cacheable: true,
745        },
746        Shape::Lt(path, p) => EvalResult {
747            holds: all_pairs_ordered(g, v, path, p, false),
748            cacheable: true,
749        },
750        Shape::Le(path, p) => EvalResult {
751            holds: all_pairs_ordered(g, v, path, p, true),
752            cacheable: true,
753        },
754        Shape::UniqueLang(path) => EvalResult {
755            holds: unique_lang(&succ(g, v, path)),
756            cacheable: true,
757        },
758        Shape::Not(c) => {
759            let child = holds_memoized(g, arena, v, *c, sparql, state);
760            EvalResult {
761                holds: !child.holds,
762                cacheable: child.cacheable,
763            }
764        }
765        Shape::And(cs) => {
766            let mut result = EvalResult {
767                holds: true,
768                cacheable: true,
769            };
770            for child in cs {
771                let child = holds_memoized(g, arena, v, *child, sparql, state);
772                result.cacheable &= child.cacheable;
773                if !child.holds {
774                    result.holds = false;
775                    break;
776                }
777            }
778            result
779        }
780        Shape::Or(cs) => {
781            let mut result = EvalResult {
782                holds: false,
783                cacheable: true,
784            };
785            for child in cs {
786                let child = holds_memoized(g, arena, v, *child, sparql, state);
787                result.cacheable &= child.cacheable;
788                if child.holds {
789                    result.holds = true;
790                    break;
791                }
792            }
793            result
794        }
795        Shape::Count {
796            path,
797            min,
798            max,
799            qualifier,
800        } => {
801            let mut n = 0;
802            let mut cacheable = true;
803            for value in succ(g, v, path) {
804                let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
805                cacheable &= qualified.cacheable;
806                n += u64::from(qualified.holds);
807            }
808            EvalResult {
809                holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
810                cacheable,
811            }
812        }
813    };
814    state.active.remove(&key);
815    if result.cacheable {
816        state.memo.insert(key, result.holds);
817        if let Some(telemetry) = state.telemetry.as_mut() {
818            telemetry.insertions += 1;
819        }
820    } else if let Some(telemetry) = state.telemetry.as_mut() {
821        telemetry.non_cacheable_results += 1;
822    }
823    result
824}
825
826fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
827    const CONTROL_BYTE_ESTIMATE: usize = 1;
828    let bucket_bytes =
829        memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
830    bucket_bytes
831        + memo
832            .keys()
833            .map(|(_, term)| estimated_term_heap_bytes(term))
834            .sum::<usize>()
835}
836
837fn estimated_term_heap_bytes(term: &Term) -> usize {
838    match term {
839        Term::NamedNode(node) => node.as_str().len(),
840        Term::BlankNode(node) => node.as_str().len(),
841        Term::Literal(literal) => {
842            literal.value().len()
843                + literal.language().map_or_else(
844                    || {
845                        let datatype = literal.datatype();
846                        if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
847                            0
848                        } else {
849                            datatype.as_str().len()
850                        }
851                    },
852                    str::len,
853                )
854        }
855    }
856}
857
858/// Batch-evaluate the SPARQL constraints reachable from `root` at the focus set
859/// before the per-node walk, so their fallback queries run once for the whole
860/// focus set instead of once per focus (doc §189). Only constraints reached through
861/// focus-preserving operators (`∧`/`∨`/`¬`) are evaluated at `foci`; constraints
862/// under a `Count` path apply to value nodes, not the statement focus, so they
863/// are skipped here and fall back to per-focus execution. Prefetching is a pure
864/// memo (constraint violations depend only on focus + immutable dataset), so it
865/// is sound regardless of the operator context the constraint is reached in.
866fn prefetch_sparql_constraints(
867    arena: &ShapeArena,
868    root: ShapeId,
869    foci: &[Term],
870    sparql: &SparqlExecutor,
871) {
872    if foci.len() < 2 {
873        return;
874    }
875    let mut constraints = Vec::new();
876    let mut seen = HashSet::new();
877    collect_focus_sparql(arena, root, &mut seen, &mut constraints);
878    for constraint in constraints {
879        let _ = sparql.prefetch_constraint(constraint, foci);
880    }
881}
882
883fn collect_focus_sparql<'a>(
884    arena: &'a ShapeArena,
885    id: ShapeId,
886    seen: &mut HashSet<ShapeId>,
887    out: &mut Vec<&'a SparqlConstraint>,
888) {
889    if !seen.insert(id) {
890        return; // cyclic (recursive) shape: stop at the back-edge
891    }
892    match arena.get(id) {
893        Shape::Annotated { shape, .. } => collect_focus_sparql(arena, *shape, seen, out),
894        Shape::Sparql(constraint) => out.push(constraint),
895        Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
896        Shape::And(ids) | Shape::Or(ids) => {
897            for &child in ids {
898                collect_focus_sparql(arena, child, seen, out);
899            }
900        }
901        // `Count` crosses a path (different focus); all other variants are leaves.
902        _ => {}
903    }
904}
905
906/// The reasons `φ` (slot `id`) fails at `node`. Empty iff it holds. `path_ctx`
907/// is the rendered path by which `node` was reached from the enclosing focus.
908fn explain(
909    evaluator: &mut ShapeEvaluator<'_>,
910    node: &Term,
911    id: ShapeId,
912    path_ctx: Option<&str>,
913    severity: &Severity,
914    stack: &mut HashSet<(ShapeId, Term)>,
915) -> Vec<Reason> {
916    let key = (id, node.clone());
917    if !stack.insert(key.clone()) {
918        return Vec::new(); // back-edge ⇒ assume conforming (gfp, doc 03)
919    }
920    if evaluator.holds(node, id) {
921        stack.remove(&key);
922        return Vec::new();
923    }
924    let reasons = match evaluator.arena.get(id).clone() {
925        Shape::Annotated {
926            severity: source_severity,
927            shape,
928        } => explain(evaluator, node, shape, path_ctx, &source_severity, stack),
929        Shape::Top | Shape::Pending => Vec::new(),
930        Shape::Sparql(constraint) => {
931            match evaluator.sparql.constraint_violations(&constraint, node) {
932                Ok(violations) => violations
933                    .into_iter()
934                    .map(|violation| {
935                        // Compute the message before the value/path fields are moved
936                        // out of `violation`.
937                        let message = sparql_violation_message(&violation, &constraint, node);
938                        Reason {
939                            value: violation.value.unwrap_or_else(|| node.clone()),
940                            path: violation
941                                .path
942                                .map(|path| path.to_string())
943                                .or_else(|| path_ctx.map(str::to_string))
944                                .or_else(|| constraint.path.as_ref().map(path_to_string)),
945                            message,
946                            shape: id,
947                            severity: severity.clone(),
948                            sub_reasons: Vec::new(),
949                        }
950                    })
951                    .collect(),
952                Err(error) => vec![Reason {
953                    value: node.clone(),
954                    path: path_ctx.map(str::to_string),
955                    shape: id,
956                    severity: severity.clone(),
957                    message: format!("SPARQL constraint evaluation failed: {error}"),
958                    sub_reasons: Vec::new(),
959                }],
960            }
961        }
962        Shape::TestConst(_)
963        | Shape::TestType(_)
964        | Shape::TestKind(_)
965        | Shape::Eq(..)
966        | Shape::Disj(..)
967        | Shape::Lt(..)
968        | Shape::Le(..)
969        | Shape::UniqueLang(_) => leaf(
970            evaluator.holds(node, id),
971            node,
972            id,
973            path_ctx,
974            severity,
975            format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
976        ),
977        Shape::Closed(q) => {
978            let bad = closed_offenders(evaluator.g, node, &q);
979            if bad.is_empty() {
980                Vec::new()
981            } else {
982                let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
983                vec![Reason {
984                    value: node.clone(),
985                    path: path_ctx.map(str::to_string),
986                    shape: id,
987                    severity: severity.clone(),
988                    message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
989                    sub_reasons: Vec::new(),
990                }]
991            }
992        }
993        Shape::Not(c) => {
994            if explain(evaluator, node, c, path_ctx, severity, stack).is_empty() {
995                vec![Reason {
996                    value: node.clone(),
997                    path: path_ctx.map(str::to_string),
998                    shape: id,
999                    severity: severity.clone(),
1000                    message: "negated shape unexpectedly held".to_string(),
1001                    sub_reasons: Vec::new(),
1002                }]
1003            } else {
1004                Vec::new()
1005            }
1006        }
1007        Shape::And(cs) => cs
1008            .iter()
1009            .flat_map(|c| explain(evaluator, node, *c, path_ctx, severity, stack))
1010            .collect(),
1011        Shape::Or(cs) => {
1012            let mut sub_reasons = Vec::new();
1013            let mut satisfied = false;
1014            for c in &cs {
1015                let sub = explain(evaluator, node, *c, path_ctx, severity, stack);
1016                if sub.is_empty() {
1017                    satisfied = true;
1018                    break;
1019                }
1020                sub_reasons.extend(sub);
1021            }
1022            if satisfied {
1023                Vec::new()
1024            } else {
1025                vec![Reason {
1026                    value: node.clone(),
1027                    path: path_ctx.map(str::to_string),
1028                    shape: id,
1029                    severity: severity.clone(),
1030                    message: format!("none of {} alternative(s) satisfied", cs.len()),
1031                    sub_reasons,
1032                }]
1033            }
1034        }
1035        Shape::Count {
1036            path,
1037            min,
1038            max,
1039            qualifier,
1040        } => explain_count(
1041            evaluator, node, id, &path, min, max, qualifier, severity, stack,
1042        ),
1043    };
1044    stack.remove(&key);
1045    reasons
1046}
1047
1048#[allow(clippy::too_many_arguments)]
1049fn explain_count(
1050    evaluator: &mut ShapeEvaluator<'_>,
1051    node: &Term,
1052    id: ShapeId,
1053    path: &Path,
1054    min: Option<u64>,
1055    max: Option<u64>,
1056    qualifier: ShapeId,
1057    severity: &Severity,
1058    stack: &mut HashSet<(ShapeId, Term)>,
1059) -> Vec<Reason> {
1060    let path_str = path_to_string(path);
1061    let matched: Vec<Term> = succ(evaluator.g, node, path)
1062        .into_iter()
1063        .filter(|u| evaluator.holds(u, qualifier))
1064        .collect();
1065    let n = matched.len() as u64;
1066    let mut reasons = Vec::new();
1067
1068    if let Some(mx) = max
1069        && n > mx
1070    {
1071        match evaluator.arena.get(qualifier).clone() {
1072            // ∀path.inner encoded as ∃≤0 path.¬inner: drill into the offenders.
1073            Shape::Not(inner) if mx == 0 => {
1074                for u in &matched {
1075                    reasons.extend(explain(
1076                        evaluator,
1077                        u,
1078                        inner,
1079                        Some(&path_str),
1080                        severity,
1081                        stack,
1082                    ));
1083                }
1084            }
1085            _ => reasons.push(Reason {
1086                value: node.clone(),
1087                path: Some(path_str.clone()),
1088                shape: id,
1089                severity: severity.clone(),
1090                message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
1091                sub_reasons: Vec::new(),
1092            }),
1093        }
1094    }
1095
1096    if let Some(mn) = min
1097        && n < mn
1098    {
1099        reasons.push(Reason {
1100            value: node.clone(),
1101            path: Some(path_str.clone()),
1102            shape: id,
1103            severity: severity.clone(),
1104            message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
1105            sub_reasons: Vec::new(),
1106        });
1107    }
1108
1109    reasons
1110}
1111
1112/// The message for a `sh:sparql` violation, by SHACL §5.2.1 precedence: the
1113/// result's own `?message` binding, then the constraint's (or shape's)
1114/// `sh:message` (with `{$this}`/`{?var}` substitution), then a constructed
1115/// description naming the shape and value.
1116fn sparql_violation_message(
1117    violation: &SparqlViolation,
1118    constraint: &SparqlConstraint,
1119    node: &Term,
1120) -> String {
1121    if let Some(message) = &violation.message {
1122        return term_text(message);
1123    }
1124    if !constraint.messages.is_empty() {
1125        return constraint
1126            .messages
1127            .iter()
1128            .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
1129            .collect::<Vec<_>>()
1130            .join("; ");
1131    }
1132    let mut message = match &constraint.shape {
1133        Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
1134        None => "SPARQL constraint not satisfied".to_string(),
1135    };
1136    if let Some(value) = &violation.value {
1137        message.push_str(&format!(" (value: {value})"));
1138    }
1139    message
1140}
1141
1142/// Substitute `{$varName}` / `{?varName}` placeholders in a message template.
1143///
1144/// `$this` resolves to `focus`; all other names are looked up in `bindings`
1145/// (keyed without the `$`/`?` sigil). Unresolved placeholders are left as-is.
1146pub(crate) fn apply_message_template(
1147    template: &str,
1148    focus: &Term,
1149    bindings: &HashMap<String, Term>,
1150) -> String {
1151    static RE: OnceLock<Regex> = OnceLock::new();
1152    let re = RE
1153        .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
1154    re.replace_all(template, |caps: &regex::Captures| {
1155        let placeholder = &caps[1];
1156        let name = &placeholder[1..]; // strip leading `$` or `?`
1157        let term = if name == "this" {
1158            Some(focus)
1159        } else {
1160            bindings.get(name)
1161        };
1162        term.map(|t| match t {
1163            Term::NamedNode(n) => format!("<{}>", n.as_str()),
1164            Term::BlankNode(b) => format!("_:{}", b.as_str()),
1165            Term::Literal(l) => l.value().to_string(),
1166        })
1167        .unwrap_or_else(|| placeholder.to_string())
1168    })
1169    .to_string()
1170}
1171
1172/// A term's human-facing text: a literal's lexical value, otherwise its RDF
1173/// rendering (`<iri>` / `_:id`).
1174fn term_text(term: &Term) -> String {
1175    match term {
1176        Term::Literal(literal) => literal.value().to_string(),
1177        other => other.to_string(),
1178    }
1179}
1180
1181fn leaf(
1182    ok: bool,
1183    node: &Term,
1184    id: ShapeId,
1185    path_ctx: Option<&str>,
1186    severity: &Severity,
1187    message: String,
1188) -> Vec<Reason> {
1189    if ok {
1190        Vec::new()
1191    } else {
1192        vec![Reason {
1193            value: node.clone(),
1194            path: path_ctx.map(str::to_string),
1195            shape: id,
1196            severity: severity.clone(),
1197            message,
1198            sub_reasons: Vec::new(),
1199        }]
1200    }
1201}
1202
1203fn all_pairs_ordered(
1204    g: &dyn PathBackend,
1205    v: &Term,
1206    path: &Path,
1207    p: &NamedNode,
1208    allow_eq: bool,
1209) -> bool {
1210    let lhs = succ(g, v, path);
1211    let rhs = objects(g, v, p);
1212    for a in &lhs {
1213        for b in &rhs {
1214            match compare_terms(a, b) {
1215                Some(Ordering::Less) => {}
1216                Some(Ordering::Equal) if allow_eq => {}
1217                _ => return false,
1218            }
1219        }
1220    }
1221    true
1222}
1223
1224fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1225    succ(g, v, &Path::Pred(p.clone()))
1226}
1227
1228/// Predicates on `node` not allowed by a closed shape's set `q`.
1229fn closed_offenders(
1230    g: &dyn PathBackend,
1231    node: &Term,
1232    q: &BTreeSet<NamedNode>,
1233) -> BTreeSet<NamedNode> {
1234    g.out_predicates(node)
1235        .into_iter()
1236        .filter(|p| !q.contains(p))
1237        .collect()
1238}
1239
1240fn unique_lang(values: &HashSet<Term>) -> bool {
1241    let mut seen = HashSet::new();
1242    for term in values {
1243        if let Term::Literal(l) = term
1244            && let Some(lang) = l.language()
1245            && !seen.insert(lang.to_ascii_lowercase())
1246        {
1247            return false;
1248        }
1249    }
1250    true
1251}
1252
1253fn dedup_reasons(reasons: &mut Vec<Reason>) {
1254    let mut seen = HashSet::new();
1255    reasons.retain(|r| {
1256        seen.insert((
1257            r.value.to_string(),
1258            r.message.clone(),
1259            r.severity.as_str().to_string(),
1260        ))
1261    });
1262}
1263
1264fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1265    crate::path::term_of(s.into_owned())
1266}
1267
1268/// Distinct subjects of triples with predicate `p`.
1269fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1270    let mut seen = HashSet::new();
1271    data.triples_for_predicate(p.as_ref())
1272        .filter_map(|t| {
1273            let term = subject_term(t.subject);
1274            seen.insert(term.clone()).then_some(term)
1275        })
1276        .collect()
1277}
1278
1279/// Distinct objects of triples with predicate `p`.
1280fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1281    let mut seen = HashSet::new();
1282    data.triples_for_predicate(p.as_ref())
1283        .filter_map(|t| {
1284            let term = t.object.into_owned();
1285            seen.insert(term.clone()).then_some(term)
1286        })
1287        .collect()
1288}
1289
1290/// All distinct terms appearing as a subject or object in the graph.
1291fn all_nodes(g: &Graph) -> HashSet<Term> {
1292    let mut nodes = HashSet::new();
1293    for t in g.iter() {
1294        nodes.insert(subject_term(t.subject));
1295        nodes.insert(t.object.into_owned());
1296    }
1297    nodes
1298}
1299
1300/// Whether `term` appears in the graph's node domain.
1301fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1302    node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1303        || g.triples_for_object(term).next().is_some()
1304}
1305
1306#[cfg(test)]
1307mod tests {
1308    use super::*;
1309    use oxrdf::{NamedNode, Triple};
1310
1311    fn iri(local: &str) -> NamedNode {
1312        NamedNode::new(format!("http://ex/{local}")).unwrap()
1313    }
1314
1315    fn term(local: &str) -> Term {
1316        Term::NamedNode(iri(local))
1317    }
1318
1319    #[test]
1320    fn memoizes_shared_value_checks_across_focus_nodes() {
1321        let p = iri("p");
1322        let shared = term("shared");
1323        let mut graph = Graph::new();
1324        graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1325        graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1326
1327        let mut arena = ShapeArena::new();
1328        let qualifier = arena.insert(Shape::TestConst(shared));
1329        let root = arena.insert(Shape::Count {
1330            path: Path::Pred(p),
1331            min: Some(1),
1332            max: None,
1333            qualifier,
1334        });
1335        let sparql = SparqlExecutor::new(&graph).unwrap();
1336        crate::profile::enable();
1337        {
1338            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1339            assert!(evaluator.holds(&term("a"), root));
1340            assert!(evaluator.holds(&term("b"), root));
1341        }
1342        let profile = crate::profile::take().unwrap();
1343        let cache = profile.shape_cache();
1344        assert_eq!(cache.evaluators, 1);
1345        assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1346        assert_eq!(cache.peak_entries, 3);
1347        assert!(cache.estimated_peak_bytes > 0);
1348    }
1349
1350    #[test]
1351    fn does_not_cache_cycle_dependent_results() {
1352        // A := B ∧ false; B := A. While evaluating A, the B result is
1353        // provisionally true through the A back-edge, but the gfp solution is
1354        // A=false, B=false. Caching that provisional B=true would be unsound.
1355        let mut arena = ShapeArena::new();
1356        let a = arena.reserve();
1357        let b = arena.reserve();
1358        let bottom = arena.insert(Shape::Or(Vec::new()));
1359        arena.set(a, Shape::And(vec![b, bottom]));
1360        arena.set(b, Shape::And(vec![a]));
1361
1362        let graph = Graph::new();
1363        let sparql = SparqlExecutor::new(&graph).unwrap();
1364        let node = term("x");
1365
1366        crate::profile::enable();
1367        {
1368            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1369            assert!(!evaluator.holds(&node, a));
1370            assert!(!evaluator.holds(&node, b));
1371        }
1372        let profile = crate::profile::take().unwrap();
1373        let cache = profile.shape_cache();
1374        assert!(cache.recursion_back_edges > 0);
1375        assert!(cache.non_cacheable_results > 0);
1376    }
1377}