Skip to main content

shifty_engine/
validate.rs

1//! Reference shape satisfaction `G, v ⊨ φ` and schema validation `G ⊨ S`
2//! (doc 00 §3–§4, Table 2). This is the conformance *oracle*: the optimized
3//! engines in later layers must agree with it.
4//!
5//! Two evaluators share the logic: [`holds`] returns a bare bool (used for
6//! target selection and counting), while [`explain`] returns the specific
7//! atomic constraints that failed, with the value node and path at which they
8//! failed — enough for per-constraint reporting. The `∀π = ∃≤0 π.¬φ` encoding
9//! lets a failed universal drill straight into the offending value node's inner
10//! constraint.
11
12use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{Path, Schema, Selector, Shape, ShapeArena, ShapeId, SparqlConstraint};
22use shifty_opt::{FocusSource, PhysicalPlan, analyze};
23use std::cmp::Ordering;
24use std::collections::{BTreeSet, HashMap, HashSet};
25use std::fmt;
26use std::sync::OnceLock;
27
28#[derive(Debug, Clone, Copy)]
29struct EvalResult {
30    holds: bool,
31    cacheable: bool,
32}
33
34#[derive(Default)]
35struct EvalState {
36    memo: HashMap<(ShapeId, Term), bool>,
37    active: HashSet<(ShapeId, Term)>,
38    telemetry: Option<ShapeCacheSample>,
39}
40
41/// Per-graph-snapshot shape evaluator.
42///
43/// Completed checks are shared across statements and focus nodes. Results that
44/// depend on the coinductive recursion back-edge are deliberately not cached:
45/// their provisional `true` may only be valid in the active call context.
46pub(crate) struct ShapeEvaluator<'a> {
47    g: &'a dyn PathBackend,
48    arena: &'a ShapeArena,
49    sparql: &'a SparqlExecutor,
50    state: EvalState,
51}
52
53impl<'a> ShapeEvaluator<'a> {
54    pub(crate) fn new(
55        g: &'a dyn PathBackend,
56        arena: &'a ShapeArena,
57        sparql: &'a SparqlExecutor,
58    ) -> Self {
59        Self {
60            g,
61            arena,
62            sparql,
63            state: EvalState {
64                telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
65                ..EvalState::default()
66            },
67        }
68    }
69
70    pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
71        holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
72    }
73
74    pub(crate) fn sparql(&self) -> &SparqlExecutor {
75        self.sparql
76    }
77}
78
79impl Drop for ShapeEvaluator<'_> {
80    fn drop(&mut self) {
81        let Some(mut sample) = self.state.telemetry else {
82            return;
83        };
84        sample.entries = self.state.memo.len();
85        sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
86        crate::profile::record_shape_cache(sample);
87    }
88}
89
90/// A single failed atomic constraint.
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct Reason {
93    /// The node at which the constraint failed (a value node, or the focus).
94    pub value: Term,
95    /// The path from the enclosing focus to `value`, if the failure is
96    /// value-scoped (rendered in `π` notation).
97    pub path: Option<String>,
98    /// The failing constraint's arena slot (cross-references the algebra dump).
99    pub shape: ShapeId,
100    pub message: String,
101    /// Non-empty when this reason is an `sh:or` group: one entry per OR branch
102    /// that failed, so the caller can tell "fix any one of these."
103    #[serde(default, skip_serializing_if = "Vec::is_empty")]
104    pub sub_reasons: Vec<Reason>,
105}
106
107/// One focus node that failed its statement's shape, with the reasons why.
108#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
109pub struct Violation {
110    pub focus: Term,
111    /// Index of the violated `(selector, shape)` statement in the schema.
112    pub statement: usize,
113    pub reasons: Vec<Reason>,
114}
115
116/// The outcome of validating a data graph against a schema.
117#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
118pub struct ValidationOutcome {
119    pub conforms: bool,
120    pub violations: Vec<Violation>,
121}
122
123/// Which RDF graph(s) validation uses for focus discovery and evaluation.
124#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
125pub enum ValidationGraphMode {
126    /// Focus nodes and evaluation both use only the data graph.
127    Data,
128    /// Focus nodes come from data; paths, class hierarchy, and SPARQL use the
129    /// union of data and shapes. This is the default for split graphs.
130    #[default]
131    Union,
132    /// Focus discovery and evaluation both use the full data/shapes union.
133    UnionAll,
134}
135
136/// The schema is not stratifiable: it recurses through genuine negation, so it
137/// has no defined 2-valued semantics (`docs/03-recursion-semantics.md`). We
138/// diagnose rather than guess. Carries the offending shape components.
139#[derive(Debug, Clone, PartialEq, Eq)]
140pub struct NonStratifiable {
141    pub components: Vec<Vec<ShapeId>>,
142}
143
144impl fmt::Display for NonStratifiable {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        write!(f, "non-stratifiable schema (recursion through negation): ")?;
147        for (i, c) in self.components.iter().enumerate() {
148            if i > 0 {
149                write!(f, "; ")?;
150            }
151            let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
152            write!(f, "{{{}}}", ids.join(" "))?;
153        }
154        Ok(())
155    }
156}
157
158impl std::error::Error for NonStratifiable {}
159
160/// Validate `data` against `schema`.
161///
162/// Honors the decided recursion semantics (`docs/03-recursion-semantics.md`):
163/// the schema must be **stratifiable** (no recursion through net negation), else
164/// we return [`NonStratifiable`]. For a stratifiable schema all recursion is
165/// net-positive (monotone), and [`explain`]/[`holds`]'s "assume conforming on a
166/// back-edge" cycle guard computes exactly the **greatest fixpoint** — the
167/// coinductive validation reading we chose.
168pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
169    validate_with_context(data, data, schema)
170}
171
172/// Validate split data and shapes graphs using the selected graph mode.
173pub fn validate_graphs(
174    data: &Graph,
175    shapes: &Graph,
176    schema: &Schema,
177) -> Result<ValidationOutcome, NonStratifiable> {
178    validate_graphs_with_mode(data, shapes, schema, ValidationGraphMode::default())
179}
180
181/// Validate split data and shapes graphs using an explicit graph mode.
182pub fn validate_graphs_with_mode(
183    data: &Graph,
184    shapes: &Graph,
185    schema: &Schema,
186    mode: ValidationGraphMode,
187) -> Result<ValidationOutcome, NonStratifiable> {
188    match mode {
189        ValidationGraphMode::Data => {
190            let uses_shapes = uses_shapes_graph(&schema.arena);
191            let frozen = if uses_shapes {
192                FrozenIndexedDataset::from_graphs(data, shapes)
193            } else {
194                FrozenIndexedDataset::from_graph(data)
195            };
196            validate_with_frozen(data, schema, frozen, uses_shapes)
197        }
198        ValidationGraphMode::Union => {
199            let uses_shapes = uses_shapes_graph(&schema.arena);
200            let frozen = if uses_shapes {
201                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
202            } else {
203                FrozenIndexedDataset::from_graph_union(data, shapes)
204            };
205            validate_with_frozen(data, schema, frozen, uses_shapes)
206        }
207        ValidationGraphMode::UnionAll => {
208            let union = graph_union(data, shapes);
209            validate_with_context(&union, &union, schema)
210        }
211    }
212}
213
214/// Validate focus nodes from `data` while evaluating paths, class hierarchy,
215/// and SPARQL against `context`. For split data/shapes inputs, `context` should
216/// be their RDF union.
217pub fn validate_with_context(
218    data: &Graph,
219    context: &Graph,
220    schema: &Schema,
221) -> Result<ValidationOutcome, NonStratifiable> {
222    let uses_shapes = uses_shapes_graph(&schema.arena);
223    let frozen = if uses_shapes {
224        FrozenIndexedDataset::from_graphs(context, context)
225    } else {
226        FrozenIndexedDataset::from_graph(context)
227    };
228    validate_with_frozen(data, schema, frozen, uses_shapes)
229}
230
231fn validate_with_frozen(
232    data: &Graph,
233    schema: &Schema,
234    frozen: FrozenIndexedDataset,
235    has_shapes_graph: bool,
236) -> Result<ValidationOutcome, NonStratifiable> {
237    let strat = analyze(&schema.arena);
238    if !strat.stratifiable {
239        let components = strat
240            .strata
241            .iter()
242            .filter(|s| !s.stratifiable)
243            .map(|s| s.shapes.clone())
244            .collect();
245        return Err(NonStratifiable { components });
246    }
247
248    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
249    let backend = sparql
250        .frozen()
251        .expect("validation executor always has a frozen dataset");
252    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
253    let mut violations = Vec::new();
254    for (i, st) in schema.statements.iter().enumerate() {
255        let label = schema
256            .names
257            .get(&st.shape)
258            .cloned()
259            .unwrap_or_else(|| format!("@{}", st.shape.0));
260        let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
261        prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
262        for v in foci {
263            let t = std::time::Instant::now();
264            let mut stack = HashSet::new();
265            let mut reasons = explain(&mut evaluator, &v, st.shape, None, &mut stack);
266            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
267            dedup_reasons(&mut reasons);
268            if !reasons.is_empty() {
269                violations.push(Violation {
270                    focus: v,
271                    statement: i,
272                    reasons,
273                });
274            }
275        }
276    }
277    Ok(ValidationOutcome {
278        conforms: violations.is_empty(),
279        violations,
280    })
281}
282
283/// Whether any `sh:sparql` constraint references `$shapesGraph`, requiring the
284/// shapes graph to be mirrored into a named graph for evaluation.
285fn uses_shapes_graph(arena: &ShapeArena) -> bool {
286    (0..arena.len()).any(|i| {
287        matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
288    })
289}
290
291pub(crate) fn graph_union(left: &Graph, right: &Graph) -> Graph {
292    let mut union = left.clone();
293    for triple in right.iter() {
294        union.insert(triple);
295    }
296    union
297}
298
299/// Validate using a [`PhysicalPlan`] (Layer 5): focus nodes come from compiled
300/// [`FocusSource`]s (so class targets seed backward from the constant instead of
301/// scanning every node) and checks run over the plan's cost-ordered arena. The
302/// result is identical to [`validate`] on the same schema — the W3C harness
303/// cross-checks this.
304pub fn validate_plan(
305    data: &Graph,
306    plan: &PhysicalPlan,
307) -> Result<ValidationOutcome, NonStratifiable> {
308    validate_plan_with_context(data, data, plan)
309}
310
311/// Validate a physical plan over split graphs using the default graph mode.
312pub fn validate_plan_graphs(
313    data: &Graph,
314    shapes: &Graph,
315    plan: &PhysicalPlan,
316) -> Result<ValidationOutcome, NonStratifiable> {
317    validate_plan_graphs_with_mode(data, shapes, plan, ValidationGraphMode::default())
318}
319
320/// Validate a physical plan over split graphs using an explicit graph mode.
321pub fn validate_plan_graphs_with_mode(
322    data: &Graph,
323    shapes: &Graph,
324    plan: &PhysicalPlan,
325    mode: ValidationGraphMode,
326) -> Result<ValidationOutcome, NonStratifiable> {
327    match mode {
328        ValidationGraphMode::Data => {
329            let uses_shapes = uses_shapes_graph(&plan.arena);
330            let frozen = if uses_shapes {
331                FrozenIndexedDataset::from_graphs(data, shapes)
332            } else {
333                FrozenIndexedDataset::from_graph(data)
334            };
335            validate_plan_with_frozen(data, plan, frozen, uses_shapes)
336        }
337        ValidationGraphMode::Union => {
338            let uses_shapes = uses_shapes_graph(&plan.arena);
339            let frozen = if uses_shapes {
340                FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
341            } else {
342                FrozenIndexedDataset::from_graph_union(data, shapes)
343            };
344            validate_plan_with_frozen(data, plan, frozen, uses_shapes)
345        }
346        ValidationGraphMode::UnionAll => {
347            let union = graph_union(data, shapes);
348            validate_plan_with_context(&union, &union, plan)
349        }
350    }
351}
352
353/// Validate plan focus nodes from `data` against the supplied execution context.
354pub fn validate_plan_with_context(
355    data: &Graph,
356    context: &Graph,
357    plan: &PhysicalPlan,
358) -> Result<ValidationOutcome, NonStratifiable> {
359    let uses_shapes = uses_shapes_graph(&plan.arena);
360    let frozen = if uses_shapes {
361        FrozenIndexedDataset::from_graphs(context, context)
362    } else {
363        FrozenIndexedDataset::from_graph(context)
364    };
365    validate_plan_with_frozen(data, plan, frozen, uses_shapes)
366}
367
368fn validate_plan_with_frozen(
369    data: &Graph,
370    plan: &PhysicalPlan,
371    frozen: FrozenIndexedDataset,
372    has_shapes_graph: bool,
373) -> Result<ValidationOutcome, NonStratifiable> {
374    let strat = analyze(&plan.arena);
375    if !strat.stratifiable {
376        let components = strat
377            .strata
378            .iter()
379            .filter(|s| !s.stratifiable)
380            .map(|s| s.shapes.clone())
381            .collect();
382        return Err(NonStratifiable { components });
383    }
384
385    let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
386    let backend = sparql
387        .frozen()
388        .expect("validation executor always has a frozen dataset");
389    let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
390    let mut violations = Vec::new();
391    for (i, sp) in plan.statements.iter().enumerate() {
392        let label = plan
393            .names
394            .get(&sp.shape)
395            .cloned()
396            .unwrap_or_else(|| format!("@{}", sp.shape.0));
397        let foci = focus_for_source(data, &sp.source, &mut evaluator);
398        prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
399        for v in foci {
400            let t = std::time::Instant::now();
401            let mut stack = HashSet::new();
402            let mut reasons = explain(&mut evaluator, &v, sp.shape, None, &mut stack);
403            crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
404            dedup_reasons(&mut reasons);
405            if !reasons.is_empty() {
406                violations.push(Violation {
407                    focus: v,
408                    statement: i,
409                    reasons,
410                });
411            }
412        }
413    }
414    Ok(ValidationOutcome {
415        conforms: violations.is_empty(),
416        violations,
417    })
418}
419
420/// Focus nodes for a compiled [`FocusSource`].
421fn focus_for_source(
422    data: &Graph,
423    source: &FocusSource,
424    evaluator: &mut ShapeEvaluator<'_>,
425) -> Vec<Term> {
426    match source {
427        FocusSource::SubjectsOf(p) => subjects_of(data, p),
428        FocusSource::ObjectsOf(p) => objects_of(data, p),
429        FocusSource::Node(c) => vec![c.clone()],
430        // the optimization: seed backward from the constant, no full scan
431        FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
432            .into_iter()
433            .filter(|node| graph_contains_term(data, node))
434            .collect(),
435        FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
436            .into_iter()
437            .filter(|v| {
438                succ(evaluator.g, v, path)
439                    .iter()
440                    .any(|u| evaluator.holds(u, *qualifier))
441            })
442            .collect(),
443        FocusSource::Sparql(target) => {
444            let candidates = all_nodes(data);
445            evaluator
446                .sparql
447                .target_nodes(&target.query)
448                .unwrap_or_default()
449                .into_iter()
450                .filter(|node| candidates.contains(node))
451                .collect()
452        }
453    }
454}
455
456/// The focus nodes selected by a selector.
457pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
458    let sparql =
459        SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
460    let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
461    focus_nodes_with_evaluator(data, sel, &mut evaluator)
462}
463
464pub(crate) fn focus_nodes_with(
465    data: &Graph,
466    backend: &dyn PathBackend,
467    sel: &Selector,
468    arena: &ShapeArena,
469    sparql: &SparqlExecutor,
470) -> Vec<Term> {
471    let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
472    focus_nodes_with_evaluator(data, sel, &mut evaluator)
473}
474
475fn focus_nodes_with_evaluator(
476    data: &Graph,
477    sel: &Selector,
478    evaluator: &mut ShapeEvaluator<'_>,
479) -> Vec<Term> {
480    match sel {
481        Selector::HasOut(q) => subjects_of(data, q),
482        Selector::HasIn(q) => objects_of(data, q),
483        Selector::IsConst(c) => vec![c.clone()],
484        Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
485            // Class targets are lowered to
486            // rdf:type/rdfs:subClassOf* ending at a constant. Searching
487            // backward from that constant avoids traversing the hierarchy once
488            // for every node in the data graph.
489            Shape::TestConst(target) => pred(evaluator.g, target, path)
490                .into_iter()
491                .filter(|node| graph_contains_term(data, node))
492                .collect(),
493            _ => all_nodes(data)
494                .into_iter()
495                .filter(|v| {
496                    succ(evaluator.g, v, path)
497                        .iter()
498                        .any(|u| evaluator.holds(u, *qual))
499                })
500                .collect(),
501        },
502        Selector::Sparql(target) => {
503            let candidates = all_nodes(data);
504            evaluator
505                .sparql
506                .target_nodes(&target.query)
507                .unwrap_or_default()
508                .into_iter()
509                .filter(|node| candidates.contains(node))
510                .collect()
511        }
512    }
513}
514
515fn holds_memoized(
516    g: &dyn PathBackend,
517    arena: &ShapeArena,
518    v: &Term,
519    id: ShapeId,
520    sparql: &SparqlExecutor,
521    state: &mut EvalState,
522) -> EvalResult {
523    let key = (id, v.clone());
524    if let Some(&holds) = state.memo.get(&key) {
525        if let Some(telemetry) = state.telemetry.as_mut() {
526            telemetry.hits += 1;
527        }
528        return EvalResult {
529            holds,
530            cacheable: true,
531        };
532    }
533    if let Some(telemetry) = state.telemetry.as_mut() {
534        telemetry.misses += 1;
535    }
536    if !state.active.insert(key.clone()) {
537        if let Some(telemetry) = state.telemetry.as_mut() {
538            telemetry.recursion_back_edges += 1;
539        }
540        return EvalResult {
541            holds: true,
542            cacheable: false,
543        }; // back-edge ⇒ assume conforming: the gfp choice (doc 03)
544    }
545    let result = match arena.get(id) {
546        Shape::Top | Shape::Pending => EvalResult {
547            holds: true,
548            cacheable: true,
549        },
550        Shape::Sparql(constraint) => EvalResult {
551            holds: sparql
552                .constraint_violations(constraint, v)
553                .is_ok_and(|violations| violations.is_empty()),
554            cacheable: true,
555        },
556        Shape::TestConst(c) => EvalResult {
557            holds: v == c,
558            cacheable: true,
559        },
560        Shape::TestType(t) => EvalResult {
561            holds: value_type_holds(t, v),
562            cacheable: true,
563        },
564        Shape::TestKind(k) => EvalResult {
565            holds: k.matches(v),
566            cacheable: true,
567        },
568        Shape::Closed(q) => EvalResult {
569            holds: closed_offenders(g, v, q).is_empty(),
570            cacheable: true,
571        },
572        Shape::Eq(path, p) => EvalResult {
573            holds: succ(g, v, path) == objects(g, v, p),
574            cacheable: true,
575        },
576        Shape::Disj(path, p) => EvalResult {
577            holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
578            cacheable: true,
579        },
580        Shape::Lt(path, p) => EvalResult {
581            holds: all_pairs_ordered(g, v, path, p, false),
582            cacheable: true,
583        },
584        Shape::Le(path, p) => EvalResult {
585            holds: all_pairs_ordered(g, v, path, p, true),
586            cacheable: true,
587        },
588        Shape::UniqueLang(path) => EvalResult {
589            holds: unique_lang(&succ(g, v, path)),
590            cacheable: true,
591        },
592        Shape::Not(c) => {
593            let child = holds_memoized(g, arena, v, *c, sparql, state);
594            EvalResult {
595                holds: !child.holds,
596                cacheable: child.cacheable,
597            }
598        }
599        Shape::And(cs) => {
600            let mut result = EvalResult {
601                holds: true,
602                cacheable: true,
603            };
604            for child in cs {
605                let child = holds_memoized(g, arena, v, *child, sparql, state);
606                result.cacheable &= child.cacheable;
607                if !child.holds {
608                    result.holds = false;
609                    break;
610                }
611            }
612            result
613        }
614        Shape::Or(cs) => {
615            let mut result = EvalResult {
616                holds: false,
617                cacheable: true,
618            };
619            for child in cs {
620                let child = holds_memoized(g, arena, v, *child, sparql, state);
621                result.cacheable &= child.cacheable;
622                if child.holds {
623                    result.holds = true;
624                    break;
625                }
626            }
627            result
628        }
629        Shape::Count {
630            path,
631            min,
632            max,
633            qualifier,
634        } => {
635            let mut n = 0;
636            let mut cacheable = true;
637            for value in succ(g, v, path) {
638                let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
639                cacheable &= qualified.cacheable;
640                n += u64::from(qualified.holds);
641            }
642            EvalResult {
643                holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
644                cacheable,
645            }
646        }
647    };
648    state.active.remove(&key);
649    if result.cacheable {
650        state.memo.insert(key, result.holds);
651        if let Some(telemetry) = state.telemetry.as_mut() {
652            telemetry.insertions += 1;
653        }
654    } else if let Some(telemetry) = state.telemetry.as_mut() {
655        telemetry.non_cacheable_results += 1;
656    }
657    result
658}
659
660fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
661    const CONTROL_BYTE_ESTIMATE: usize = 1;
662    let bucket_bytes =
663        memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
664    bucket_bytes
665        + memo
666            .keys()
667            .map(|(_, term)| estimated_term_heap_bytes(term))
668            .sum::<usize>()
669}
670
671fn estimated_term_heap_bytes(term: &Term) -> usize {
672    match term {
673        Term::NamedNode(node) => node.as_str().len(),
674        Term::BlankNode(node) => node.as_str().len(),
675        Term::Literal(literal) => {
676            literal.value().len()
677                + literal.language().map_or_else(
678                    || {
679                        let datatype = literal.datatype();
680                        if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
681                            0
682                        } else {
683                            datatype.as_str().len()
684                        }
685                    },
686                    str::len,
687                )
688        }
689    }
690}
691
692/// Batch-evaluate the SPARQL constraints reachable from `root` at the focus set
693/// before the per-node walk, so their fallback queries run once for the whole
694/// focus set instead of once per focus (doc §189). Only constraints reached through
695/// focus-preserving operators (`∧`/`∨`/`¬`) are evaluated at `foci`; constraints
696/// under a `Count` path apply to value nodes, not the statement focus, so they
697/// are skipped here and fall back to per-focus execution. Prefetching is a pure
698/// memo (constraint violations depend only on focus + immutable dataset), so it
699/// is sound regardless of the operator context the constraint is reached in.
700fn prefetch_sparql_constraints(
701    arena: &ShapeArena,
702    root: ShapeId,
703    foci: &[Term],
704    sparql: &SparqlExecutor,
705) {
706    if foci.len() < 2 {
707        return;
708    }
709    let mut constraints = Vec::new();
710    let mut seen = HashSet::new();
711    collect_focus_sparql(arena, root, &mut seen, &mut constraints);
712    for constraint in constraints {
713        let _ = sparql.prefetch_constraint(constraint, foci);
714    }
715}
716
717fn collect_focus_sparql<'a>(
718    arena: &'a ShapeArena,
719    id: ShapeId,
720    seen: &mut HashSet<ShapeId>,
721    out: &mut Vec<&'a SparqlConstraint>,
722) {
723    if !seen.insert(id) {
724        return; // cyclic (recursive) shape: stop at the back-edge
725    }
726    match arena.get(id) {
727        Shape::Sparql(constraint) => out.push(constraint),
728        Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
729        Shape::And(ids) | Shape::Or(ids) => {
730            for &child in ids {
731                collect_focus_sparql(arena, child, seen, out);
732            }
733        }
734        // `Count` crosses a path (different focus); all other variants are leaves.
735        _ => {}
736    }
737}
738
739/// The reasons `φ` (slot `id`) fails at `node`. Empty iff it holds. `path_ctx`
740/// is the rendered path by which `node` was reached from the enclosing focus.
741fn explain(
742    evaluator: &mut ShapeEvaluator<'_>,
743    node: &Term,
744    id: ShapeId,
745    path_ctx: Option<&str>,
746    stack: &mut HashSet<(ShapeId, Term)>,
747) -> Vec<Reason> {
748    let key = (id, node.clone());
749    if !stack.insert(key.clone()) {
750        return Vec::new(); // back-edge ⇒ assume conforming (gfp, doc 03)
751    }
752    if evaluator.holds(node, id) {
753        stack.remove(&key);
754        return Vec::new();
755    }
756    let reasons = match evaluator.arena.get(id).clone() {
757        Shape::Top | Shape::Pending => Vec::new(),
758        Shape::Sparql(constraint) => {
759            match evaluator.sparql.constraint_violations(&constraint, node) {
760                Ok(violations) => violations
761                    .into_iter()
762                    .map(|violation| {
763                        // Compute the message before the value/path fields are moved
764                        // out of `violation`.
765                        let message = sparql_violation_message(&violation, &constraint, node);
766                        Reason {
767                            value: violation.value.unwrap_or_else(|| node.clone()),
768                            path: violation
769                                .path
770                                .map(|path| path.to_string())
771                                .or_else(|| path_ctx.map(str::to_string))
772                                .or_else(|| constraint.path.as_ref().map(path_to_string)),
773                            message,
774                            shape: id,
775                            sub_reasons: Vec::new(),
776                        }
777                    })
778                    .collect(),
779                Err(error) => vec![Reason {
780                    value: node.clone(),
781                    path: path_ctx.map(str::to_string),
782                    shape: id,
783                    message: format!("SPARQL constraint evaluation failed: {error}"),
784                    sub_reasons: Vec::new(),
785                }],
786            }
787        }
788        Shape::TestConst(_)
789        | Shape::TestType(_)
790        | Shape::TestKind(_)
791        | Shape::Eq(..)
792        | Shape::Disj(..)
793        | Shape::Lt(..)
794        | Shape::Le(..)
795        | Shape::UniqueLang(_) => leaf(
796            evaluator.holds(node, id),
797            node,
798            id,
799            path_ctx,
800            format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
801        ),
802        Shape::Closed(q) => {
803            let bad = closed_offenders(evaluator.g, node, &q);
804            if bad.is_empty() {
805                Vec::new()
806            } else {
807                let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
808                vec![Reason {
809                    value: node.clone(),
810                    path: path_ctx.map(str::to_string),
811                    shape: id,
812                    message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
813                    sub_reasons: Vec::new(),
814                }]
815            }
816        }
817        Shape::Not(c) => {
818            if explain(evaluator, node, c, path_ctx, stack).is_empty() {
819                vec![Reason {
820                    value: node.clone(),
821                    path: path_ctx.map(str::to_string),
822                    shape: id,
823                    message: "negated shape unexpectedly held".to_string(),
824                    sub_reasons: Vec::new(),
825                }]
826            } else {
827                Vec::new()
828            }
829        }
830        Shape::And(cs) => cs
831            .iter()
832            .flat_map(|c| explain(evaluator, node, *c, path_ctx, stack))
833            .collect(),
834        Shape::Or(cs) => {
835            let mut sub_reasons = Vec::new();
836            let mut satisfied = false;
837            for c in &cs {
838                let sub = explain(evaluator, node, *c, path_ctx, stack);
839                if sub.is_empty() {
840                    satisfied = true;
841                    break;
842                }
843                sub_reasons.extend(sub);
844            }
845            if satisfied {
846                Vec::new()
847            } else {
848                vec![Reason {
849                    value: node.clone(),
850                    path: path_ctx.map(str::to_string),
851                    shape: id,
852                    message: format!("none of {} alternative(s) satisfied", cs.len()),
853                    sub_reasons,
854                }]
855            }
856        }
857        Shape::Count {
858            path,
859            min,
860            max,
861            qualifier,
862        } => explain_count(evaluator, node, id, &path, min, max, qualifier, stack),
863    };
864    stack.remove(&key);
865    reasons
866}
867
868#[allow(clippy::too_many_arguments)]
869fn explain_count(
870    evaluator: &mut ShapeEvaluator<'_>,
871    node: &Term,
872    id: ShapeId,
873    path: &Path,
874    min: Option<u64>,
875    max: Option<u64>,
876    qualifier: ShapeId,
877    stack: &mut HashSet<(ShapeId, Term)>,
878) -> Vec<Reason> {
879    let path_str = path_to_string(path);
880    let matched: Vec<Term> = succ(evaluator.g, node, path)
881        .into_iter()
882        .filter(|u| evaluator.holds(u, qualifier))
883        .collect();
884    let n = matched.len() as u64;
885    let mut reasons = Vec::new();
886
887    if let Some(mx) = max
888        && n > mx
889    {
890        match evaluator.arena.get(qualifier).clone() {
891            // ∀path.inner encoded as ∃≤0 path.¬inner: drill into the offenders.
892            Shape::Not(inner) if mx == 0 => {
893                for u in &matched {
894                    reasons.extend(explain(evaluator, u, inner, Some(&path_str), stack));
895                }
896            }
897            _ => reasons.push(Reason {
898                value: node.clone(),
899                path: Some(path_str.clone()),
900                shape: id,
901                message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
902                sub_reasons: Vec::new(),
903            }),
904        }
905    }
906
907    if let Some(mn) = min
908        && n < mn
909    {
910        reasons.push(Reason {
911            value: node.clone(),
912            path: Some(path_str.clone()),
913            shape: id,
914            message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
915            sub_reasons: Vec::new(),
916        });
917    }
918
919    reasons
920}
921
922/// The message for a `sh:sparql` violation, by SHACL §5.2.1 precedence: the
923/// result's own `?message` binding, then the constraint's (or shape's)
924/// `sh:message` (with `{$this}`/`{?var}` substitution), then a constructed
925/// description naming the shape and value.
926fn sparql_violation_message(
927    violation: &SparqlViolation,
928    constraint: &SparqlConstraint,
929    node: &Term,
930) -> String {
931    if let Some(message) = &violation.message {
932        return term_text(message);
933    }
934    if !constraint.messages.is_empty() {
935        return constraint
936            .messages
937            .iter()
938            .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
939            .collect::<Vec<_>>()
940            .join("; ");
941    }
942    let mut message = match &constraint.shape {
943        Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
944        None => "SPARQL constraint not satisfied".to_string(),
945    };
946    if let Some(value) = &violation.value {
947        message.push_str(&format!(" (value: {value})"));
948    }
949    message
950}
951
952/// Substitute `{$varName}` / `{?varName}` placeholders in a message template.
953///
954/// `$this` resolves to `focus`; all other names are looked up in `bindings`
955/// (keyed without the `$`/`?` sigil). Unresolved placeholders are left as-is.
956pub(crate) fn apply_message_template(
957    template: &str,
958    focus: &Term,
959    bindings: &HashMap<String, Term>,
960) -> String {
961    static RE: OnceLock<Regex> = OnceLock::new();
962    let re = RE
963        .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
964    re.replace_all(template, |caps: &regex::Captures| {
965        let placeholder = &caps[1];
966        let name = &placeholder[1..]; // strip leading `$` or `?`
967        let term = if name == "this" {
968            Some(focus)
969        } else {
970            bindings.get(name)
971        };
972        term.map(|t| match t {
973            Term::NamedNode(n) => format!("<{}>", n.as_str()),
974            Term::BlankNode(b) => format!("_:{}", b.as_str()),
975            Term::Literal(l) => l.value().to_string(),
976        })
977        .unwrap_or_else(|| placeholder.to_string())
978    })
979    .to_string()
980}
981
982/// A term's human-facing text: a literal's lexical value, otherwise its RDF
983/// rendering (`<iri>` / `_:id`).
984fn term_text(term: &Term) -> String {
985    match term {
986        Term::Literal(literal) => literal.value().to_string(),
987        other => other.to_string(),
988    }
989}
990
991fn leaf(
992    ok: bool,
993    node: &Term,
994    id: ShapeId,
995    path_ctx: Option<&str>,
996    message: String,
997) -> Vec<Reason> {
998    if ok {
999        Vec::new()
1000    } else {
1001        vec![Reason {
1002            value: node.clone(),
1003            path: path_ctx.map(str::to_string),
1004            shape: id,
1005            message,
1006            sub_reasons: Vec::new(),
1007        }]
1008    }
1009}
1010
1011fn all_pairs_ordered(
1012    g: &dyn PathBackend,
1013    v: &Term,
1014    path: &Path,
1015    p: &NamedNode,
1016    allow_eq: bool,
1017) -> bool {
1018    let lhs = succ(g, v, path);
1019    let rhs = objects(g, v, p);
1020    for a in &lhs {
1021        for b in &rhs {
1022            match compare_terms(a, b) {
1023                Some(Ordering::Less) => {}
1024                Some(Ordering::Equal) if allow_eq => {}
1025                _ => return false,
1026            }
1027        }
1028    }
1029    true
1030}
1031
1032fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1033    succ(g, v, &Path::Pred(p.clone()))
1034}
1035
1036/// Predicates on `node` not allowed by a closed shape's set `q`.
1037fn closed_offenders(
1038    g: &dyn PathBackend,
1039    node: &Term,
1040    q: &BTreeSet<NamedNode>,
1041) -> BTreeSet<NamedNode> {
1042    g.out_predicates(node)
1043        .into_iter()
1044        .filter(|p| !q.contains(p))
1045        .collect()
1046}
1047
1048fn unique_lang(values: &HashSet<Term>) -> bool {
1049    let mut seen = HashSet::new();
1050    for term in values {
1051        if let Term::Literal(l) = term
1052            && let Some(lang) = l.language()
1053            && !seen.insert(lang.to_ascii_lowercase())
1054        {
1055            return false;
1056        }
1057    }
1058    true
1059}
1060
1061fn dedup_reasons(reasons: &mut Vec<Reason>) {
1062    let mut seen = HashSet::new();
1063    reasons.retain(|r| seen.insert((r.value.to_string(), r.message.clone())));
1064}
1065
1066fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1067    crate::path::term_of(s.into_owned())
1068}
1069
1070/// Distinct subjects of triples with predicate `p`.
1071fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1072    let mut seen = HashSet::new();
1073    data.triples_for_predicate(p.as_ref())
1074        .filter_map(|t| {
1075            let term = subject_term(t.subject);
1076            seen.insert(term.clone()).then_some(term)
1077        })
1078        .collect()
1079}
1080
1081/// Distinct objects of triples with predicate `p`.
1082fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1083    let mut seen = HashSet::new();
1084    data.triples_for_predicate(p.as_ref())
1085        .filter_map(|t| {
1086            let term = t.object.into_owned();
1087            seen.insert(term.clone()).then_some(term)
1088        })
1089        .collect()
1090}
1091
1092/// All distinct terms appearing as a subject or object in the graph.
1093fn all_nodes(g: &Graph) -> HashSet<Term> {
1094    let mut nodes = HashSet::new();
1095    for t in g.iter() {
1096        nodes.insert(subject_term(t.subject));
1097        nodes.insert(t.object.into_owned());
1098    }
1099    nodes
1100}
1101
1102/// Whether `term` appears in the graph's node domain.
1103fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1104    node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1105        || g.triples_for_object(term).next().is_some()
1106}
1107
1108#[cfg(test)]
1109mod tests {
1110    use super::*;
1111    use oxrdf::{NamedNode, Triple};
1112
1113    fn iri(local: &str) -> NamedNode {
1114        NamedNode::new(format!("http://ex/{local}")).unwrap()
1115    }
1116
1117    fn term(local: &str) -> Term {
1118        Term::NamedNode(iri(local))
1119    }
1120
1121    #[test]
1122    fn memoizes_shared_value_checks_across_focus_nodes() {
1123        let p = iri("p");
1124        let shared = term("shared");
1125        let mut graph = Graph::new();
1126        graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1127        graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1128
1129        let mut arena = ShapeArena::new();
1130        let qualifier = arena.insert(Shape::TestConst(shared));
1131        let root = arena.insert(Shape::Count {
1132            path: Path::Pred(p),
1133            min: Some(1),
1134            max: None,
1135            qualifier,
1136        });
1137        let sparql = SparqlExecutor::new(&graph).unwrap();
1138        crate::profile::enable();
1139        {
1140            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1141            assert!(evaluator.holds(&term("a"), root));
1142            assert!(evaluator.holds(&term("b"), root));
1143        }
1144        let profile = crate::profile::take().unwrap();
1145        let cache = profile.shape_cache();
1146        assert_eq!(cache.evaluators, 1);
1147        assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1148        assert_eq!(cache.peak_entries, 3);
1149        assert!(cache.estimated_peak_bytes > 0);
1150    }
1151
1152    #[test]
1153    fn does_not_cache_cycle_dependent_results() {
1154        // A := B ∧ false; B := A. While evaluating A, the B result is
1155        // provisionally true through the A back-edge, but the gfp solution is
1156        // A=false, B=false. Caching that provisional B=true would be unsound.
1157        let mut arena = ShapeArena::new();
1158        let a = arena.reserve();
1159        let b = arena.reserve();
1160        let bottom = arena.insert(Shape::Or(Vec::new()));
1161        arena.set(a, Shape::And(vec![b, bottom]));
1162        arena.set(b, Shape::And(vec![a]));
1163
1164        let graph = Graph::new();
1165        let sparql = SparqlExecutor::new(&graph).unwrap();
1166        let node = term("x");
1167
1168        crate::profile::enable();
1169        {
1170            let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1171            assert!(!evaluator.holds(&node, a));
1172            assert!(!evaluator.holds(&node, b));
1173        }
1174        let profile = crate::profile::take().unwrap();
1175        let cache = profile.shape_cache();
1176        assert!(cache.recursion_back_edges > 0);
1177        assert!(cache.non_cacheable_results > 0);
1178    }
1179}