Skip to main content

shifty_engine/
report.rs

1//! W3C `sh:ValidationReport` generation (component-granular, RDF-driven).
2//!
3//! Producing a spec-faithful report needs provenance the optimized algebra
4//! discards: each result carries `sh:sourceConstraintComponent`,
5//! `sh:sourceShape`, and `sh:resultPath`, and the granularity is one result per
6//! (focus, value node, component) — `sh:and`/`sh:or`/`sh:not`/`sh:node` report
7//! as a *unit* (they do not drill into sub-failures), while `sh:property`
8//! delegates to the nested shape. So this validator walks the shapes graph
9//! directly, reusing only the leaf evaluation primitives (`succ`,
10//! `value_type_holds`). It is separate from the algebra path used for fast
11//! conformance.
12//!
13//! Coverage is a growing subset of SHACL Core (see `docs/BACKLOG.md`).
14
15use crate::frozen::FrozenIndexedDataset;
16use crate::path::succ;
17use crate::sparql::SparqlExecutor;
18use crate::validate::{
19    ValidationGraphMode, ValidationOptions, apply_message_template, graph_union,
20};
21use crate::value::{compare_terms, value_type_holds};
22use oxrdf::{BlankNode, Graph, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Term, Triple};
23use shifty_algebra::value_type::{Bound, ValueType};
24use shifty_algebra::{NodeKindSet, Path, Severity, SparqlConstraint, SparqlQueryKind};
25use shifty_parse::graph::{Loaded, term_to_node};
26use shifty_parse::lower::canonical_sparql_query;
27use shifty_parse::path::parse_path;
28use shifty_parse::vocab;
29use std::cell::RefCell;
30use std::cmp::Ordering;
31use std::collections::{HashMap, HashSet};
32
/// One `sh:ValidationResult`.
///
/// Each field corresponds to one property that `report_to_graph` writes on the
/// result node.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ValidationResult {
    /// `sh:focusNode` — the node that was validated.
    pub focus: Term,
    /// `sh:resultPath` as the original RDF node (predicate IRI for simple paths).
    pub path: Option<Term>,
    /// `sh:value` — the value node the result is about, when the component
    /// reports one.
    pub value: Option<Term>,
    /// `sh:sourceConstraintComponent` — IRI of the constraint component that
    /// produced the result.
    pub component: NamedNode,
    /// `sh:sourceShape` — the shape node the result was produced for.
    pub source_shape: Term,
    /// `sh:resultSeverity` — the `sh:severity` declared on the source shape,
    /// defaulting to `sh:Violation`.
    pub severity: NamedNode,
    /// `sh:resultMessage` — copied from `sh:message` on the source shape. For
    /// `sh:sparql` constraints, the constraint node's own `sh:message` takes
    /// precedence and `{$var}` / `{?var}` placeholders are substituted.
    pub messages: Vec<Term>,
}
48
/// The outcome of one validation run: the conformance flag plus every result.
#[derive(Debug, Clone)]
pub struct ValidationReport {
    /// `true` when no result's severity meets the `minimum_severity` of the
    /// `ValidationOptions` the report was built with. Serialized as
    /// `sh:conforms`.
    pub conforms: bool,
    /// Every collected result, one `sh:result` each. Ordered by severity,
    /// focus, source shape and component when `sort_results` is enabled.
    pub results: Vec<ValidationResult>,
}
54
55/// Validate `data` against the shapes in `shapes`, producing a W3C report.
56pub fn validate_report(shapes: &Loaded, data: &Graph) -> ValidationReport {
57    validate_report_with_options(shapes, data, &ValidationOptions::default())
58}
59
60/// Validate and build a W3C report using an explicit severity policy.
61pub fn validate_report_with_options(
62    shapes: &Loaded,
63    data: &Graph,
64    options: &ValidationOptions,
65) -> ValidationReport {
66    let has_shapes_graph = shapes_reference_shapes_graph(shapes);
67    let frozen = if has_shapes_graph {
68        FrozenIndexedDataset::from_graphs(data, &shapes.graph)
69    } else {
70        FrozenIndexedDataset::from_graph(data)
71    };
72    validate_report_context(shapes, data, frozen, has_shapes_graph, options)
73}
74
75/// Validate split data and shapes graphs using the selected graph mode.
76pub fn validate_report_graphs(shapes: &Loaded, data: &Graph) -> ValidationReport {
77    validate_report_graphs_with_mode_and_options(
78        shapes,
79        data,
80        ValidationGraphMode::default(),
81        &ValidationOptions::default(),
82    )
83}
84
85/// Validate split data and shapes graphs using an explicit graph mode.
86pub fn validate_report_graphs_with_mode(
87    shapes: &Loaded,
88    data: &Graph,
89    mode: ValidationGraphMode,
90) -> ValidationReport {
91    validate_report_graphs_with_mode_and_options(shapes, data, mode, &ValidationOptions::default())
92}
93
94/// Validate split graphs with an explicit graph mode and severity policy.
95pub fn validate_report_graphs_with_mode_and_options(
96    shapes: &Loaded,
97    data: &Graph,
98    mode: ValidationGraphMode,
99    options: &ValidationOptions,
100) -> ValidationReport {
101    let has_shapes_graph = shapes_reference_shapes_graph(shapes);
102    match mode {
103        ValidationGraphMode::Data => {
104            let frozen = if has_shapes_graph {
105                FrozenIndexedDataset::from_graphs(data, &shapes.graph)
106            } else {
107                FrozenIndexedDataset::from_graph(data)
108            };
109            validate_report_context(shapes, data, frozen, has_shapes_graph, options)
110        }
111        ValidationGraphMode::Union => {
112            let frozen = if has_shapes_graph {
113                FrozenIndexedDataset::from_graph_union_with_shapes(data, &shapes.graph)
114            } else {
115                FrozenIndexedDataset::from_graph_union(data, &shapes.graph)
116            };
117            validate_report_context(shapes, data, frozen, has_shapes_graph, options)
118        }
119        ValidationGraphMode::UnionAll => {
120            let union = graph_union(data, &shapes.graph);
121            let frozen = if has_shapes_graph {
122                FrozenIndexedDataset::from_graphs(&union, &shapes.graph)
123            } else {
124                FrozenIndexedDataset::from_graph(&union)
125            };
126            validate_report_context(shapes, &union, frozen, has_shapes_graph, options)
127        }
128    }
129}
130
131fn validate_report_context(
132    shapes: &Loaded,
133    focus_data: &Graph,
134    frozen: FrozenIndexedDataset,
135    has_shapes_graph: bool,
136    options: &ValidationOptions,
137) -> ValidationReport {
138    // Only execute SPARQL target/constraint work when the shapes graph contains
139    // those features. Query execution shares the frozen validation dataset.
140    let needs_sparql = shapes
141        .graph
142        .triples_for_predicate(vocab::SH_SPARQL)
143        .next()
144        .is_some()
145        || shapes
146            .graph
147            .triples_for_predicate(vocab::SH_TARGET)
148            .next()
149            .is_some();
150    let sparql = SparqlExecutor::from_frozen(frozen, needs_sparql && has_shapes_graph);
151    // Index class membership once (instead of a forward scan over every node per
152    // class-target shape): this is the report path's analogue of the plan's
153    // backward `PathToConst` focus source, amortized across all shapes.
154    let has_explicit_class_target = shapes
155        .graph
156        .triples_for_predicate(vocab::SH_TARGET_CLASS)
157        .next()
158        .is_some();
159    let has_implicit_class_target = shapes.graph.iter().any(|triple| {
160        let subject = triple.subject.into_owned();
161        is_shape_node(shapes, &subject)
162            && (shapes.is_instance_of(&subject, vocab::RDFS_CLASS)
163                || shapes.is_instance_of(&subject, vocab::OWL_CLASS))
164    });
165    let needs_class_index = has_explicit_class_target || has_implicit_class_target;
166    let class_index = if needs_class_index {
167        build_class_index(
168            focus_data,
169            sparql
170                .frozen()
171                .expect("report validation always has a frozen dataset"),
172        )
173    } else {
174        HashMap::new()
175    };
176    let r = Reporter {
177        shapes,
178        focus_data,
179        sparql,
180        needs_sparql,
181        class_index,
182        path_cache: RefCell::new(HashMap::new()),
183    };
184    let mut results = Vec::new();
185    for shape in r.target_shapes() {
186        let foci = r.focus_nodes(&shape);
187        r.prefetch_sparql(&shape, &foci);
188        for focus in &foci {
189            let mut visited = HashSet::new();
190            r.collect(&shape, focus, &mut results, &mut visited);
191        }
192    }
193    if options.sort_results {
194        results.sort_by(|left, right| {
195            Severity::from_named_node(right.severity.clone())
196                .rank()
197                .cmp(&Severity::from_named_node(left.severity.clone()).rank())
198                .then_with(|| left.focus.to_string().cmp(&right.focus.to_string()))
199                .then_with(|| {
200                    left.source_shape
201                        .to_string()
202                        .cmp(&right.source_shape.to_string())
203                })
204                .then_with(|| left.component.as_str().cmp(right.component.as_str()))
205        });
206    }
207    ValidationReport {
208        conforms: !results.iter().any(|result| {
209            Severity::from_named_node(result.severity.clone()).meets(&options.minimum_severity)
210        }),
211        results,
212    }
213}
214
215/// Serialize a report as an RDF `sh:ValidationReport` graph (W3C shape).
216pub fn report_to_graph(report: &ValidationReport) -> Graph {
217    let mut g = Graph::new();
218    let root = BlankNode::default();
219    let t = |s: NamedOrBlankNode, p: NamedNodeRef, o: Term| Triple::new(s, p.into_owned(), o);
220
221    g.insert(&t(
222        root.clone().into(),
223        vocab::RDF_TYPE,
224        vocab::SH_VALIDATION_REPORT.into_owned().into(),
225    ));
226    g.insert(&t(
227        root.clone().into(),
228        vocab::SH_CONFORMS,
229        Literal::from(report.conforms).into(),
230    ));
231
232    for r in &report.results {
233        let rn = BlankNode::default();
234        g.insert(&t(root.clone().into(), vocab::SH_RESULT, rn.clone().into()));
235        g.insert(&t(
236            rn.clone().into(),
237            vocab::RDF_TYPE,
238            vocab::SH_VALIDATION_RESULT.into_owned().into(),
239        ));
240        g.insert(&t(rn.clone().into(), vocab::SH_FOCUS_NODE, r.focus.clone()));
241        if let Some(path) = &r.path {
242            g.insert(&t(rn.clone().into(), vocab::SH_RESULT_PATH, path.clone()));
243        }
244        if let Some(value) = &r.value {
245            g.insert(&t(rn.clone().into(), vocab::SH_VALUE, value.clone()));
246        }
247        g.insert(&t(
248            rn.clone().into(),
249            vocab::SH_RESULT_SEVERITY,
250            r.severity.clone().into(),
251        ));
252        g.insert(&t(
253            rn.clone().into(),
254            vocab::SH_SOURCE_CONSTRAINT_COMPONENT,
255            r.component.clone().into(),
256        ));
257        for msg in &r.messages {
258            g.insert(&t(rn.clone().into(), vocab::SH_RESULT_MESSAGE, msg.clone()));
259        }
260        g.insert(&t(
261            rn.into(),
262            vocab::SH_SOURCE_SHAPE,
263            r.source_shape.clone(),
264        ));
265    }
266    g
267}
268
269/// Substitute `{$varName}` / `{?varName}` placeholders in `sh:message` literals.
270///
271/// `$this` is resolved from `focus`; all other names are looked up in
272/// `bindings` (keyed without the `$`/`?` sigil). Unresolved placeholders are
273/// left as-is. Only `sh:Literal` messages are processed; IRI/blank-node
274/// message terms pass through unchanged.
275fn substitute_messages(
276    messages: &[Term],
277    focus: &Term,
278    bindings: &HashMap<String, Term>,
279) -> Vec<Term> {
280    messages
281        .iter()
282        .map(|msg| {
283            let Term::Literal(lit) = msg else {
284                return msg.clone();
285            };
286            let text = lit.value();
287            let substituted = apply_message_template(text, focus, bindings);
288            if substituted == text {
289                msg.clone()
290            } else {
291                Term::Literal(Literal::new_simple_literal(&substituted))
292            }
293        })
294        .collect()
295}
296
/// State shared by every step of one report walk over the shapes graph.
struct Reporter<'a> {
    /// The loaded shapes graph being walked.
    shapes: &'a Loaded,
    /// Graph scanned for `sh:targetSubjectsOf` / `sh:targetObjectsOf` focus
    /// nodes.
    focus_data: &'a Graph,
    /// SPARQL executor; it also holds the frozen dataset that path evaluation
    /// reads (see `frozen`).
    sparql: SparqlExecutor,
    /// Whether the shapes graph contains any `sh:sparql` or `sh:target` triple.
    /// SPARQL targets and constraints are skipped entirely when `false`.
    needs_sparql: bool,
    /// `class → focus-data instances` under `rdf:type / rdfs:subClassOf*`, built
    /// once and shared by every `sh:targetClass` / implicit-class lookup.
    class_index: HashMap<Term, Vec<Term>>,
    /// Parsed `sh:path` per shape node, so `collect` does not re-parse the path
    /// RDF on every (shape, focus) visit. `None` = shape has no/invalid path.
    path_cache: RefCell<HashMap<NamedOrBlankNode, PathCacheEntry>>,
}
309
/// `(shape, focus)` pairs currently being validated. `collect` inserts a pair
/// on entry and removes it on exit, and treats a pair that is already present
/// as a recursive back-edge.
type Visited = HashSet<(NamedOrBlankNode, Term)>;
311
/// Cache entry for a shape's `sh:path`: the raw RDF term (first) and the parsed
/// path (second). The parsed half is `None` when the shape has no path or the
/// path failed to parse.
type PathCacheEntry = (Option<Term>, Option<Path>);
314
315impl Reporter<'_> {
316    fn frozen(&self) -> &FrozenIndexedDataset {
317        self.sparql
318            .frozen()
319            .expect("report validation always has a frozen dataset")
320    }
321
322    fn target_shapes(&self) -> Vec<NamedOrBlankNode> {
323        let mut found: HashSet<NamedOrBlankNode> = HashSet::new();
324        for t in self.shapes.graph.iter() {
325            let p = t.predicate;
326            if p == vocab::SH_TARGET_NODE
327                || p == vocab::SH_TARGET_CLASS
328                || p == vocab::SH_TARGET_SUBJECTS_OF
329                || p == vocab::SH_TARGET_OBJECTS_OF
330            {
331                found.insert(t.subject.into_owned());
332            }
333            // SPARQL-based target: sh:target [ sh:select "…" ]
334            if p == vocab::SH_TARGET
335                && let Some(target) = term_to_node(&t.object.into_owned())
336                && self.shapes.object(&target, vocab::SH_SELECT).is_some()
337            {
338                found.insert(t.subject.into_owned());
339            }
340            // implicit class target: a shape that is also an rdfs:Class / owl:Class
341            if p == vocab::RDF_TYPE {
342                let s = t.subject.into_owned();
343                if self.is_class(&s) && self.is_shape(&s) {
344                    found.insert(s);
345                }
346            }
347        }
348        let mut v: Vec<_> = found.into_iter().collect();
349        v.sort_by_key(|n| n.to_string());
350        v
351    }
352
    /// Does this node look like a SHACL shape (so its class-ness implies a target)?
    ///
    /// Thin wrapper that applies the free function `is_shape_node` to this
    /// reporter's shapes graph.
    fn is_shape(&self, n: &NamedOrBlankNode) -> bool {
        is_shape_node(self.shapes, n)
    }
357
358    fn is_class(&self, n: &NamedOrBlankNode) -> bool {
359        self.shapes.is_instance_of(n, vocab::RDFS_CLASS)
360            || self.shapes.is_instance_of(n, vocab::OWL_CLASS)
361    }
362
363    fn deactivated(&self, n: &NamedOrBlankNode) -> bool {
364        matches!(self.shapes.object(n, vocab::SH_DEACTIVATED),
365            Some(Term::Literal(ref l)) if l.value() == "true")
366    }
367
368    fn focus_nodes(&self, shape: &NamedOrBlankNode) -> Vec<Term> {
369        let mut nodes = Vec::new();
370        nodes.extend(self.shapes.objects(shape, vocab::SH_TARGET_NODE));
371        for c in self.shapes.objects(shape, vocab::SH_TARGET_CLASS) {
372            if let Some(instances) = self.class_index.get(&c) {
373                nodes.extend(instances.iter().cloned());
374            }
375        }
376        for p in self.shapes.objects(shape, vocab::SH_TARGET_SUBJECTS_OF) {
377            if let Term::NamedNode(n) = p {
378                nodes.extend(
379                    self.focus_data
380                        .triples_for_predicate(n.as_ref())
381                        .map(|t| node_term(t.subject)),
382                );
383            }
384        }
385        for p in self.shapes.objects(shape, vocab::SH_TARGET_OBJECTS_OF) {
386            if let Term::NamedNode(n) = p {
387                nodes.extend(
388                    self.focus_data
389                        .triples_for_predicate(n.as_ref())
390                        .map(|t| t.object.into_owned()),
391                );
392            }
393        }
394        // SPARQL-based targets: sh:target [ sh:select "…" ]. The query selects
395        // `?this` focus nodes from the context store.
396        if self.needs_sparql {
397            let exec = &self.sparql;
398            for target in self.shapes.objects(shape, vocab::SH_TARGET) {
399                let Some(target_node) = term_to_node(&target) else {
400                    continue;
401                };
402                let Some(Term::Literal(query)) = self.shapes.object(&target_node, vocab::SH_SELECT)
403                else {
404                    continue;
405                };
406                // Drop targets that fail to canonicalize, matching the lowering path.
407                let Ok((_, canonical)) =
408                    canonical_sparql_query(self.shapes, &target_node, query.value())
409                else {
410                    continue;
411                };
412                if let Ok(found) = exec.target_nodes(&canonical) {
413                    nodes.extend(found);
414                }
415            }
416        }
417        // implicit class target: instances of the shape (which is also a class)
418        if let NamedOrBlankNode::NamedNode(n) = shape
419            && self.is_class(shape)
420        {
421            let class = Term::NamedNode(n.clone());
422            if let Some(instances) = self.class_index.get(&class) {
423                nodes.extend(instances.iter().cloned());
424            }
425        }
426        let mut seen = HashSet::new();
427        nodes.retain(|t| seen.insert(t.clone()));
428        nodes
429    }
430
431    /// The shape's `sh:path` as both its raw RDF node (for `sh:resultPath`) and
432    /// the parsed path algebra, memoized so repeated visits don't re-parse it.
433    fn shape_path(&self, shape: &NamedOrBlankNode) -> (Option<Term>, Option<Path>) {
434        if let Some(cached) = self.path_cache.borrow().get(shape) {
435            return cached.clone();
436        }
437        let path_term = self.shapes.object(shape, vocab::SH_PATH);
438        let parsed = path_term
439            .as_ref()
440            .and_then(|t| parse_path(self.shapes, t).ok());
441        let entry = (path_term, parsed);
442        self.path_cache
443            .borrow_mut()
444            .insert(shape.clone(), entry.clone());
445        entry
446    }
447
    /// Collect the results of validating `focus` against `shape`.
    ///
    /// Results are appended to `out` in a fixed order: cardinality,
    /// `sh:hasValue`, closed-shape, property-pair, `sh:uniqueLang`, qualified
    /// counts, per-value checks, nested `sh:property` shapes, and finally
    /// `sh:sparql` constraints.
    ///
    /// `visited` holds the `(shape, focus)` pairs on the current recursion
    /// stack. A pair that is already present returns immediately without
    /// producing results.
    fn collect(
        &self,
        shape: &NamedOrBlankNode,
        focus: &Term,
        out: &mut Vec<ValidationResult>,
        visited: &mut Visited,
    ) {
        if self.deactivated(shape) {
            return; // deactivated shapes produce no results
        }
        let key = (shape.clone(), focus.clone());
        if !visited.insert(key.clone()) {
            return; // recursion: conform on the back-edge (greatest fixed point)
        }

        // With a parsed path the value nodes are the nodes reached from the
        // focus; without one the focus itself is the only value node.
        let (path_term, parsed) = self.shape_path(shape);
        let value_nodes: Vec<Term> = match &parsed {
            Some(p) => succ(self.frozen(), focus, p).into_iter().collect(),
            None => vec![focus.clone()],
        };
        let severity = self.severity(shape);
        let messages = self.messages(shape);
        // Appends one result that shares this shape's path, severity and
        // messages; only the value and the component vary per call.
        let push = |out: &mut Vec<ValidationResult>, value, component| {
            out.push(ValidationResult {
                focus: focus.clone(),
                path: path_term.clone(),
                value,
                component,
                source_shape: node_term_ref(shape),
                severity: severity.clone(),
                messages: messages.clone(),
            });
        };

        // cardinality (only meaningful with a path)
        if parsed.is_some() {
            if let Some(min) = self.int(shape, vocab::SH_MIN_COUNT)
                && (value_nodes.len() as u64) < min
            {
                push(out, None, vocab::SH_CC_MIN_COUNT.into_owned());
            }
            if let Some(max) = self.int(shape, vocab::SH_MAX_COUNT)
                && (value_nodes.len() as u64) > max
            {
                push(out, None, vocab::SH_CC_MAX_COUNT.into_owned());
            }
        }

        // sh:hasValue — one of the value nodes must equal the constant
        for hv in self.shapes.objects(shape, vocab::SH_HAS_VALUE) {
            if !value_nodes.contains(&hv) {
                push(out, None, vocab::SH_CC_HAS_VALUE.into_owned());
            }
        }

        // Components that look at the value-node set as a whole.
        self.collect_closed(shape, focus, &value_nodes, out);
        self.collect_property_pairs(shape, focus, &path_term, &value_nodes, out);
        self.collect_unique_lang(shape, focus, &path_term, &value_nodes, out);
        self.collect_qualified_counts(shape, focus, &path_term, &value_nodes, out, visited);

        // value-scoped components
        for u in &value_nodes {
            for (component, ok) in self.value_checks(shape, u, visited) {
                if !ok {
                    push(out, Some(u.clone()), component);
                }
            }
        }

        // nested property shapes: delegate (each value node is a focus for P)
        for prop in self.shapes.objects(shape, vocab::SH_PROPERTY) {
            if let Some(pn) = term_to_node(&prop) {
                for u in &value_nodes {
                    self.collect(&pn, u, out, visited);
                }
            }
        }

        self.collect_sparql(shape, focus, &path_term, &parsed, out);

        // Leave the recursion stack so the same (shape, focus) pair can be
        // validated again when it is reached along a different branch.
        visited.remove(&key);
    }
531
532    /// `sh:sparql` constraints (SHACL-SPARQL). Each `SELECT`/`ASK` query runs for
533    /// the focus node against the context store; every solution (or a `true`
534    /// `ASK`) is one `sh:SPARQLConstraintComponent` result. A `value`/`path`
535    /// projected by the query overrides the value node / `sh:resultPath`.
536    /// Build the [`SparqlConstraint`] for a `sh:sparql` constraint node, applying
537    /// the same canonicalization the lowering path uses. `None` when the node has
538    /// neither `sh:select` nor `sh:ask`, or when canonicalization fails (matching
539    /// the lowering path, which omits such constraints with a diagnostic).
540    fn build_sparql_constraint(
541        &self,
542        shape: &NamedOrBlankNode,
543        constraint_node: &NamedOrBlankNode,
544        parsed_path: &Option<Path>,
545    ) -> Option<SparqlConstraint> {
546        let (kind, raw) = if let Some(Term::Literal(query)) =
547            self.shapes.object(constraint_node, vocab::SH_SELECT)
548        {
549            (SparqlQueryKind::Select, query.value().to_string())
550        } else if let Some(Term::Literal(query)) =
551            self.shapes.object(constraint_node, vocab::SH_ASK)
552        {
553            (SparqlQueryKind::Ask, query.value().to_string())
554        } else {
555            return None;
556        };
557        let (_, query) = canonical_sparql_query(self.shapes, constraint_node, &raw).ok()?;
558        Some(SparqlConstraint {
559            kind,
560            query,
561            path: parsed_path.clone(),
562            shape: Some(node_term_ref(shape)),
563            // The report path resolves messages itself, so the constraint's own
564            // message slot is left empty here.
565            messages: Vec::new(),
566        })
567    }
568
569    /// Batch-evaluate a shape's direct `sh:sparql` constraints over its whole
570    /// focus set before the per-focus walk, so fallback queries run once over a
571    /// `VALUES` table (doc §189) rather than once per focus.
572    fn prefetch_sparql(&self, shape: &NamedOrBlankNode, foci: &[Term]) {
573        if !self.needs_sparql || foci.len() < 2 {
574            return;
575        }
576        let (_, parsed_path) = self.shape_path(shape);
577        for constraint_term in self.shapes.objects(shape, vocab::SH_SPARQL) {
578            let Some(constraint_node) = term_to_node(&constraint_term) else {
579                continue;
580            };
581            if let Some(constraint) =
582                self.build_sparql_constraint(shape, &constraint_node, &parsed_path)
583            {
584                let _ = self.sparql.prefetch_constraint(&constraint, foci);
585            }
586        }
587    }
588
    /// Evaluate the `sh:sparql` constraints declared directly on `shape` for
    /// one `focus`, appending one `sh:SPARQLConstraintComponent` result per
    /// reported violation.
    ///
    /// A `path` / `value` carried by a violation overrides the shape's
    /// `path_term` / the default value. Constraint nodes that cannot be turned
    /// into a [`SparqlConstraint`] are skipped. A query that fails at runtime
    /// yields a single result without a value. Does nothing when the shapes
    /// graph needs no SPARQL.
    fn collect_sparql(
        &self,
        shape: &NamedOrBlankNode,
        focus: &Term,
        path_term: &Option<Term>,
        parsed_path: &Option<Path>,
        out: &mut Vec<ValidationResult>,
    ) {
        if !self.needs_sparql {
            return;
        }
        let sparql = &self.sparql;
        let severity = self.severity(shape);
        for constraint_term in self.shapes.objects(shape, vocab::SH_SPARQL) {
            let Some(constraint_node) = term_to_node(&constraint_term) else {
                continue;
            };
            let Some(constraint) =
                self.build_sparql_constraint(shape, &constraint_node, parsed_path)
            else {
                continue;
            };
            // Mirror lower.rs §179-184: constraint-node sh:message takes
            // precedence; absent that, fall back to the owning shape's sh:message.
            let raw_messages = {
                let on_constraint = self.shapes.objects(&constraint_node, vocab::SH_MESSAGE);
                if on_constraint.is_empty() {
                    self.messages(shape)
                } else {
                    on_constraint
                }
            };
            match sparql.constraint_violations(&constraint, focus) {
                Ok(violations) => {
                    for violation in violations {
                        // Placeholders are resolved per violation because each
                        // one carries its own variable bindings.
                        let messages =
                            substitute_messages(&raw_messages, focus, &violation.bindings);
                        // SHACL-AF §8.4.1: for SELECT constraints, when ?value is
                        // not projected, the focus node itself is used as sh:value.
                        let value = violation.value.or_else(|| match constraint.kind {
                            SparqlQueryKind::Select => Some(focus.clone()),
                            SparqlQueryKind::Ask => None,
                        });
                        out.push(ValidationResult {
                            focus: focus.clone(),
                            path: violation.path.or_else(|| path_term.clone()),
                            value,
                            component: vocab::SH_CC_SPARQL.into_owned(),
                            source_shape: node_term_ref(shape),
                            severity: severity.clone(),
                            messages,
                        });
                    }
                }
                // Runtime failure (e.g. complex-path prebinding is unsupported):
                // fail closed, matching the algebra validator. There are no
                // bindings here, so the messages are emitted unsubstituted.
                Err(_) => out.push(ValidationResult {
                    focus: focus.clone(),
                    path: path_term.clone(),
                    value: None,
                    component: vocab::SH_CC_SPARQL.into_owned(),
                    source_shape: node_term_ref(shape),
                    severity: severity.clone(),
                    messages: raw_messages,
                }),
            }
        }
    }
657
658    fn collect_closed(
659        &self,
660        shape: &NamedOrBlankNode,
661        focus: &Term,
662        value_nodes: &[Term],
663        out: &mut Vec<ValidationResult>,
664    ) {
665        if !self.bool(shape, vocab::SH_CLOSED) {
666            return;
667        }
668        let mut allowed = HashSet::new();
669        for prop in self.shapes.objects(shape, vocab::SH_PROPERTY) {
670            let Some(prop) = term_to_node(&prop) else {
671                continue;
672            };
673            if let Some(Term::NamedNode(path)) = self.shapes.object(&prop, vocab::SH_PATH) {
674                allowed.insert(path);
675            }
676        }
677        for list in self.shapes.objects(shape, vocab::SH_IGNORED_PROPERTIES) {
678            for term in self.shapes.read_list(&list) {
679                if let Term::NamedNode(predicate) = term {
680                    allowed.insert(predicate);
681                }
682            }
683        }
684        for value_node in value_nodes {
685            for (predicate, object) in self.frozen().outgoing(value_node) {
686                if allowed.contains(&predicate) {
687                    continue;
688                }
689                out.push(ValidationResult {
690                    focus: focus.clone(),
691                    path: Some(Term::NamedNode(predicate)),
692                    value: Some(object),
693                    component: vocab::SH_CC_CLOSED.into_owned(),
694                    source_shape: node_term_ref(shape),
695                    severity: self.severity(shape),
696                    messages: self.messages(shape),
697                });
698            }
699        }
700    }
701
702    fn collect_property_pairs(
703        &self,
704        shape: &NamedOrBlankNode,
705        focus: &Term,
706        path: &Option<Term>,
707        value_nodes: &[Term],
708        out: &mut Vec<ValidationResult>,
709    ) {
710        for predicate in self.shapes.objects(shape, vocab::SH_EQUALS) {
711            let Term::NamedNode(predicate) = predicate else {
712                continue;
713            };
714            let other = succ(self.frozen(), focus, &Path::Pred(predicate));
715            for value in value_nodes.iter().filter(|value| !other.contains(*value)) {
716                self.push(
717                    out,
718                    shape,
719                    focus,
720                    path.clone(),
721                    Some((*value).clone()),
722                    vocab::SH_CC_EQUALS,
723                );
724            }
725            for value in other.iter().filter(|value| !value_nodes.contains(*value)) {
726                self.push(
727                    out,
728                    shape,
729                    focus,
730                    path.clone(),
731                    Some(value.clone()),
732                    vocab::SH_CC_EQUALS,
733                );
734            }
735        }
736        for predicate in self.shapes.objects(shape, vocab::SH_DISJOINT) {
737            let Term::NamedNode(predicate) = predicate else {
738                continue;
739            };
740            let other = succ(self.frozen(), focus, &Path::Pred(predicate));
741            for value in value_nodes.iter().filter(|value| other.contains(*value)) {
742                self.push(
743                    out,
744                    shape,
745                    focus,
746                    path.clone(),
747                    Some((*value).clone()),
748                    vocab::SH_CC_DISJOINT,
749                );
750            }
751        }
752        for (constraint, component, inclusive) in [
753            (vocab::SH_LESS_THAN, vocab::SH_CC_LESS_THAN, false),
754            (
755                vocab::SH_LESS_THAN_OR_EQUALS,
756                vocab::SH_CC_LESS_THAN_OR_EQUALS,
757                true,
758            ),
759        ] {
760            for predicate in self.shapes.objects(shape, constraint) {
761                let Term::NamedNode(predicate) = predicate else {
762                    continue;
763                };
764                for left in value_nodes {
765                    for right in succ(self.frozen(), focus, &Path::Pred(predicate.clone())) {
766                        let ordering = compare_terms(left, &right);
767                        let passes = ordering == Some(Ordering::Less)
768                            || inclusive && ordering == Some(Ordering::Equal);
769                        if !passes {
770                            self.push(
771                                out,
772                                shape,
773                                focus,
774                                path.clone(),
775                                Some(left.clone()),
776                                component,
777                            );
778                        }
779                    }
780                }
781            }
782        }
783    }
784
785    fn collect_unique_lang(
786        &self,
787        shape: &NamedOrBlankNode,
788        focus: &Term,
789        path: &Option<Term>,
790        value_nodes: &[Term],
791        out: &mut Vec<ValidationResult>,
792    ) {
793        if !self.bool(shape, vocab::SH_UNIQUE_LANG) {
794            return;
795        }
796        let mut counts = HashMap::new();
797        for value in value_nodes {
798            if let Term::Literal(literal) = value
799                && let Some(language) = literal.language()
800            {
801                *counts
802                    .entry(language.to_ascii_lowercase())
803                    .or_insert(0usize) += 1;
804            }
805        }
806        for _ in counts.values().filter(|count| **count > 1) {
807            self.push(
808                out,
809                shape,
810                focus,
811                path.clone(),
812                None,
813                vocab::SH_CC_UNIQUE_LANG,
814            );
815        }
816    }
817
818    fn collect_qualified_counts(
819        &self,
820        shape: &NamedOrBlankNode,
821        focus: &Term,
822        path: &Option<Term>,
823        value_nodes: &[Term],
824        out: &mut Vec<ValidationResult>,
825        visited: &mut Visited,
826    ) {
827        for qualifier in self.shapes.objects(shape, vocab::SH_QUALIFIED_VALUE_SHAPE) {
828            let Some(qualifier) = term_to_node(&qualifier) else {
829                continue;
830            };
831            let siblings = if self.bool(shape, vocab::SH_QUALIFIED_VALUE_SHAPES_DISJOINT) {
832                self.sibling_qualified_shapes(shape, &qualifier)
833            } else {
834                Vec::new()
835            };
836            let count = value_nodes
837                .iter()
838                .filter(|value| {
839                    self.conforms(&qualifier, value, visited)
840                        && siblings
841                            .iter()
842                            .all(|sibling| !self.conforms(sibling, value, visited))
843                })
844                .count() as u64;
845            if let Some(min) = self.int(shape, vocab::SH_QUALIFIED_MIN_COUNT)
846                && count < min
847            {
848                self.push(
849                    out,
850                    shape,
851                    focus,
852                    path.clone(),
853                    None,
854                    vocab::SH_CC_QUALIFIED_MIN_COUNT,
855                );
856            }
857            if let Some(max) = self.int(shape, vocab::SH_QUALIFIED_MAX_COUNT)
858                && count > max
859            {
860                self.push(
861                    out,
862                    shape,
863                    focus,
864                    path.clone(),
865                    None,
866                    vocab::SH_CC_QUALIFIED_MAX_COUNT,
867                );
868            }
869        }
870    }
871
872    fn sibling_qualified_shapes(
873        &self,
874        shape: &NamedOrBlankNode,
875        qualifier: &NamedOrBlankNode,
876    ) -> Vec<NamedOrBlankNode> {
877        let shape_term = node_term_ref(shape);
878        let mut siblings = HashSet::new();
879        for triple in self.shapes.graph.triples_for_predicate(vocab::SH_PROPERTY) {
880            if triple.object != shape_term.as_ref() {
881                continue;
882            }
883            let parent = triple.subject.into_owned();
884            for property in self.shapes.objects(&parent, vocab::SH_PROPERTY) {
885                let Some(property) = term_to_node(&property) else {
886                    continue;
887                };
888                for qualifier in self
889                    .shapes
890                    .objects(&property, vocab::SH_QUALIFIED_VALUE_SHAPE)
891                {
892                    if let Some(qualifier) = term_to_node(&qualifier) {
893                        siblings.insert(qualifier);
894                    }
895                }
896            }
897        }
898        siblings.remove(qualifier);
899        siblings.into_iter().collect()
900    }
901
902    fn push(
903        &self,
904        out: &mut Vec<ValidationResult>,
905        shape: &NamedOrBlankNode,
906        focus: &Term,
907        path: Option<Term>,
908        value: Option<Term>,
909        component: NamedNodeRef<'static>,
910    ) {
911        let mut bindings = HashMap::new();
912        if let Some(v) = &value {
913            bindings.insert("value".to_string(), v.clone());
914        }
915        if let Some(p) = &path {
916            bindings.insert("path".to_string(), p.clone());
917        }
918        let raw = self.messages(shape);
919        let messages = substitute_messages(&raw, focus, &bindings);
920        out.push(ValidationResult {
921            focus: focus.clone(),
922            path,
923            value,
924            component: component.into_owned(),
925            source_shape: node_term_ref(shape),
926            severity: self.severity(shape),
927            messages,
928        });
929    }
930
931    /// Read `sh:message` values from `shape` to propagate as `sh:resultMessage`.
932    fn messages(&self, shape: &NamedOrBlankNode) -> Vec<Term> {
933        self.shapes.objects(shape, vocab::SH_MESSAGE)
934    }
935
936    fn conforms(&self, shape: &NamedOrBlankNode, focus: &Term, visited: &mut Visited) -> bool {
937        let mut scratch = Vec::new();
938        self.collect(shape, focus, &mut scratch, visited);
939        scratch.is_empty()
940    }
941
942    /// Each value-scoped constraint component on `shape` and whether it holds at
943    /// value node `u`. `sh:and`/`or`/`not`/`node` report as a unit.
944    fn value_checks(
945        &self,
946        shape: &NamedOrBlankNode,
947        u: &Term,
948        visited: &mut Visited,
949    ) -> Vec<(NamedNode, bool)> {
950        let mut checks = Vec::new();
951
952        for c in self.shapes.objects(shape, vocab::SH_CLASS) {
953            checks.push((vocab::SH_CC_CLASS.into_owned(), self.is_instance(u, &c)));
954        }
955        for d in self.shapes.objects(shape, vocab::SH_DATATYPE) {
956            if let Term::NamedNode(dt) = d {
957                let ok = value_type_holds(&ValueType::Datatype(dt), u);
958                checks.push((vocab::SH_CC_DATATYPE.into_owned(), ok));
959            }
960        }
961        for k in self.shapes.objects(shape, vocab::SH_NODE_KIND) {
962            if let Some(set) = map_node_kind(&k) {
963                checks.push((vocab::SH_CC_NODE_KIND.into_owned(), set.matches(u)));
964            }
965        }
966        // numeric ranges (each bound is its own component)
967        for (pred_iri, comp, inclusive) in [
968            (vocab::SH_MIN_INCLUSIVE, vocab::SH_CC_MIN_INCLUSIVE, true),
969            (vocab::SH_MIN_EXCLUSIVE, vocab::SH_CC_MIN_EXCLUSIVE, false),
970        ] {
971            if let Some(Term::Literal(b)) = self.shapes.object(shape, pred_iri) {
972                let vt = ValueType::NumericRange {
973                    lo: Some(Bound {
974                        value: b,
975                        inclusive,
976                    }),
977                    hi: None,
978                };
979                checks.push((comp.into_owned(), value_type_holds(&vt, u)));
980            }
981        }
982        for (pred_iri, comp, inclusive) in [
983            (vocab::SH_MAX_INCLUSIVE, vocab::SH_CC_MAX_INCLUSIVE, true),
984            (vocab::SH_MAX_EXCLUSIVE, vocab::SH_CC_MAX_EXCLUSIVE, false),
985        ] {
986            if let Some(Term::Literal(b)) = self.shapes.object(shape, pred_iri) {
987                let vt = ValueType::NumericRange {
988                    lo: None,
989                    hi: Some(Bound {
990                        value: b,
991                        inclusive,
992                    }),
993                };
994                checks.push((comp.into_owned(), value_type_holds(&vt, u)));
995            }
996        }
997        // length / pattern
998        let min_len = self.int(shape, vocab::SH_MIN_LENGTH);
999        let max_len = self.int(shape, vocab::SH_MAX_LENGTH);
1000        if let Some(m) = min_len {
1001            let vt = ValueType::Length {
1002                min: Some(m),
1003                max: None,
1004            };
1005            checks.push((
1006                vocab::SH_CC_MIN_LENGTH.into_owned(),
1007                value_type_holds(&vt, u),
1008            ));
1009        }
1010        if let Some(m) = max_len {
1011            let vt = ValueType::Length {
1012                min: None,
1013                max: Some(m),
1014            };
1015            checks.push((
1016                vocab::SH_CC_MAX_LENGTH.into_owned(),
1017                value_type_holds(&vt, u),
1018            ));
1019        }
1020        if let Some(Term::Literal(re)) = self.shapes.object(shape, vocab::SH_PATTERN) {
1021            let flags = match self.shapes.object(shape, vocab::SH_FLAGS) {
1022                Some(Term::Literal(f)) => f.value().to_string(),
1023                _ => String::new(),
1024            };
1025            let vt = ValueType::Pattern {
1026                regex: re.value().to_string(),
1027                flags,
1028            };
1029            checks.push((vocab::SH_CC_PATTERN.into_owned(), value_type_holds(&vt, u)));
1030        }
1031        // sh:in
1032        for list in self.shapes.objects(shape, vocab::SH_IN) {
1033            let members = self.shapes.read_list(&list);
1034            checks.push((vocab::SH_CC_IN.into_owned(), members.contains(u)));
1035        }
1036        for list in self.shapes.objects(shape, vocab::SH_LANGUAGE_IN) {
1037            let languages = self
1038                .shapes
1039                .read_list(&list)
1040                .into_iter()
1041                .filter_map(|term| match term {
1042                    Term::Literal(literal) => Some(literal.value().to_string()),
1043                    _ => None,
1044                })
1045                .collect();
1046            checks.push((
1047                vocab::SH_CC_LANGUAGE_IN.into_owned(),
1048                value_type_holds(&ValueType::LangIn(languages), u),
1049            ));
1050        }
1051
1052        // logical (unit results)
1053        for list in self.shapes.objects(shape, vocab::SH_AND) {
1054            let ok = self
1055                .shapes
1056                .read_list(&list)
1057                .iter()
1058                .filter_map(term_to_node)
1059                .all(|m| self.conforms(&m, u, visited));
1060            checks.push((vocab::SH_CC_AND.into_owned(), ok));
1061        }
1062        for list in self.shapes.objects(shape, vocab::SH_OR) {
1063            let ok = self
1064                .shapes
1065                .read_list(&list)
1066                .iter()
1067                .filter_map(term_to_node)
1068                .any(|m| self.conforms(&m, u, visited));
1069            checks.push((vocab::SH_CC_OR.into_owned(), ok));
1070        }
1071        for list in self.shapes.objects(shape, vocab::SH_XONE) {
1072            let count = self
1073                .shapes
1074                .read_list(&list)
1075                .iter()
1076                .filter_map(term_to_node)
1077                .filter(|m| self.conforms(m, u, visited))
1078                .count();
1079            checks.push((vocab::SH_CC_XONE.into_owned(), count == 1));
1080        }
1081        for n in self.shapes.objects(shape, vocab::SH_NOT) {
1082            if let Some(nn) = term_to_node(&n) {
1083                checks.push((
1084                    vocab::SH_CC_NOT.into_owned(),
1085                    !self.conforms(&nn, u, visited),
1086                ));
1087            }
1088        }
1089        for n in self.shapes.objects(shape, vocab::SH_NODE) {
1090            if let Some(nn) = term_to_node(&n) {
1091                checks.push((
1092                    vocab::SH_CC_NODE.into_owned(),
1093                    self.conforms(&nn, u, visited),
1094                ));
1095            }
1096        }
1097
1098        checks
1099    }
1100
1101    fn is_instance(&self, u: &Term, class: &Term) -> bool {
1102        succ(self.frozen(), u, &class_path()).contains(class)
1103    }
1104
1105    fn int(&self, s: &NamedOrBlankNode, p: NamedNodeRef) -> Option<u64> {
1106        match self.shapes.object(s, p) {
1107            Some(Term::Literal(l)) => l.value().parse().ok(),
1108            _ => None,
1109        }
1110    }
1111
1112    fn bool(&self, s: &NamedOrBlankNode, p: NamedNodeRef) -> bool {
1113        matches!(
1114            self.shapes.object(s, p),
1115            Some(Term::Literal(ref literal)) if matches!(literal.value(), "true" | "1")
1116        )
1117    }
1118
1119    /// `sh:resultSeverity` for results from `shape`: its declared `sh:severity`
1120    /// (an IRI such as `sh:Warning`/`sh:Info`), defaulting to `sh:Violation`.
1121    fn severity(&self, shape: &NamedOrBlankNode) -> NamedNode {
1122        match self.shapes.object(shape, vocab::SH_SEVERITY) {
1123            Some(Term::NamedNode(n)) => n,
1124            _ => vocab::SH_VIOLATION.into_owned(),
1125        }
1126    }
1127}
1128
1129fn is_shape_node(shapes: &Loaded, node: &NamedOrBlankNode) -> bool {
1130    shapes.has_type(node, vocab::SH_NODE_SHAPE)
1131        || shapes.has_type(node, vocab::SH_PROPERTY_SHAPE)
1132        || [
1133            vocab::SH_PROPERTY,
1134            vocab::SH_NODE,
1135            vocab::SH_AND,
1136            vocab::SH_OR,
1137            vocab::SH_NOT,
1138            vocab::SH_XONE,
1139            vocab::SH_DATATYPE,
1140            vocab::SH_CLASS,
1141            vocab::SH_NODE_KIND,
1142            vocab::SH_IN,
1143            vocab::SH_HAS_VALUE,
1144            vocab::SH_PROPERTY,
1145        ]
1146        .iter()
1147        .any(|predicate| shapes.object(node, *predicate).is_some())
1148}
1149
1150/// Whether any `sh:select` / `sh:ask` query references `$shapesGraph`, so the
1151/// shapes graph must be mirrored into a named graph for evaluation.
1152fn shapes_reference_shapes_graph(shapes: &Loaded) -> bool {
1153    [vocab::SH_SELECT, vocab::SH_ASK].iter().any(|predicate| {
1154        shapes.graph.triples_for_predicate(*predicate).any(
1155            |t| matches!(t.object, oxrdf::TermRef::Literal(l) if l.value().contains("shapesGraph")),
1156        )
1157    })
1158}
1159
1160fn class_path() -> Path {
1161    Path::seq(vec![
1162        Path::Pred(vocab::rdf_type()),
1163        Path::star(Path::Pred(vocab::rdfs_subclassof())),
1164    ])
1165}
1166
1167/// Index `class → focus-data instances` under `rdf:type / rdfs:subClassOf*`.
1168///
1169/// One pass over the `rdf:type` triples replaces the per-shape forward scan
1170/// (`graph_nodes(data).filter(node is instance of c)`), which was
1171/// `O(shapes × nodes × type-closure)`. Each instance is attributed to every
1172/// superclass of its declared type, and the reflexive `subClassOf*` closure of
1173/// each distinct type is computed at most once. Only nodes present in the focus
1174/// (data) graph are indexed, matching the original target-selection semantics.
1175fn build_class_index(
1176    focus_data: &Graph,
1177    frozen: &FrozenIndexedDataset,
1178) -> HashMap<Term, Vec<Term>> {
1179    let focus_nodes = graph_nodes(focus_data);
1180    let subclass_star = Path::star(Path::Pred(vocab::rdfs_subclassof()));
1181    let mut supers: HashMap<Term, Vec<Term>> = HashMap::new();
1182    let mut index: HashMap<Term, Vec<Term>> = HashMap::new();
1183    let mut seen: HashSet<(Term, Term)> = HashSet::new();
1184    for (node, ty) in frozen.triples_for_predicate(&vocab::rdf_type()) {
1185        if !focus_nodes.contains(&node) {
1186            continue;
1187        }
1188        let classes = supers
1189            .entry(ty.clone())
1190            .or_insert_with(|| succ(frozen, &ty, &subclass_star).into_iter().collect());
1191        for class in classes.iter() {
1192            if seen.insert((class.clone(), node.clone())) {
1193                index.entry(class.clone()).or_default().push(node.clone());
1194            }
1195        }
1196    }
1197    index
1198}
1199
1200fn graph_nodes(graph: &Graph) -> HashSet<Term> {
1201    let mut nodes = HashSet::new();
1202    for triple in graph.iter() {
1203        nodes.insert(node_term(triple.subject));
1204        nodes.insert(triple.object.into_owned());
1205    }
1206    nodes
1207}
1208
1209fn node_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1210    crate::path::term_of(s.into_owned())
1211}
1212
1213fn node_term_ref(s: &NamedOrBlankNode) -> Term {
1214    match s {
1215        NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n.clone()),
1216        NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b.clone()),
1217    }
1218}
1219
1220fn map_node_kind(term: &Term) -> Option<NodeKindSet> {
1221    let Term::NamedNode(n) = term else {
1222        return None;
1223    };
1224    let r = n.as_ref();
1225    Some(if r == vocab::SH_IRI {
1226        NodeKindSet::IRI
1227    } else if r == vocab::SH_BLANK_NODE {
1228        NodeKindSet::BLANK_NODE
1229    } else if r == vocab::SH_LITERAL {
1230        NodeKindSet::LITERAL
1231    } else if r == vocab::SH_BLANK_NODE_OR_IRI {
1232        NodeKindSet::BLANK_NODE_OR_IRI
1233    } else if r == vocab::SH_BLANK_NODE_OR_LITERAL {
1234        NodeKindSet::BLANK_NODE_OR_LITERAL
1235    } else if r == vocab::SH_IRI_OR_LITERAL {
1236        NodeKindSet::IRI_OR_LITERAL
1237    } else {
1238        return None;
1239    })
1240}