Skip to main content

shifty_engine/
report.rs

1//! W3C `sh:ValidationReport` generation (component-granular, RDF-driven).
2//!
3//! Producing a spec-faithful report needs provenance the optimized algebra
4//! discards: each result carries `sh:sourceConstraintComponent`,
5//! `sh:sourceShape`, and `sh:resultPath`, and the granularity is one result per
6//! (focus, value node, component) — `sh:and`/`sh:or`/`sh:not`/`sh:node` report
7//! as a *unit* (they do not drill into sub-failures), while `sh:property`
8//! delegates to the nested shape. So this validator walks the shapes graph
9//! directly, reusing only the leaf evaluation primitives (`succ`,
10//! `value_type_holds`). It is separate from the algebra path used for fast
11//! conformance.
12//!
13//! Coverage is a growing subset of SHACL Core (see `docs/BACKLOG.md`).
14
15use crate::frozen::FrozenIndexedDataset;
16use crate::path::succ;
17use crate::sparql::SparqlExecutor;
18use crate::validate::{ValidationGraphMode, apply_message_template, graph_union};
19use crate::value::{compare_terms, value_type_holds};
20use oxrdf::{BlankNode, Graph, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Term, Triple};
21use shifty_algebra::value_type::{Bound, ValueType};
22use shifty_algebra::{NodeKindSet, Path, SparqlConstraint, SparqlQueryKind};
23use shifty_parse::graph::{Loaded, term_to_node};
24use shifty_parse::lower::canonical_sparql_query;
25use shifty_parse::path::parse_path;
26use shifty_parse::vocab;
27use std::cell::RefCell;
28use std::cmp::Ordering;
29use std::collections::{HashMap, HashSet};
30
31/// One `sh:ValidationResult`.
32#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33pub struct ValidationResult {
34    pub focus: Term,
35    /// `sh:resultPath` as the original RDF node (predicate IRI for simple paths).
36    pub path: Option<Term>,
37    pub value: Option<Term>,
38    pub component: NamedNode,
39    pub source_shape: Term,
40    /// `sh:resultSeverity` — the `sh:severity` declared on the source shape,
41    /// defaulting to `sh:Violation`.
42    pub severity: NamedNode,
43    /// `sh:resultMessage` — copied from `sh:message` on the source shape.
44    pub messages: Vec<Term>,
45}
46
47#[derive(Debug, Clone)]
48pub struct ValidationReport {
49    pub conforms: bool,
50    pub results: Vec<ValidationResult>,
51}
52
53/// Validate `data` against the shapes in `shapes`, producing a W3C report.
54pub fn validate_report(shapes: &Loaded, data: &Graph) -> ValidationReport {
55    let has_shapes_graph = shapes_reference_shapes_graph(shapes);
56    let frozen = if has_shapes_graph {
57        FrozenIndexedDataset::from_graphs(data, &shapes.graph)
58    } else {
59        FrozenIndexedDataset::from_graph(data)
60    };
61    validate_report_context(shapes, data, frozen, has_shapes_graph)
62}
63
64/// Validate split data and shapes graphs using the selected graph mode.
65pub fn validate_report_graphs(shapes: &Loaded, data: &Graph) -> ValidationReport {
66    validate_report_graphs_with_mode(shapes, data, ValidationGraphMode::default())
67}
68
69/// Validate split data and shapes graphs using an explicit graph mode.
70pub fn validate_report_graphs_with_mode(
71    shapes: &Loaded,
72    data: &Graph,
73    mode: ValidationGraphMode,
74) -> ValidationReport {
75    let has_shapes_graph = shapes_reference_shapes_graph(shapes);
76    match mode {
77        ValidationGraphMode::Data => {
78            let frozen = if has_shapes_graph {
79                FrozenIndexedDataset::from_graphs(data, &shapes.graph)
80            } else {
81                FrozenIndexedDataset::from_graph(data)
82            };
83            validate_report_context(shapes, data, frozen, has_shapes_graph)
84        }
85        ValidationGraphMode::Union => {
86            let frozen = if has_shapes_graph {
87                FrozenIndexedDataset::from_graph_union_with_shapes(data, &shapes.graph)
88            } else {
89                FrozenIndexedDataset::from_graph_union(data, &shapes.graph)
90            };
91            validate_report_context(shapes, data, frozen, has_shapes_graph)
92        }
93        ValidationGraphMode::UnionAll => {
94            let union = graph_union(data, &shapes.graph);
95            let frozen = if has_shapes_graph {
96                FrozenIndexedDataset::from_graphs(&union, &shapes.graph)
97            } else {
98                FrozenIndexedDataset::from_graph(&union)
99            };
100            validate_report_context(shapes, &union, frozen, has_shapes_graph)
101        }
102    }
103}
104
105fn validate_report_context(
106    shapes: &Loaded,
107    focus_data: &Graph,
108    frozen: FrozenIndexedDataset,
109    has_shapes_graph: bool,
110) -> ValidationReport {
111    // Only execute SPARQL target/constraint work when the shapes graph contains
112    // those features. Query execution shares the frozen validation dataset.
113    let needs_sparql = shapes
114        .graph
115        .triples_for_predicate(vocab::SH_SPARQL)
116        .next()
117        .is_some()
118        || shapes
119            .graph
120            .triples_for_predicate(vocab::SH_TARGET)
121            .next()
122            .is_some();
123    let sparql = SparqlExecutor::from_frozen(frozen, needs_sparql && has_shapes_graph);
124    // Index class membership once (instead of a forward scan over every node per
125    // class-target shape): this is the report path's analogue of the plan's
126    // backward `PathToConst` focus source, amortized across all shapes.
127    let has_explicit_class_target = shapes
128        .graph
129        .triples_for_predicate(vocab::SH_TARGET_CLASS)
130        .next()
131        .is_some();
132    let has_implicit_class_target = shapes.graph.iter().any(|triple| {
133        let subject = triple.subject.into_owned();
134        is_shape_node(shapes, &subject)
135            && (shapes.is_instance_of(&subject, vocab::RDFS_CLASS)
136                || shapes.is_instance_of(&subject, vocab::OWL_CLASS))
137    });
138    let needs_class_index = has_explicit_class_target || has_implicit_class_target;
139    let class_index = if needs_class_index {
140        build_class_index(
141            focus_data,
142            sparql
143                .frozen()
144                .expect("report validation always has a frozen dataset"),
145        )
146    } else {
147        HashMap::new()
148    };
149    let r = Reporter {
150        shapes,
151        focus_data,
152        sparql,
153        needs_sparql,
154        class_index,
155        path_cache: RefCell::new(HashMap::new()),
156    };
157    let mut results = Vec::new();
158    for shape in r.target_shapes() {
159        let foci = r.focus_nodes(&shape);
160        r.prefetch_sparql(&shape, &foci);
161        for focus in &foci {
162            let mut visited = HashSet::new();
163            r.collect(&shape, focus, &mut results, &mut visited);
164        }
165    }
166    ValidationReport {
167        conforms: results.is_empty(),
168        results,
169    }
170}
171
172/// Serialize a report as an RDF `sh:ValidationReport` graph (W3C shape).
173pub fn report_to_graph(report: &ValidationReport) -> Graph {
174    let mut g = Graph::new();
175    let root = BlankNode::default();
176    let t = |s: NamedOrBlankNode, p: NamedNodeRef, o: Term| Triple::new(s, p.into_owned(), o);
177
178    g.insert(&t(
179        root.clone().into(),
180        vocab::RDF_TYPE,
181        vocab::SH_VALIDATION_REPORT.into_owned().into(),
182    ));
183    g.insert(&t(
184        root.clone().into(),
185        vocab::SH_CONFORMS,
186        Literal::from(report.conforms).into(),
187    ));
188
189    for r in &report.results {
190        let rn = BlankNode::default();
191        g.insert(&t(root.clone().into(), vocab::SH_RESULT, rn.clone().into()));
192        g.insert(&t(
193            rn.clone().into(),
194            vocab::RDF_TYPE,
195            vocab::SH_VALIDATION_RESULT.into_owned().into(),
196        ));
197        g.insert(&t(rn.clone().into(), vocab::SH_FOCUS_NODE, r.focus.clone()));
198        if let Some(path) = &r.path {
199            g.insert(&t(rn.clone().into(), vocab::SH_RESULT_PATH, path.clone()));
200        }
201        if let Some(value) = &r.value {
202            g.insert(&t(rn.clone().into(), vocab::SH_VALUE, value.clone()));
203        }
204        g.insert(&t(
205            rn.clone().into(),
206            vocab::SH_RESULT_SEVERITY,
207            r.severity.clone().into(),
208        ));
209        g.insert(&t(
210            rn.clone().into(),
211            vocab::SH_SOURCE_CONSTRAINT_COMPONENT,
212            r.component.clone().into(),
213        ));
214        for msg in &r.messages {
215            g.insert(&t(rn.clone().into(), vocab::SH_RESULT_MESSAGE, msg.clone()));
216        }
217        g.insert(&t(
218            rn.into(),
219            vocab::SH_SOURCE_SHAPE,
220            r.source_shape.clone(),
221        ));
222    }
223    g
224}
225
226/// Substitute `{$varName}` / `{?varName}` placeholders in `sh:message` literals.
227///
228/// `$this` is resolved from `focus`; all other names are looked up in
229/// `bindings` (keyed without the `$`/`?` sigil). Unresolved placeholders are
230/// left as-is. Only `sh:Literal` messages are processed; IRI/blank-node
231/// message terms pass through unchanged.
232fn substitute_messages(
233    messages: &[Term],
234    focus: &Term,
235    bindings: &HashMap<String, Term>,
236) -> Vec<Term> {
237    messages
238        .iter()
239        .map(|msg| {
240            let Term::Literal(lit) = msg else {
241                return msg.clone();
242            };
243            let text = lit.value();
244            let substituted = apply_message_template(text, focus, bindings);
245            if substituted == text {
246                msg.clone()
247            } else {
248                Term::Literal(Literal::new_simple_literal(&substituted))
249            }
250        })
251        .collect()
252}
253
254struct Reporter<'a> {
255    shapes: &'a Loaded,
256    focus_data: &'a Graph,
257    sparql: SparqlExecutor,
258    needs_sparql: bool,
259    /// `class → focus-data instances` under `rdf:type / rdfs:subClassOf*`, built
260    /// once and shared by every `sh:targetClass` / implicit-class lookup.
261    class_index: HashMap<Term, Vec<Term>>,
262    /// Parsed `sh:path` per shape node, so `collect` does not re-parse the path
263    /// RDF on every (shape, focus) visit. `None` = shape has no/invalid path.
264    path_cache: RefCell<HashMap<NamedOrBlankNode, PathCacheEntry>>,
265}
266
267type Visited = HashSet<(NamedOrBlankNode, Term)>;
268
269/// Cached parsed path and its term representation for sh:path expressions
270type PathCacheEntry = (Option<Term>, Option<Path>);
271
272impl Reporter<'_> {
273    fn frozen(&self) -> &FrozenIndexedDataset {
274        self.sparql
275            .frozen()
276            .expect("report validation always has a frozen dataset")
277    }
278
279    fn target_shapes(&self) -> Vec<NamedOrBlankNode> {
280        let mut found: HashSet<NamedOrBlankNode> = HashSet::new();
281        for t in self.shapes.graph.iter() {
282            let p = t.predicate;
283            if p == vocab::SH_TARGET_NODE
284                || p == vocab::SH_TARGET_CLASS
285                || p == vocab::SH_TARGET_SUBJECTS_OF
286                || p == vocab::SH_TARGET_OBJECTS_OF
287            {
288                found.insert(t.subject.into_owned());
289            }
290            // SPARQL-based target: sh:target [ sh:select "…" ]
291            if p == vocab::SH_TARGET
292                && let Some(target) = term_to_node(&t.object.into_owned())
293                && self.shapes.object(&target, vocab::SH_SELECT).is_some()
294            {
295                found.insert(t.subject.into_owned());
296            }
297            // implicit class target: a shape that is also an rdfs:Class / owl:Class
298            if p == vocab::RDF_TYPE {
299                let s = t.subject.into_owned();
300                if self.is_class(&s) && self.is_shape(&s) {
301                    found.insert(s);
302                }
303            }
304        }
305        let mut v: Vec<_> = found.into_iter().collect();
306        v.sort_by_key(|n| n.to_string());
307        v
308    }
309
310    /// Does this node look like a SHACL shape (so its class-ness implies a target)?
311    fn is_shape(&self, n: &NamedOrBlankNode) -> bool {
312        is_shape_node(self.shapes, n)
313    }
314
315    fn is_class(&self, n: &NamedOrBlankNode) -> bool {
316        self.shapes.is_instance_of(n, vocab::RDFS_CLASS)
317            || self.shapes.is_instance_of(n, vocab::OWL_CLASS)
318    }
319
320    fn deactivated(&self, n: &NamedOrBlankNode) -> bool {
321        matches!(self.shapes.object(n, vocab::SH_DEACTIVATED),
322            Some(Term::Literal(ref l)) if l.value() == "true")
323    }
324
325    fn focus_nodes(&self, shape: &NamedOrBlankNode) -> Vec<Term> {
326        let mut nodes = Vec::new();
327        nodes.extend(self.shapes.objects(shape, vocab::SH_TARGET_NODE));
328        for c in self.shapes.objects(shape, vocab::SH_TARGET_CLASS) {
329            if let Some(instances) = self.class_index.get(&c) {
330                nodes.extend(instances.iter().cloned());
331            }
332        }
333        for p in self.shapes.objects(shape, vocab::SH_TARGET_SUBJECTS_OF) {
334            if let Term::NamedNode(n) = p {
335                nodes.extend(
336                    self.focus_data
337                        .triples_for_predicate(n.as_ref())
338                        .map(|t| node_term(t.subject)),
339                );
340            }
341        }
342        for p in self.shapes.objects(shape, vocab::SH_TARGET_OBJECTS_OF) {
343            if let Term::NamedNode(n) = p {
344                nodes.extend(
345                    self.focus_data
346                        .triples_for_predicate(n.as_ref())
347                        .map(|t| t.object.into_owned()),
348                );
349            }
350        }
351        // SPARQL-based targets: sh:target [ sh:select "…" ]. The query selects
352        // `?this` focus nodes from the context store.
353        if self.needs_sparql {
354            let exec = &self.sparql;
355            for target in self.shapes.objects(shape, vocab::SH_TARGET) {
356                let Some(target_node) = term_to_node(&target) else {
357                    continue;
358                };
359                let Some(Term::Literal(query)) = self.shapes.object(&target_node, vocab::SH_SELECT)
360                else {
361                    continue;
362                };
363                // Drop targets that fail to canonicalize, matching the lowering path.
364                let Ok((_, canonical)) =
365                    canonical_sparql_query(self.shapes, &target_node, query.value())
366                else {
367                    continue;
368                };
369                if let Ok(found) = exec.target_nodes(&canonical) {
370                    nodes.extend(found);
371                }
372            }
373        }
374        // implicit class target: instances of the shape (which is also a class)
375        if let NamedOrBlankNode::NamedNode(n) = shape
376            && self.is_class(shape)
377        {
378            let class = Term::NamedNode(n.clone());
379            if let Some(instances) = self.class_index.get(&class) {
380                nodes.extend(instances.iter().cloned());
381            }
382        }
383        let mut seen = HashSet::new();
384        nodes.retain(|t| seen.insert(t.clone()));
385        nodes
386    }
387
388    /// The shape's `sh:path` as both its raw RDF node (for `sh:resultPath`) and
389    /// the parsed path algebra, memoized so repeated visits don't re-parse it.
390    fn shape_path(&self, shape: &NamedOrBlankNode) -> (Option<Term>, Option<Path>) {
391        if let Some(cached) = self.path_cache.borrow().get(shape) {
392            return cached.clone();
393        }
394        let path_term = self.shapes.object(shape, vocab::SH_PATH);
395        let parsed = path_term
396            .as_ref()
397            .and_then(|t| parse_path(self.shapes, t).ok());
398        let entry = (path_term, parsed);
399        self.path_cache
400            .borrow_mut()
401            .insert(shape.clone(), entry.clone());
402        entry
403    }
404
405    /// Collect the results of validating `focus` against `shape`.
406    fn collect(
407        &self,
408        shape: &NamedOrBlankNode,
409        focus: &Term,
410        out: &mut Vec<ValidationResult>,
411        visited: &mut Visited,
412    ) {
413        if self.deactivated(shape) {
414            return; // deactivated shapes produce no results
415        }
416        let key = (shape.clone(), focus.clone());
417        if !visited.insert(key.clone()) {
418            return; // recursion: conform on the back-edge (gfp)
419        }
420
421        let (path_term, parsed) = self.shape_path(shape);
422        let value_nodes: Vec<Term> = match &parsed {
423            Some(p) => succ(self.frozen(), focus, p).into_iter().collect(),
424            None => vec![focus.clone()],
425        };
426        let severity = self.severity(shape);
427        let messages = self.messages(shape);
428        let push = |out: &mut Vec<ValidationResult>, value, component| {
429            out.push(ValidationResult {
430                focus: focus.clone(),
431                path: path_term.clone(),
432                value,
433                component,
434                source_shape: node_term_ref(shape),
435                severity: severity.clone(),
436                messages: messages.clone(),
437            });
438        };
439
440        // cardinality (only meaningful with a path)
441        if parsed.is_some() {
442            if let Some(min) = self.int(shape, vocab::SH_MIN_COUNT)
443                && (value_nodes.len() as u64) < min
444            {
445                push(out, None, vocab::SH_CC_MIN_COUNT.into_owned());
446            }
447            if let Some(max) = self.int(shape, vocab::SH_MAX_COUNT)
448                && (value_nodes.len() as u64) > max
449            {
450                push(out, None, vocab::SH_CC_MAX_COUNT.into_owned());
451            }
452        }
453
454        // sh:hasValue — one of the value nodes must equal the constant
455        for hv in self.shapes.objects(shape, vocab::SH_HAS_VALUE) {
456            if !value_nodes.contains(&hv) {
457                push(out, None, vocab::SH_CC_HAS_VALUE.into_owned());
458            }
459        }
460
461        self.collect_closed(shape, focus, &value_nodes, out);
462        self.collect_property_pairs(shape, focus, &path_term, &value_nodes, out);
463        self.collect_unique_lang(shape, focus, &path_term, &value_nodes, out);
464        self.collect_qualified_counts(shape, focus, &path_term, &value_nodes, out, visited);
465
466        // value-scoped components
467        for u in &value_nodes {
468            for (component, ok) in self.value_checks(shape, u, visited) {
469                if !ok {
470                    push(out, Some(u.clone()), component);
471                }
472            }
473        }
474
475        // nested property shapes: delegate (each value node is a focus for P)
476        for prop in self.shapes.objects(shape, vocab::SH_PROPERTY) {
477            if let Some(pn) = term_to_node(&prop) {
478                for u in &value_nodes {
479                    self.collect(&pn, u, out, visited);
480                }
481            }
482        }
483
484        self.collect_sparql(shape, focus, &path_term, &parsed, out);
485
486        visited.remove(&key);
487    }
488
489    /// `sh:sparql` constraints (SHACL-SPARQL). Each `SELECT`/`ASK` query runs for
490    /// the focus node against the context store; every solution (or a `true`
491    /// `ASK`) is one `sh:SPARQLConstraintComponent` result. A `value`/`path`
492    /// projected by the query overrides the value node / `sh:resultPath`.
493    /// Build the [`SparqlConstraint`] for a `sh:sparql` constraint node, applying
494    /// the same canonicalization the lowering path uses. `None` when the node has
495    /// neither `sh:select` nor `sh:ask`, or when canonicalization fails (matching
496    /// the lowering path, which omits such constraints with a diagnostic).
497    fn build_sparql_constraint(
498        &self,
499        shape: &NamedOrBlankNode,
500        constraint_node: &NamedOrBlankNode,
501        parsed_path: &Option<Path>,
502    ) -> Option<SparqlConstraint> {
503        let (kind, raw) = if let Some(Term::Literal(query)) =
504            self.shapes.object(constraint_node, vocab::SH_SELECT)
505        {
506            (SparqlQueryKind::Select, query.value().to_string())
507        } else if let Some(Term::Literal(query)) =
508            self.shapes.object(constraint_node, vocab::SH_ASK)
509        {
510            (SparqlQueryKind::Ask, query.value().to_string())
511        } else {
512            return None;
513        };
514        let (_, query) = canonical_sparql_query(self.shapes, constraint_node, &raw).ok()?;
515        Some(SparqlConstraint {
516            kind,
517            query,
518            path: parsed_path.clone(),
519            shape: Some(node_term_ref(shape)),
520            // The report path resolves messages itself, so the constraint's own
521            // message slot is left empty here.
522            messages: Vec::new(),
523        })
524    }
525
526    /// Batch-evaluate a shape's direct `sh:sparql` constraints over its whole
527    /// focus set before the per-focus walk, so fallback queries run once over a
528    /// `VALUES` table (doc §189) rather than once per focus.
529    fn prefetch_sparql(&self, shape: &NamedOrBlankNode, foci: &[Term]) {
530        if !self.needs_sparql || foci.len() < 2 {
531            return;
532        }
533        let (_, parsed_path) = self.shape_path(shape);
534        for constraint_term in self.shapes.objects(shape, vocab::SH_SPARQL) {
535            let Some(constraint_node) = term_to_node(&constraint_term) else {
536                continue;
537            };
538            if let Some(constraint) =
539                self.build_sparql_constraint(shape, &constraint_node, &parsed_path)
540            {
541                let _ = self.sparql.prefetch_constraint(&constraint, foci);
542            }
543        }
544    }
545
546    fn collect_sparql(
547        &self,
548        shape: &NamedOrBlankNode,
549        focus: &Term,
550        path_term: &Option<Term>,
551        parsed_path: &Option<Path>,
552        out: &mut Vec<ValidationResult>,
553    ) {
554        if !self.needs_sparql {
555            return;
556        }
557        let sparql = &self.sparql;
558        let severity = self.severity(shape);
559        for constraint_term in self.shapes.objects(shape, vocab::SH_SPARQL) {
560            let Some(constraint_node) = term_to_node(&constraint_term) else {
561                continue;
562            };
563            let Some(constraint) =
564                self.build_sparql_constraint(shape, &constraint_node, parsed_path)
565            else {
566                continue;
567            };
568            // Mirror lower.rs §179-184: constraint-node sh:message takes
569            // precedence; absent that, fall back to the owning shape's sh:message.
570            let raw_messages = {
571                let on_constraint = self.shapes.objects(&constraint_node, vocab::SH_MESSAGE);
572                if on_constraint.is_empty() {
573                    self.messages(shape)
574                } else {
575                    on_constraint
576                }
577            };
578            match sparql.constraint_violations(&constraint, focus) {
579                Ok(violations) => {
580                    for violation in violations {
581                        let messages =
582                            substitute_messages(&raw_messages, focus, &violation.bindings);
583                        out.push(ValidationResult {
584                            focus: focus.clone(),
585                            path: violation.path.or_else(|| path_term.clone()),
586                            value: violation.value,
587                            component: vocab::SH_CC_SPARQL.into_owned(),
588                            source_shape: node_term_ref(shape),
589                            severity: severity.clone(),
590                            messages,
591                        });
592                    }
593                }
594                // Runtime failure (e.g. complex-path prebinding is unsupported):
595                // fail closed, matching the algebra validator.
596                Err(_) => out.push(ValidationResult {
597                    focus: focus.clone(),
598                    path: path_term.clone(),
599                    value: None,
600                    component: vocab::SH_CC_SPARQL.into_owned(),
601                    source_shape: node_term_ref(shape),
602                    severity: severity.clone(),
603                    messages: raw_messages,
604                }),
605            }
606        }
607    }
608
609    fn collect_closed(
610        &self,
611        shape: &NamedOrBlankNode,
612        focus: &Term,
613        value_nodes: &[Term],
614        out: &mut Vec<ValidationResult>,
615    ) {
616        if !self.bool(shape, vocab::SH_CLOSED) {
617            return;
618        }
619        let mut allowed = HashSet::new();
620        for prop in self.shapes.objects(shape, vocab::SH_PROPERTY) {
621            let Some(prop) = term_to_node(&prop) else {
622                continue;
623            };
624            if let Some(Term::NamedNode(path)) = self.shapes.object(&prop, vocab::SH_PATH) {
625                allowed.insert(path);
626            }
627        }
628        for list in self.shapes.objects(shape, vocab::SH_IGNORED_PROPERTIES) {
629            for term in self.shapes.read_list(&list) {
630                if let Term::NamedNode(predicate) = term {
631                    allowed.insert(predicate);
632                }
633            }
634        }
635        for value_node in value_nodes {
636            for (predicate, object) in self.frozen().outgoing(value_node) {
637                if allowed.contains(&predicate) {
638                    continue;
639                }
640                out.push(ValidationResult {
641                    focus: focus.clone(),
642                    path: Some(Term::NamedNode(predicate)),
643                    value: Some(object),
644                    component: vocab::SH_CC_CLOSED.into_owned(),
645                    source_shape: node_term_ref(shape),
646                    severity: self.severity(shape),
647                    messages: self.messages(shape),
648                });
649            }
650        }
651    }
652
653    fn collect_property_pairs(
654        &self,
655        shape: &NamedOrBlankNode,
656        focus: &Term,
657        path: &Option<Term>,
658        value_nodes: &[Term],
659        out: &mut Vec<ValidationResult>,
660    ) {
661        for predicate in self.shapes.objects(shape, vocab::SH_EQUALS) {
662            let Term::NamedNode(predicate) = predicate else {
663                continue;
664            };
665            let other = succ(self.frozen(), focus, &Path::Pred(predicate));
666            for value in value_nodes.iter().filter(|value| !other.contains(*value)) {
667                self.push(
668                    out,
669                    shape,
670                    focus,
671                    path.clone(),
672                    Some((*value).clone()),
673                    vocab::SH_CC_EQUALS,
674                );
675            }
676            for value in other.iter().filter(|value| !value_nodes.contains(*value)) {
677                self.push(
678                    out,
679                    shape,
680                    focus,
681                    path.clone(),
682                    Some(value.clone()),
683                    vocab::SH_CC_EQUALS,
684                );
685            }
686        }
687        for predicate in self.shapes.objects(shape, vocab::SH_DISJOINT) {
688            let Term::NamedNode(predicate) = predicate else {
689                continue;
690            };
691            let other = succ(self.frozen(), focus, &Path::Pred(predicate));
692            for value in value_nodes.iter().filter(|value| other.contains(*value)) {
693                self.push(
694                    out,
695                    shape,
696                    focus,
697                    path.clone(),
698                    Some((*value).clone()),
699                    vocab::SH_CC_DISJOINT,
700                );
701            }
702        }
703        for (constraint, component, inclusive) in [
704            (vocab::SH_LESS_THAN, vocab::SH_CC_LESS_THAN, false),
705            (
706                vocab::SH_LESS_THAN_OR_EQUALS,
707                vocab::SH_CC_LESS_THAN_OR_EQUALS,
708                true,
709            ),
710        ] {
711            for predicate in self.shapes.objects(shape, constraint) {
712                let Term::NamedNode(predicate) = predicate else {
713                    continue;
714                };
715                for left in value_nodes {
716                    for right in succ(self.frozen(), focus, &Path::Pred(predicate.clone())) {
717                        let ordering = compare_terms(left, &right);
718                        let passes = ordering == Some(Ordering::Less)
719                            || inclusive && ordering == Some(Ordering::Equal);
720                        if !passes {
721                            self.push(
722                                out,
723                                shape,
724                                focus,
725                                path.clone(),
726                                Some(left.clone()),
727                                component,
728                            );
729                        }
730                    }
731                }
732            }
733        }
734    }
735
736    fn collect_unique_lang(
737        &self,
738        shape: &NamedOrBlankNode,
739        focus: &Term,
740        path: &Option<Term>,
741        value_nodes: &[Term],
742        out: &mut Vec<ValidationResult>,
743    ) {
744        if !self.bool(shape, vocab::SH_UNIQUE_LANG) {
745            return;
746        }
747        let mut counts = HashMap::new();
748        for value in value_nodes {
749            if let Term::Literal(literal) = value
750                && let Some(language) = literal.language()
751            {
752                *counts
753                    .entry(language.to_ascii_lowercase())
754                    .or_insert(0usize) += 1;
755            }
756        }
757        for _ in counts.values().filter(|count| **count > 1) {
758            self.push(
759                out,
760                shape,
761                focus,
762                path.clone(),
763                None,
764                vocab::SH_CC_UNIQUE_LANG,
765            );
766        }
767    }
768
769    fn collect_qualified_counts(
770        &self,
771        shape: &NamedOrBlankNode,
772        focus: &Term,
773        path: &Option<Term>,
774        value_nodes: &[Term],
775        out: &mut Vec<ValidationResult>,
776        visited: &mut Visited,
777    ) {
778        for qualifier in self.shapes.objects(shape, vocab::SH_QUALIFIED_VALUE_SHAPE) {
779            let Some(qualifier) = term_to_node(&qualifier) else {
780                continue;
781            };
782            let siblings = if self.bool(shape, vocab::SH_QUALIFIED_VALUE_SHAPES_DISJOINT) {
783                self.sibling_qualified_shapes(shape, &qualifier)
784            } else {
785                Vec::new()
786            };
787            let count = value_nodes
788                .iter()
789                .filter(|value| {
790                    self.conforms(&qualifier, value, visited)
791                        && siblings
792                            .iter()
793                            .all(|sibling| !self.conforms(sibling, value, visited))
794                })
795                .count() as u64;
796            if let Some(min) = self.int(shape, vocab::SH_QUALIFIED_MIN_COUNT)
797                && count < min
798            {
799                self.push(
800                    out,
801                    shape,
802                    focus,
803                    path.clone(),
804                    None,
805                    vocab::SH_CC_QUALIFIED_MIN_COUNT,
806                );
807            }
808            if let Some(max) = self.int(shape, vocab::SH_QUALIFIED_MAX_COUNT)
809                && count > max
810            {
811                self.push(
812                    out,
813                    shape,
814                    focus,
815                    path.clone(),
816                    None,
817                    vocab::SH_CC_QUALIFIED_MAX_COUNT,
818                );
819            }
820        }
821    }
822
823    fn sibling_qualified_shapes(
824        &self,
825        shape: &NamedOrBlankNode,
826        qualifier: &NamedOrBlankNode,
827    ) -> Vec<NamedOrBlankNode> {
828        let shape_term = node_term_ref(shape);
829        let mut siblings = HashSet::new();
830        for triple in self.shapes.graph.triples_for_predicate(vocab::SH_PROPERTY) {
831            if triple.object != shape_term.as_ref() {
832                continue;
833            }
834            let parent = triple.subject.into_owned();
835            for property in self.shapes.objects(&parent, vocab::SH_PROPERTY) {
836                let Some(property) = term_to_node(&property) else {
837                    continue;
838                };
839                for qualifier in self
840                    .shapes
841                    .objects(&property, vocab::SH_QUALIFIED_VALUE_SHAPE)
842                {
843                    if let Some(qualifier) = term_to_node(&qualifier) {
844                        siblings.insert(qualifier);
845                    }
846                }
847            }
848        }
849        siblings.remove(qualifier);
850        siblings.into_iter().collect()
851    }
852
853    fn push(
854        &self,
855        out: &mut Vec<ValidationResult>,
856        shape: &NamedOrBlankNode,
857        focus: &Term,
858        path: Option<Term>,
859        value: Option<Term>,
860        component: NamedNodeRef<'static>,
861    ) {
862        let mut bindings = HashMap::new();
863        if let Some(v) = &value {
864            bindings.insert("value".to_string(), v.clone());
865        }
866        if let Some(p) = &path {
867            bindings.insert("path".to_string(), p.clone());
868        }
869        let raw = self.messages(shape);
870        let messages = substitute_messages(&raw, focus, &bindings);
871        out.push(ValidationResult {
872            focus: focus.clone(),
873            path,
874            value,
875            component: component.into_owned(),
876            source_shape: node_term_ref(shape),
877            severity: self.severity(shape),
878            messages,
879        });
880    }
881
882    /// Read `sh:message` values from `shape` to propagate as `sh:resultMessage`.
883    fn messages(&self, shape: &NamedOrBlankNode) -> Vec<Term> {
884        self.shapes.objects(shape, vocab::SH_MESSAGE)
885    }
886
887    fn conforms(&self, shape: &NamedOrBlankNode, focus: &Term, visited: &mut Visited) -> bool {
888        let mut scratch = Vec::new();
889        self.collect(shape, focus, &mut scratch, visited);
890        scratch.is_empty()
891    }
892
893    /// Each value-scoped constraint component on `shape` and whether it holds at
894    /// value node `u`. `sh:and`/`or`/`not`/`node` report as a unit.
895    fn value_checks(
896        &self,
897        shape: &NamedOrBlankNode,
898        u: &Term,
899        visited: &mut Visited,
900    ) -> Vec<(NamedNode, bool)> {
901        let mut checks = Vec::new();
902
903        for c in self.shapes.objects(shape, vocab::SH_CLASS) {
904            checks.push((vocab::SH_CC_CLASS.into_owned(), self.is_instance(u, &c)));
905        }
906        for d in self.shapes.objects(shape, vocab::SH_DATATYPE) {
907            if let Term::NamedNode(dt) = d {
908                let ok = value_type_holds(&ValueType::Datatype(dt), u);
909                checks.push((vocab::SH_CC_DATATYPE.into_owned(), ok));
910            }
911        }
912        for k in self.shapes.objects(shape, vocab::SH_NODE_KIND) {
913            if let Some(set) = map_node_kind(&k) {
914                checks.push((vocab::SH_CC_NODE_KIND.into_owned(), set.matches(u)));
915            }
916        }
917        // numeric ranges (each bound is its own component)
918        for (pred_iri, comp, inclusive) in [
919            (vocab::SH_MIN_INCLUSIVE, vocab::SH_CC_MIN_INCLUSIVE, true),
920            (vocab::SH_MIN_EXCLUSIVE, vocab::SH_CC_MIN_EXCLUSIVE, false),
921        ] {
922            if let Some(Term::Literal(b)) = self.shapes.object(shape, pred_iri) {
923                let vt = ValueType::NumericRange {
924                    lo: Some(Bound {
925                        value: b,
926                        inclusive,
927                    }),
928                    hi: None,
929                };
930                checks.push((comp.into_owned(), value_type_holds(&vt, u)));
931            }
932        }
933        for (pred_iri, comp, inclusive) in [
934            (vocab::SH_MAX_INCLUSIVE, vocab::SH_CC_MAX_INCLUSIVE, true),
935            (vocab::SH_MAX_EXCLUSIVE, vocab::SH_CC_MAX_EXCLUSIVE, false),
936        ] {
937            if let Some(Term::Literal(b)) = self.shapes.object(shape, pred_iri) {
938                let vt = ValueType::NumericRange {
939                    lo: None,
940                    hi: Some(Bound {
941                        value: b,
942                        inclusive,
943                    }),
944                };
945                checks.push((comp.into_owned(), value_type_holds(&vt, u)));
946            }
947        }
948        // length / pattern
949        let min_len = self.int(shape, vocab::SH_MIN_LENGTH);
950        let max_len = self.int(shape, vocab::SH_MAX_LENGTH);
951        if let Some(m) = min_len {
952            let vt = ValueType::Length {
953                min: Some(m),
954                max: None,
955            };
956            checks.push((
957                vocab::SH_CC_MIN_LENGTH.into_owned(),
958                value_type_holds(&vt, u),
959            ));
960        }
961        if let Some(m) = max_len {
962            let vt = ValueType::Length {
963                min: None,
964                max: Some(m),
965            };
966            checks.push((
967                vocab::SH_CC_MAX_LENGTH.into_owned(),
968                value_type_holds(&vt, u),
969            ));
970        }
971        if let Some(Term::Literal(re)) = self.shapes.object(shape, vocab::SH_PATTERN) {
972            let flags = match self.shapes.object(shape, vocab::SH_FLAGS) {
973                Some(Term::Literal(f)) => f.value().to_string(),
974                _ => String::new(),
975            };
976            let vt = ValueType::Pattern {
977                regex: re.value().to_string(),
978                flags,
979            };
980            checks.push((vocab::SH_CC_PATTERN.into_owned(), value_type_holds(&vt, u)));
981        }
982        // sh:in
983        for list in self.shapes.objects(shape, vocab::SH_IN) {
984            let members = self.shapes.read_list(&list);
985            checks.push((vocab::SH_CC_IN.into_owned(), members.contains(u)));
986        }
987        for list in self.shapes.objects(shape, vocab::SH_LANGUAGE_IN) {
988            let languages = self
989                .shapes
990                .read_list(&list)
991                .into_iter()
992                .filter_map(|term| match term {
993                    Term::Literal(literal) => Some(literal.value().to_string()),
994                    _ => None,
995                })
996                .collect();
997            checks.push((
998                vocab::SH_CC_LANGUAGE_IN.into_owned(),
999                value_type_holds(&ValueType::LangIn(languages), u),
1000            ));
1001        }
1002
1003        // logical (unit results)
1004        for list in self.shapes.objects(shape, vocab::SH_AND) {
1005            let ok = self
1006                .shapes
1007                .read_list(&list)
1008                .iter()
1009                .filter_map(term_to_node)
1010                .all(|m| self.conforms(&m, u, visited));
1011            checks.push((vocab::SH_CC_AND.into_owned(), ok));
1012        }
1013        for list in self.shapes.objects(shape, vocab::SH_OR) {
1014            let ok = self
1015                .shapes
1016                .read_list(&list)
1017                .iter()
1018                .filter_map(term_to_node)
1019                .any(|m| self.conforms(&m, u, visited));
1020            checks.push((vocab::SH_CC_OR.into_owned(), ok));
1021        }
1022        for list in self.shapes.objects(shape, vocab::SH_XONE) {
1023            let count = self
1024                .shapes
1025                .read_list(&list)
1026                .iter()
1027                .filter_map(term_to_node)
1028                .filter(|m| self.conforms(m, u, visited))
1029                .count();
1030            checks.push((vocab::SH_CC_XONE.into_owned(), count == 1));
1031        }
1032        for n in self.shapes.objects(shape, vocab::SH_NOT) {
1033            if let Some(nn) = term_to_node(&n) {
1034                checks.push((
1035                    vocab::SH_CC_NOT.into_owned(),
1036                    !self.conforms(&nn, u, visited),
1037                ));
1038            }
1039        }
1040        for n in self.shapes.objects(shape, vocab::SH_NODE) {
1041            if let Some(nn) = term_to_node(&n) {
1042                checks.push((
1043                    vocab::SH_CC_NODE.into_owned(),
1044                    self.conforms(&nn, u, visited),
1045                ));
1046            }
1047        }
1048
1049        checks
1050    }
1051
1052    fn is_instance(&self, u: &Term, class: &Term) -> bool {
1053        succ(self.frozen(), u, &class_path()).contains(class)
1054    }
1055
1056    fn int(&self, s: &NamedOrBlankNode, p: NamedNodeRef) -> Option<u64> {
1057        match self.shapes.object(s, p) {
1058            Some(Term::Literal(l)) => l.value().parse().ok(),
1059            _ => None,
1060        }
1061    }
1062
1063    fn bool(&self, s: &NamedOrBlankNode, p: NamedNodeRef) -> bool {
1064        matches!(
1065            self.shapes.object(s, p),
1066            Some(Term::Literal(ref literal)) if matches!(literal.value(), "true" | "1")
1067        )
1068    }
1069
1070    /// `sh:resultSeverity` for results from `shape`: its declared `sh:severity`
1071    /// (an IRI such as `sh:Warning`/`sh:Info`), defaulting to `sh:Violation`.
1072    fn severity(&self, shape: &NamedOrBlankNode) -> NamedNode {
1073        match self.shapes.object(shape, vocab::SH_SEVERITY) {
1074            Some(Term::NamedNode(n)) => n,
1075            _ => vocab::SH_VIOLATION.into_owned(),
1076        }
1077    }
1078}
1079
1080fn is_shape_node(shapes: &Loaded, node: &NamedOrBlankNode) -> bool {
1081    shapes.has_type(node, vocab::SH_NODE_SHAPE)
1082        || shapes.has_type(node, vocab::SH_PROPERTY_SHAPE)
1083        || [
1084            vocab::SH_PROPERTY,
1085            vocab::SH_NODE,
1086            vocab::SH_AND,
1087            vocab::SH_OR,
1088            vocab::SH_NOT,
1089            vocab::SH_XONE,
1090            vocab::SH_DATATYPE,
1091            vocab::SH_CLASS,
1092            vocab::SH_NODE_KIND,
1093            vocab::SH_IN,
1094            vocab::SH_HAS_VALUE,
1095            vocab::SH_PROPERTY,
1096        ]
1097        .iter()
1098        .any(|predicate| shapes.object(node, *predicate).is_some())
1099}
1100
1101/// Whether any `sh:select` / `sh:ask` query references `$shapesGraph`, so the
1102/// shapes graph must be mirrored into a named graph for evaluation.
1103fn shapes_reference_shapes_graph(shapes: &Loaded) -> bool {
1104    [vocab::SH_SELECT, vocab::SH_ASK].iter().any(|predicate| {
1105        shapes.graph.triples_for_predicate(*predicate).any(
1106            |t| matches!(t.object, oxrdf::TermRef::Literal(l) if l.value().contains("shapesGraph")),
1107        )
1108    })
1109}
1110
1111fn class_path() -> Path {
1112    Path::seq(vec![
1113        Path::Pred(vocab::rdf_type()),
1114        Path::star(Path::Pred(vocab::rdfs_subclassof())),
1115    ])
1116}
1117
1118/// Index `class → focus-data instances` under `rdf:type / rdfs:subClassOf*`.
1119///
1120/// One pass over the `rdf:type` triples replaces the per-shape forward scan
1121/// (`graph_nodes(data).filter(node is instance of c)`), which was
1122/// `O(shapes × nodes × type-closure)`. Each instance is attributed to every
1123/// superclass of its declared type, and the reflexive `subClassOf*` closure of
1124/// each distinct type is computed at most once. Only nodes present in the focus
1125/// (data) graph are indexed, matching the original target-selection semantics.
1126fn build_class_index(
1127    focus_data: &Graph,
1128    frozen: &FrozenIndexedDataset,
1129) -> HashMap<Term, Vec<Term>> {
1130    let focus_nodes = graph_nodes(focus_data);
1131    let subclass_star = Path::star(Path::Pred(vocab::rdfs_subclassof()));
1132    let mut supers: HashMap<Term, Vec<Term>> = HashMap::new();
1133    let mut index: HashMap<Term, Vec<Term>> = HashMap::new();
1134    let mut seen: HashSet<(Term, Term)> = HashSet::new();
1135    for (node, ty) in frozen.triples_for_predicate(&vocab::rdf_type()) {
1136        if !focus_nodes.contains(&node) {
1137            continue;
1138        }
1139        let classes = supers
1140            .entry(ty.clone())
1141            .or_insert_with(|| succ(frozen, &ty, &subclass_star).into_iter().collect());
1142        for class in classes.iter() {
1143            if seen.insert((class.clone(), node.clone())) {
1144                index.entry(class.clone()).or_default().push(node.clone());
1145            }
1146        }
1147    }
1148    index
1149}
1150
1151fn graph_nodes(graph: &Graph) -> HashSet<Term> {
1152    let mut nodes = HashSet::new();
1153    for triple in graph.iter() {
1154        nodes.insert(node_term(triple.subject));
1155        nodes.insert(triple.object.into_owned());
1156    }
1157    nodes
1158}
1159
1160fn node_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1161    crate::path::term_of(s.into_owned())
1162}
1163
1164fn node_term_ref(s: &NamedOrBlankNode) -> Term {
1165    match s {
1166        NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n.clone()),
1167        NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b.clone()),
1168    }
1169}
1170
1171fn map_node_kind(term: &Term) -> Option<NodeKindSet> {
1172    let Term::NamedNode(n) = term else {
1173        return None;
1174    };
1175    let r = n.as_ref();
1176    Some(if r == vocab::SH_IRI {
1177        NodeKindSet::IRI
1178    } else if r == vocab::SH_BLANK_NODE {
1179        NodeKindSet::BLANK_NODE
1180    } else if r == vocab::SH_LITERAL {
1181        NodeKindSet::LITERAL
1182    } else if r == vocab::SH_BLANK_NODE_OR_IRI {
1183        NodeKindSet::BLANK_NODE_OR_IRI
1184    } else if r == vocab::SH_BLANK_NODE_OR_LITERAL {
1185        NodeKindSet::BLANK_NODE_OR_LITERAL
1186    } else if r == vocab::SH_IRI_OR_LITERAL {
1187        NodeKindSet::IRI_OR_LITERAL
1188    } else {
1189        return None;
1190    })
1191}