1use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{
22 Path, Schema, Selector, Severity, Shape, ShapeArena, ShapeId, SparqlConstraint,
23};
24use shifty_opt::{FocusSource, PhysicalPlan, analyze};
25use std::cmp::Ordering;
26use std::collections::{BTreeSet, HashMap, HashSet};
27use std::fmt;
28use std::sync::OnceLock;
29
30#[derive(Debug, Clone, Copy)]
31struct EvalResult {
32 holds: bool,
33 cacheable: bool,
34}
35
36#[derive(Default)]
37struct EvalState {
38 memo: HashMap<(ShapeId, Term), bool>,
39 active: HashSet<(ShapeId, Term)>,
40 telemetry: Option<ShapeCacheSample>,
41}
42
43pub(crate) struct ShapeEvaluator<'a> {
49 g: &'a dyn PathBackend,
50 arena: &'a ShapeArena,
51 sparql: &'a SparqlExecutor,
52 state: EvalState,
53}
54
55impl<'a> ShapeEvaluator<'a> {
56 pub(crate) fn new(
57 g: &'a dyn PathBackend,
58 arena: &'a ShapeArena,
59 sparql: &'a SparqlExecutor,
60 ) -> Self {
61 Self {
62 g,
63 arena,
64 sparql,
65 state: EvalState {
66 telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
67 ..EvalState::default()
68 },
69 }
70 }
71
72 pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
73 holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
74 }
75
76 pub(crate) fn sparql(&self) -> &SparqlExecutor {
77 self.sparql
78 }
79
80 pub(crate) fn backend(&self) -> &dyn PathBackend {
82 self.g
83 }
84
85 pub(crate) fn arena(&self) -> &ShapeArena {
87 self.arena
88 }
89}
90
91impl Drop for ShapeEvaluator<'_> {
92 fn drop(&mut self) {
93 let Some(mut sample) = self.state.telemetry else {
94 return;
95 };
96 sample.entries = self.state.memo.len();
97 sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
98 crate::profile::record_shape_cache(sample);
99 }
100}
101
102#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct Reason {
105 pub value: Term,
107 pub path: Option<String>,
110 pub shape: ShapeId,
112 #[serde(default)]
114 pub severity: Severity,
115 pub message: String,
116 #[serde(default, skip_serializing_if = "Vec::is_empty")]
119 pub sub_reasons: Vec<Reason>,
120}
121
122#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct Violation {
125 pub focus: Term,
126 pub statement: usize,
128 pub severity: Severity,
130 pub reasons: Vec<Reason>,
131}
132
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
135pub struct ValidationOutcome {
136 pub conforms: bool,
137 pub violations: Vec<Violation>,
138}
139
140#[derive(Debug, Clone, PartialEq, Eq)]
142pub struct ValidationOptions {
143 pub minimum_severity: Severity,
146 pub sort_results: bool,
149}
150
151impl Default for ValidationOptions {
152 fn default() -> Self {
153 Self {
154 minimum_severity: Severity::Info,
155 sort_results: true,
156 }
157 }
158}
159
160fn most_severe(reasons: &[Reason]) -> Severity {
161 reasons
162 .iter()
163 .max_by_key(|reason| reason.severity.rank())
164 .map(|reason| reason.severity.clone())
165 .unwrap_or(Severity::Violation)
166}
167
168fn conforms_at_threshold(violations: &[Violation], minimum: &Severity) -> bool {
169 !violations
170 .iter()
171 .flat_map(|violation| &violation.reasons)
172 .any(|reason| reason.severity.meets(minimum))
173}
174
175fn sort_violations(violations: &mut [Violation], enabled: bool) {
176 if enabled {
177 violations.sort_by(|left, right| {
178 right
179 .severity
180 .rank()
181 .cmp(&left.severity.rank())
182 .then_with(|| left.focus.to_string().cmp(&right.focus.to_string()))
183 .then_with(|| left.statement.cmp(&right.statement))
184 });
185 }
186}
187
188#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
190pub enum ValidationGraphMode {
191 Data,
193 #[default]
196 Union,
197 UnionAll,
199}
200
201#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct NonStratifiable {
206 pub components: Vec<Vec<ShapeId>>,
207}
208
209impl fmt::Display for NonStratifiable {
210 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211 write!(f, "non-stratifiable schema (recursion through negation): ")?;
212 for (i, c) in self.components.iter().enumerate() {
213 if i > 0 {
214 write!(f, "; ")?;
215 }
216 let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
217 write!(f, "{{{}}}", ids.join(" "))?;
218 }
219 Ok(())
220 }
221}
222
223impl std::error::Error for NonStratifiable {}
224
225pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
234 validate_with_options(data, schema, &ValidationOptions::default())
235}
236
237pub fn validate_with_options(
239 data: &Graph,
240 schema: &Schema,
241 options: &ValidationOptions,
242) -> Result<ValidationOutcome, NonStratifiable> {
243 validate_with_context_and_options(data, data, schema, options)
244}
245
246pub fn validate_graphs(
248 data: &Graph,
249 shapes: &Graph,
250 schema: &Schema,
251) -> Result<ValidationOutcome, NonStratifiable> {
252 validate_graphs_with_mode_and_options(
253 data,
254 shapes,
255 schema,
256 ValidationGraphMode::default(),
257 &ValidationOptions::default(),
258 )
259}
260
261pub fn validate_graphs_with_mode(
263 data: &Graph,
264 shapes: &Graph,
265 schema: &Schema,
266 mode: ValidationGraphMode,
267) -> Result<ValidationOutcome, NonStratifiable> {
268 validate_graphs_with_mode_and_options(data, shapes, schema, mode, &ValidationOptions::default())
269}
270
271pub fn validate_graphs_with_mode_and_options(
273 data: &Graph,
274 shapes: &Graph,
275 schema: &Schema,
276 mode: ValidationGraphMode,
277 options: &ValidationOptions,
278) -> Result<ValidationOutcome, NonStratifiable> {
279 match mode {
280 ValidationGraphMode::Data => {
281 let uses_shapes = uses_shapes_graph(&schema.arena);
282 let frozen = if uses_shapes {
283 FrozenIndexedDataset::from_graphs(data, shapes)
284 } else {
285 FrozenIndexedDataset::from_graph(data)
286 };
287 validate_with_frozen(data, schema, frozen, uses_shapes, options)
288 }
289 ValidationGraphMode::Union => {
290 let uses_shapes = uses_shapes_graph(&schema.arena);
291 let frozen = if uses_shapes {
292 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
293 } else {
294 FrozenIndexedDataset::from_graph_union(data, shapes)
295 };
296 validate_with_frozen(data, schema, frozen, uses_shapes, options)
297 }
298 ValidationGraphMode::UnionAll => {
299 let union = graph_union(data, shapes);
300 validate_with_context_and_options(&union, &union, schema, options)
301 }
302 }
303}
304
305pub fn validate_with_context(
309 data: &Graph,
310 context: &Graph,
311 schema: &Schema,
312) -> Result<ValidationOutcome, NonStratifiable> {
313 validate_with_context_and_options(data, context, schema, &ValidationOptions::default())
314}
315
316pub fn validate_with_context_and_options(
318 data: &Graph,
319 context: &Graph,
320 schema: &Schema,
321 options: &ValidationOptions,
322) -> Result<ValidationOutcome, NonStratifiable> {
323 let uses_shapes = uses_shapes_graph(&schema.arena);
324 let frozen = if uses_shapes {
325 FrozenIndexedDataset::from_graphs(context, context)
326 } else {
327 FrozenIndexedDataset::from_graph(context)
328 };
329 validate_with_frozen(data, schema, frozen, uses_shapes, options)
330}
331
332fn validate_with_frozen(
333 data: &Graph,
334 schema: &Schema,
335 frozen: FrozenIndexedDataset,
336 has_shapes_graph: bool,
337 options: &ValidationOptions,
338) -> Result<ValidationOutcome, NonStratifiable> {
339 let strat = analyze(&schema.arena);
340 if !strat.stratifiable {
341 let components = strat
342 .strata
343 .iter()
344 .filter(|s| !s.stratifiable)
345 .map(|s| s.shapes.clone())
346 .collect();
347 return Err(NonStratifiable { components });
348 }
349
350 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
351 let backend = sparql
352 .frozen()
353 .expect("validation executor always has a frozen dataset");
354 let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
355 let mut violations = Vec::new();
356 for (i, st) in schema.statements.iter().enumerate() {
357 let label = schema
358 .names
359 .get(&st.shape)
360 .cloned()
361 .unwrap_or_else(|| format!("@{}", st.shape.0));
362 let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
363 prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
364 for v in foci {
365 let t = std::time::Instant::now();
366 let mut stack = HashSet::new();
367 let mut reasons = explain(
368 &mut evaluator,
369 &v,
370 st.shape,
371 None,
372 &Severity::Violation,
373 &mut stack,
374 );
375 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
376 dedup_reasons(&mut reasons);
377 if !reasons.is_empty() {
378 let severity = most_severe(&reasons);
379 violations.push(Violation {
380 focus: v,
381 statement: i,
382 severity,
383 reasons,
384 });
385 }
386 }
387 }
388 sort_violations(&mut violations, options.sort_results);
389 Ok(ValidationOutcome {
390 conforms: conforms_at_threshold(&violations, &options.minimum_severity),
391 violations,
392 })
393}
394
395pub(crate) fn uses_shapes_graph(arena: &ShapeArena) -> bool {
398 (0..arena.len()).any(|i| {
399 matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
400 })
401}
402
403pub(crate) fn graph_union(left: &Graph, right: &Graph) -> Graph {
404 let mut union = left.clone();
405 for triple in right.iter() {
406 union.insert(triple);
407 }
408 union
409}
410
411pub fn validate_plan(
417 data: &Graph,
418 plan: &PhysicalPlan,
419) -> Result<ValidationOutcome, NonStratifiable> {
420 validate_plan_with_options(data, plan, &ValidationOptions::default())
421}
422
423pub fn validate_plan_with_options(
425 data: &Graph,
426 plan: &PhysicalPlan,
427 options: &ValidationOptions,
428) -> Result<ValidationOutcome, NonStratifiable> {
429 validate_plan_with_context_and_options(data, data, plan, options)
430}
431
432pub fn validate_plan_graphs(
434 data: &Graph,
435 shapes: &Graph,
436 plan: &PhysicalPlan,
437) -> Result<ValidationOutcome, NonStratifiable> {
438 validate_plan_graphs_with_mode_and_options(
439 data,
440 shapes,
441 plan,
442 ValidationGraphMode::default(),
443 &ValidationOptions::default(),
444 )
445}
446
447pub fn validate_plan_graphs_with_mode(
449 data: &Graph,
450 shapes: &Graph,
451 plan: &PhysicalPlan,
452 mode: ValidationGraphMode,
453) -> Result<ValidationOutcome, NonStratifiable> {
454 validate_plan_graphs_with_mode_and_options(
455 data,
456 shapes,
457 plan,
458 mode,
459 &ValidationOptions::default(),
460 )
461}
462
463pub fn validate_plan_graphs_with_mode_and_options(
465 data: &Graph,
466 shapes: &Graph,
467 plan: &PhysicalPlan,
468 mode: ValidationGraphMode,
469 options: &ValidationOptions,
470) -> Result<ValidationOutcome, NonStratifiable> {
471 match mode {
472 ValidationGraphMode::Data => {
473 let uses_shapes = uses_shapes_graph(&plan.arena);
474 let frozen = if uses_shapes {
475 FrozenIndexedDataset::from_graphs(data, shapes)
476 } else {
477 FrozenIndexedDataset::from_graph(data)
478 };
479 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
480 }
481 ValidationGraphMode::Union => {
482 let uses_shapes = uses_shapes_graph(&plan.arena);
483 let frozen = if uses_shapes {
484 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
485 } else {
486 FrozenIndexedDataset::from_graph_union(data, shapes)
487 };
488 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
489 }
490 ValidationGraphMode::UnionAll => {
491 let union = graph_union(data, shapes);
492 validate_plan_with_context_and_options(&union, &union, plan, options)
493 }
494 }
495}
496
497pub fn validate_plan_with_context(
499 data: &Graph,
500 context: &Graph,
501 plan: &PhysicalPlan,
502) -> Result<ValidationOutcome, NonStratifiable> {
503 validate_plan_with_context_and_options(data, context, plan, &ValidationOptions::default())
504}
505
506pub fn validate_plan_with_context_and_options(
508 data: &Graph,
509 context: &Graph,
510 plan: &PhysicalPlan,
511 options: &ValidationOptions,
512) -> Result<ValidationOutcome, NonStratifiable> {
513 let uses_shapes = uses_shapes_graph(&plan.arena);
514 let frozen = if uses_shapes {
515 FrozenIndexedDataset::from_graphs(context, context)
516 } else {
517 FrozenIndexedDataset::from_graph(context)
518 };
519 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
520}
521
522fn validate_plan_with_frozen(
523 data: &Graph,
524 plan: &PhysicalPlan,
525 frozen: FrozenIndexedDataset,
526 has_shapes_graph: bool,
527 options: &ValidationOptions,
528) -> Result<ValidationOutcome, NonStratifiable> {
529 let strat = analyze(&plan.arena);
530 if !strat.stratifiable {
531 let components = strat
532 .strata
533 .iter()
534 .filter(|s| !s.stratifiable)
535 .map(|s| s.shapes.clone())
536 .collect();
537 return Err(NonStratifiable { components });
538 }
539
540 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
541 let backend = sparql
542 .frozen()
543 .expect("validation executor always has a frozen dataset");
544 let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
545 let mut violations = Vec::new();
546 for (i, sp) in plan.statements.iter().enumerate() {
547 let label = plan
548 .names
549 .get(&sp.shape)
550 .cloned()
551 .unwrap_or_else(|| format!("@{}", sp.shape.0));
552 let foci = focus_for_source(data, &sp.source, &mut evaluator);
553 prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
554 for v in foci {
555 let t = std::time::Instant::now();
556 let mut stack = HashSet::new();
557 let mut reasons = explain(
558 &mut evaluator,
559 &v,
560 sp.shape,
561 None,
562 &Severity::Violation,
563 &mut stack,
564 );
565 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
566 dedup_reasons(&mut reasons);
567 if !reasons.is_empty() {
568 let severity = most_severe(&reasons);
569 violations.push(Violation {
570 focus: v,
571 statement: i,
572 severity,
573 reasons,
574 });
575 }
576 }
577 }
578 sort_violations(&mut violations, options.sort_results);
579 Ok(ValidationOutcome {
580 conforms: conforms_at_threshold(&violations, &options.minimum_severity),
581 violations,
582 })
583}
584
585fn focus_for_source(
587 data: &Graph,
588 source: &FocusSource,
589 evaluator: &mut ShapeEvaluator<'_>,
590) -> Vec<Term> {
591 match source {
592 FocusSource::SubjectsOf(p) => subjects_of(data, p),
593 FocusSource::ObjectsOf(p) => objects_of(data, p),
594 FocusSource::Node(c) => vec![c.clone()],
595 FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
597 .into_iter()
598 .filter(|node| graph_contains_term(data, node))
599 .collect(),
600 FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
601 .into_iter()
602 .filter(|v| {
603 succ(evaluator.g, v, path)
604 .iter()
605 .any(|u| evaluator.holds(u, *qualifier))
606 })
607 .collect(),
608 FocusSource::Sparql(target) => {
609 let candidates = all_nodes(data);
610 evaluator
611 .sparql
612 .target_nodes(&target.query)
613 .unwrap_or_default()
614 .into_iter()
615 .filter(|node| candidates.contains(node))
616 .collect()
617 }
618 }
619}
620
621pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
623 let sparql =
624 SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
625 let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
626 focus_nodes_with_evaluator(data, sel, &mut evaluator)
627}
628
629pub(crate) fn focus_nodes_with(
630 data: &Graph,
631 backend: &dyn PathBackend,
632 sel: &Selector,
633 arena: &ShapeArena,
634 sparql: &SparqlExecutor,
635) -> Vec<Term> {
636 let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
637 focus_nodes_with_evaluator(data, sel, &mut evaluator)
638}
639
640fn focus_nodes_with_evaluator(
641 data: &Graph,
642 sel: &Selector,
643 evaluator: &mut ShapeEvaluator<'_>,
644) -> Vec<Term> {
645 match sel {
646 Selector::HasOut(q) => subjects_of(data, q),
647 Selector::HasIn(q) => objects_of(data, q),
648 Selector::IsConst(c) => vec![c.clone()],
649 Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
650 Shape::TestConst(target) => pred(evaluator.g, target, path)
655 .into_iter()
656 .filter(|node| graph_contains_term(data, node))
657 .collect(),
658 _ => all_nodes(data)
659 .into_iter()
660 .filter(|v| {
661 succ(evaluator.g, v, path)
662 .iter()
663 .any(|u| evaluator.holds(u, *qual))
664 })
665 .collect(),
666 },
667 Selector::Sparql(target) => {
668 let candidates = all_nodes(data);
669 evaluator
670 .sparql
671 .target_nodes(&target.query)
672 .unwrap_or_default()
673 .into_iter()
674 .filter(|node| candidates.contains(node))
675 .collect()
676 }
677 }
678}
679
680fn holds_memoized(
681 g: &dyn PathBackend,
682 arena: &ShapeArena,
683 v: &Term,
684 id: ShapeId,
685 sparql: &SparqlExecutor,
686 state: &mut EvalState,
687) -> EvalResult {
688 let key = (id, v.clone());
689 if let Some(&holds) = state.memo.get(&key) {
690 if let Some(telemetry) = state.telemetry.as_mut() {
691 telemetry.hits += 1;
692 }
693 return EvalResult {
694 holds,
695 cacheable: true,
696 };
697 }
698 if let Some(telemetry) = state.telemetry.as_mut() {
699 telemetry.misses += 1;
700 }
701 if !state.active.insert(key.clone()) {
702 if let Some(telemetry) = state.telemetry.as_mut() {
703 telemetry.recursion_back_edges += 1;
704 }
705 return EvalResult {
706 holds: true,
707 cacheable: false,
708 }; }
710 let result = match arena.get(id) {
711 Shape::Annotated { shape, .. } => holds_memoized(g, arena, v, *shape, sparql, state),
712 Shape::Top | Shape::Pending => EvalResult {
713 holds: true,
714 cacheable: true,
715 },
716 Shape::Sparql(constraint) => EvalResult {
717 holds: sparql
718 .constraint_violations(constraint, v)
719 .is_ok_and(|violations| violations.is_empty()),
720 cacheable: true,
721 },
722 Shape::TestConst(c) => EvalResult {
723 holds: v == c,
724 cacheable: true,
725 },
726 Shape::TestType(t) => EvalResult {
727 holds: value_type_holds(t, v),
728 cacheable: true,
729 },
730 Shape::TestKind(k) => EvalResult {
731 holds: k.matches(v),
732 cacheable: true,
733 },
734 Shape::Closed(q) => EvalResult {
735 holds: closed_offenders(g, v, q).is_empty(),
736 cacheable: true,
737 },
738 Shape::Eq(path, p) => EvalResult {
739 holds: succ(g, v, path) == objects(g, v, p),
740 cacheable: true,
741 },
742 Shape::Disj(path, p) => EvalResult {
743 holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
744 cacheable: true,
745 },
746 Shape::Lt(path, p) => EvalResult {
747 holds: all_pairs_ordered(g, v, path, p, false),
748 cacheable: true,
749 },
750 Shape::Le(path, p) => EvalResult {
751 holds: all_pairs_ordered(g, v, path, p, true),
752 cacheable: true,
753 },
754 Shape::UniqueLang(path) => EvalResult {
755 holds: unique_lang(&succ(g, v, path)),
756 cacheable: true,
757 },
758 Shape::Not(c) => {
759 let child = holds_memoized(g, arena, v, *c, sparql, state);
760 EvalResult {
761 holds: !child.holds,
762 cacheable: child.cacheable,
763 }
764 }
765 Shape::And(cs) => {
766 let mut result = EvalResult {
767 holds: true,
768 cacheable: true,
769 };
770 for child in cs {
771 let child = holds_memoized(g, arena, v, *child, sparql, state);
772 result.cacheable &= child.cacheable;
773 if !child.holds {
774 result.holds = false;
775 break;
776 }
777 }
778 result
779 }
780 Shape::Or(cs) => {
781 let mut result = EvalResult {
782 holds: false,
783 cacheable: true,
784 };
785 for child in cs {
786 let child = holds_memoized(g, arena, v, *child, sparql, state);
787 result.cacheable &= child.cacheable;
788 if child.holds {
789 result.holds = true;
790 break;
791 }
792 }
793 result
794 }
795 Shape::Count {
796 path,
797 min,
798 max,
799 qualifier,
800 } => {
801 let mut n = 0;
802 let mut cacheable = true;
803 for value in succ(g, v, path) {
804 let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
805 cacheable &= qualified.cacheable;
806 n += u64::from(qualified.holds);
807 }
808 EvalResult {
809 holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
810 cacheable,
811 }
812 }
813 };
814 state.active.remove(&key);
815 if result.cacheable {
816 state.memo.insert(key, result.holds);
817 if let Some(telemetry) = state.telemetry.as_mut() {
818 telemetry.insertions += 1;
819 }
820 } else if let Some(telemetry) = state.telemetry.as_mut() {
821 telemetry.non_cacheable_results += 1;
822 }
823 result
824}
825
826fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
827 const CONTROL_BYTE_ESTIMATE: usize = 1;
828 let bucket_bytes =
829 memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
830 bucket_bytes
831 + memo
832 .keys()
833 .map(|(_, term)| estimated_term_heap_bytes(term))
834 .sum::<usize>()
835}
836
837fn estimated_term_heap_bytes(term: &Term) -> usize {
838 match term {
839 Term::NamedNode(node) => node.as_str().len(),
840 Term::BlankNode(node) => node.as_str().len(),
841 Term::Literal(literal) => {
842 literal.value().len()
843 + literal.language().map_or_else(
844 || {
845 let datatype = literal.datatype();
846 if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
847 0
848 } else {
849 datatype.as_str().len()
850 }
851 },
852 str::len,
853 )
854 }
855 }
856}
857
858fn prefetch_sparql_constraints(
867 arena: &ShapeArena,
868 root: ShapeId,
869 foci: &[Term],
870 sparql: &SparqlExecutor,
871) {
872 if foci.len() < 2 {
873 return;
874 }
875 let mut constraints = Vec::new();
876 let mut seen = HashSet::new();
877 collect_focus_sparql(arena, root, &mut seen, &mut constraints);
878 for constraint in constraints {
879 let _ = sparql.prefetch_constraint(constraint, foci);
880 }
881}
882
883fn collect_focus_sparql<'a>(
884 arena: &'a ShapeArena,
885 id: ShapeId,
886 seen: &mut HashSet<ShapeId>,
887 out: &mut Vec<&'a SparqlConstraint>,
888) {
889 if !seen.insert(id) {
890 return; }
892 match arena.get(id) {
893 Shape::Annotated { shape, .. } => collect_focus_sparql(arena, *shape, seen, out),
894 Shape::Sparql(constraint) => out.push(constraint),
895 Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
896 Shape::And(ids) | Shape::Or(ids) => {
897 for &child in ids {
898 collect_focus_sparql(arena, child, seen, out);
899 }
900 }
901 _ => {}
903 }
904}
905
906fn explain(
909 evaluator: &mut ShapeEvaluator<'_>,
910 node: &Term,
911 id: ShapeId,
912 path_ctx: Option<&str>,
913 severity: &Severity,
914 stack: &mut HashSet<(ShapeId, Term)>,
915) -> Vec<Reason> {
916 let key = (id, node.clone());
917 if !stack.insert(key.clone()) {
918 return Vec::new(); }
920 if evaluator.holds(node, id) {
921 stack.remove(&key);
922 return Vec::new();
923 }
924 let reasons = match evaluator.arena.get(id).clone() {
925 Shape::Annotated {
926 severity: source_severity,
927 shape,
928 } => explain(evaluator, node, shape, path_ctx, &source_severity, stack),
929 Shape::Top | Shape::Pending => Vec::new(),
930 Shape::Sparql(constraint) => {
931 match evaluator.sparql.constraint_violations(&constraint, node) {
932 Ok(violations) => violations
933 .into_iter()
934 .map(|violation| {
935 let message = sparql_violation_message(&violation, &constraint, node);
938 Reason {
939 value: violation.value.unwrap_or_else(|| node.clone()),
940 path: violation
941 .path
942 .map(|path| path.to_string())
943 .or_else(|| path_ctx.map(str::to_string))
944 .or_else(|| constraint.path.as_ref().map(path_to_string)),
945 message,
946 shape: id,
947 severity: severity.clone(),
948 sub_reasons: Vec::new(),
949 }
950 })
951 .collect(),
952 Err(error) => vec![Reason {
953 value: node.clone(),
954 path: path_ctx.map(str::to_string),
955 shape: id,
956 severity: severity.clone(),
957 message: format!("SPARQL constraint evaluation failed: {error}"),
958 sub_reasons: Vec::new(),
959 }],
960 }
961 }
962 Shape::TestConst(_)
963 | Shape::TestType(_)
964 | Shape::TestKind(_)
965 | Shape::Eq(..)
966 | Shape::Disj(..)
967 | Shape::Lt(..)
968 | Shape::Le(..)
969 | Shape::UniqueLang(_) => leaf(
970 evaluator.holds(node, id),
971 node,
972 id,
973 path_ctx,
974 severity,
975 format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
976 ),
977 Shape::Closed(q) => {
978 let bad = closed_offenders(evaluator.g, node, &q);
979 if bad.is_empty() {
980 Vec::new()
981 } else {
982 let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
983 vec![Reason {
984 value: node.clone(),
985 path: path_ctx.map(str::to_string),
986 shape: id,
987 severity: severity.clone(),
988 message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
989 sub_reasons: Vec::new(),
990 }]
991 }
992 }
993 Shape::Not(c) => {
994 if explain(evaluator, node, c, path_ctx, severity, stack).is_empty() {
995 vec![Reason {
996 value: node.clone(),
997 path: path_ctx.map(str::to_string),
998 shape: id,
999 severity: severity.clone(),
1000 message: "negated shape unexpectedly held".to_string(),
1001 sub_reasons: Vec::new(),
1002 }]
1003 } else {
1004 Vec::new()
1005 }
1006 }
1007 Shape::And(cs) => cs
1008 .iter()
1009 .flat_map(|c| explain(evaluator, node, *c, path_ctx, severity, stack))
1010 .collect(),
1011 Shape::Or(cs) => {
1012 let mut sub_reasons = Vec::new();
1013 let mut satisfied = false;
1014 for c in &cs {
1015 let sub = explain(evaluator, node, *c, path_ctx, severity, stack);
1016 if sub.is_empty() {
1017 satisfied = true;
1018 break;
1019 }
1020 sub_reasons.extend(sub);
1021 }
1022 if satisfied {
1023 Vec::new()
1024 } else {
1025 vec![Reason {
1026 value: node.clone(),
1027 path: path_ctx.map(str::to_string),
1028 shape: id,
1029 severity: severity.clone(),
1030 message: format!("none of {} alternative(s) satisfied", cs.len()),
1031 sub_reasons,
1032 }]
1033 }
1034 }
1035 Shape::Count {
1036 path,
1037 min,
1038 max,
1039 qualifier,
1040 } => explain_count(
1041 evaluator, node, id, &path, min, max, qualifier, severity, stack,
1042 ),
1043 };
1044 stack.remove(&key);
1045 reasons
1046}
1047
1048#[allow(clippy::too_many_arguments)]
1049fn explain_count(
1050 evaluator: &mut ShapeEvaluator<'_>,
1051 node: &Term,
1052 id: ShapeId,
1053 path: &Path,
1054 min: Option<u64>,
1055 max: Option<u64>,
1056 qualifier: ShapeId,
1057 severity: &Severity,
1058 stack: &mut HashSet<(ShapeId, Term)>,
1059) -> Vec<Reason> {
1060 let path_str = path_to_string(path);
1061 let matched: Vec<Term> = succ(evaluator.g, node, path)
1062 .into_iter()
1063 .filter(|u| evaluator.holds(u, qualifier))
1064 .collect();
1065 let n = matched.len() as u64;
1066 let mut reasons = Vec::new();
1067
1068 if let Some(mx) = max
1069 && n > mx
1070 {
1071 match evaluator.arena.get(qualifier).clone() {
1072 Shape::Not(inner) if mx == 0 => {
1074 for u in &matched {
1075 reasons.extend(explain(
1076 evaluator,
1077 u,
1078 inner,
1079 Some(&path_str),
1080 severity,
1081 stack,
1082 ));
1083 }
1084 }
1085 _ => reasons.push(Reason {
1086 value: node.clone(),
1087 path: Some(path_str.clone()),
1088 shape: id,
1089 severity: severity.clone(),
1090 message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
1091 sub_reasons: Vec::new(),
1092 }),
1093 }
1094 }
1095
1096 if let Some(mn) = min
1097 && n < mn
1098 {
1099 reasons.push(Reason {
1100 value: node.clone(),
1101 path: Some(path_str.clone()),
1102 shape: id,
1103 severity: severity.clone(),
1104 message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
1105 sub_reasons: Vec::new(),
1106 });
1107 }
1108
1109 reasons
1110}
1111
1112fn sparql_violation_message(
1117 violation: &SparqlViolation,
1118 constraint: &SparqlConstraint,
1119 node: &Term,
1120) -> String {
1121 if let Some(message) = &violation.message {
1122 return term_text(message);
1123 }
1124 if !constraint.messages.is_empty() {
1125 return constraint
1126 .messages
1127 .iter()
1128 .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
1129 .collect::<Vec<_>>()
1130 .join("; ");
1131 }
1132 let mut message = match &constraint.shape {
1133 Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
1134 None => "SPARQL constraint not satisfied".to_string(),
1135 };
1136 if let Some(value) = &violation.value {
1137 message.push_str(&format!(" (value: {value})"));
1138 }
1139 message
1140}
1141
1142pub(crate) fn apply_message_template(
1147 template: &str,
1148 focus: &Term,
1149 bindings: &HashMap<String, Term>,
1150) -> String {
1151 static RE: OnceLock<Regex> = OnceLock::new();
1152 let re = RE
1153 .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
1154 re.replace_all(template, |caps: ®ex::Captures| {
1155 let placeholder = &caps[1];
1156 let name = &placeholder[1..]; let term = if name == "this" {
1158 Some(focus)
1159 } else {
1160 bindings.get(name)
1161 };
1162 term.map(|t| match t {
1163 Term::NamedNode(n) => format!("<{}>", n.as_str()),
1164 Term::BlankNode(b) => format!("_:{}", b.as_str()),
1165 Term::Literal(l) => l.value().to_string(),
1166 })
1167 .unwrap_or_else(|| placeholder.to_string())
1168 })
1169 .to_string()
1170}
1171
1172fn term_text(term: &Term) -> String {
1175 match term {
1176 Term::Literal(literal) => literal.value().to_string(),
1177 other => other.to_string(),
1178 }
1179}
1180
1181fn leaf(
1182 ok: bool,
1183 node: &Term,
1184 id: ShapeId,
1185 path_ctx: Option<&str>,
1186 severity: &Severity,
1187 message: String,
1188) -> Vec<Reason> {
1189 if ok {
1190 Vec::new()
1191 } else {
1192 vec![Reason {
1193 value: node.clone(),
1194 path: path_ctx.map(str::to_string),
1195 shape: id,
1196 severity: severity.clone(),
1197 message,
1198 sub_reasons: Vec::new(),
1199 }]
1200 }
1201}
1202
1203fn all_pairs_ordered(
1204 g: &dyn PathBackend,
1205 v: &Term,
1206 path: &Path,
1207 p: &NamedNode,
1208 allow_eq: bool,
1209) -> bool {
1210 let lhs = succ(g, v, path);
1211 let rhs = objects(g, v, p);
1212 for a in &lhs {
1213 for b in &rhs {
1214 match compare_terms(a, b) {
1215 Some(Ordering::Less) => {}
1216 Some(Ordering::Equal) if allow_eq => {}
1217 _ => return false,
1218 }
1219 }
1220 }
1221 true
1222}
1223
1224fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1225 succ(g, v, &Path::Pred(p.clone()))
1226}
1227
1228fn closed_offenders(
1230 g: &dyn PathBackend,
1231 node: &Term,
1232 q: &BTreeSet<NamedNode>,
1233) -> BTreeSet<NamedNode> {
1234 g.out_predicates(node)
1235 .into_iter()
1236 .filter(|p| !q.contains(p))
1237 .collect()
1238}
1239
1240fn unique_lang(values: &HashSet<Term>) -> bool {
1241 let mut seen = HashSet::new();
1242 for term in values {
1243 if let Term::Literal(l) = term
1244 && let Some(lang) = l.language()
1245 && !seen.insert(lang.to_ascii_lowercase())
1246 {
1247 return false;
1248 }
1249 }
1250 true
1251}
1252
1253fn dedup_reasons(reasons: &mut Vec<Reason>) {
1254 let mut seen = HashSet::new();
1255 reasons.retain(|r| {
1256 seen.insert((
1257 r.value.to_string(),
1258 r.message.clone(),
1259 r.severity.as_str().to_string(),
1260 ))
1261 });
1262}
1263
1264fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1265 crate::path::term_of(s.into_owned())
1266}
1267
1268fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1270 let mut seen = HashSet::new();
1271 data.triples_for_predicate(p.as_ref())
1272 .filter_map(|t| {
1273 let term = subject_term(t.subject);
1274 seen.insert(term.clone()).then_some(term)
1275 })
1276 .collect()
1277}
1278
1279fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1281 let mut seen = HashSet::new();
1282 data.triples_for_predicate(p.as_ref())
1283 .filter_map(|t| {
1284 let term = t.object.into_owned();
1285 seen.insert(term.clone()).then_some(term)
1286 })
1287 .collect()
1288}
1289
1290fn all_nodes(g: &Graph) -> HashSet<Term> {
1292 let mut nodes = HashSet::new();
1293 for t in g.iter() {
1294 nodes.insert(subject_term(t.subject));
1295 nodes.insert(t.object.into_owned());
1296 }
1297 nodes
1298}
1299
1300fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1302 node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1303 || g.triples_for_object(term).next().is_some()
1304}
1305
1306#[cfg(test)]
1307mod tests {
1308 use super::*;
1309 use oxrdf::{NamedNode, Triple};
1310
1311 fn iri(local: &str) -> NamedNode {
1312 NamedNode::new(format!("http://ex/{local}")).unwrap()
1313 }
1314
1315 fn term(local: &str) -> Term {
1316 Term::NamedNode(iri(local))
1317 }
1318
1319 #[test]
1320 fn memoizes_shared_value_checks_across_focus_nodes() {
1321 let p = iri("p");
1322 let shared = term("shared");
1323 let mut graph = Graph::new();
1324 graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1325 graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1326
1327 let mut arena = ShapeArena::new();
1328 let qualifier = arena.insert(Shape::TestConst(shared));
1329 let root = arena.insert(Shape::Count {
1330 path: Path::Pred(p),
1331 min: Some(1),
1332 max: None,
1333 qualifier,
1334 });
1335 let sparql = SparqlExecutor::new(&graph).unwrap();
1336 crate::profile::enable();
1337 {
1338 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1339 assert!(evaluator.holds(&term("a"), root));
1340 assert!(evaluator.holds(&term("b"), root));
1341 }
1342 let profile = crate::profile::take().unwrap();
1343 let cache = profile.shape_cache();
1344 assert_eq!(cache.evaluators, 1);
1345 assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1346 assert_eq!(cache.peak_entries, 3);
1347 assert!(cache.estimated_peak_bytes > 0);
1348 }
1349
1350 #[test]
1351 fn does_not_cache_cycle_dependent_results() {
1352 let mut arena = ShapeArena::new();
1356 let a = arena.reserve();
1357 let b = arena.reserve();
1358 let bottom = arena.insert(Shape::Or(Vec::new()));
1359 arena.set(a, Shape::And(vec![b, bottom]));
1360 arena.set(b, Shape::And(vec![a]));
1361
1362 let graph = Graph::new();
1363 let sparql = SparqlExecutor::new(&graph).unwrap();
1364 let node = term("x");
1365
1366 crate::profile::enable();
1367 {
1368 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1369 assert!(!evaluator.holds(&node, a));
1370 assert!(!evaluator.holds(&node, b));
1371 }
1372 let profile = crate::profile::take().unwrap();
1373 let cache = profile.shape_cache();
1374 assert!(cache.recursion_back_edges > 0);
1375 assert!(cache.non_cacheable_results > 0);
1376 }
1377}