1use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{
22 Path, Schema, Selector, Severity, Shape, ShapeArena, ShapeId, SparqlConstraint,
23};
24use shifty_opt::{FocusSource, PhysicalPlan, analyze};
25use std::cmp::Ordering;
26use std::collections::{BTreeSet, HashMap, HashSet};
27use std::fmt;
28use std::sync::OnceLock;
29
30#[derive(Debug, Clone, Copy)]
31struct EvalResult {
32 holds: bool,
33 cacheable: bool,
34}
35
36#[derive(Default)]
37struct EvalState {
38 memo: HashMap<(ShapeId, Term), bool>,
39 active: HashSet<(ShapeId, Term)>,
40 telemetry: Option<ShapeCacheSample>,
41}
42
43pub(crate) struct ShapeEvaluator<'a> {
49 g: &'a dyn PathBackend,
50 arena: &'a ShapeArena,
51 sparql: &'a SparqlExecutor,
52 state: EvalState,
53}
54
55impl<'a> ShapeEvaluator<'a> {
56 pub(crate) fn new(
57 g: &'a dyn PathBackend,
58 arena: &'a ShapeArena,
59 sparql: &'a SparqlExecutor,
60 ) -> Self {
61 Self {
62 g,
63 arena,
64 sparql,
65 state: EvalState {
66 telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
67 ..EvalState::default()
68 },
69 }
70 }
71
72 pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
73 holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
74 }
75
76 pub(crate) fn sparql(&self) -> &SparqlExecutor {
77 self.sparql
78 }
79
80 pub(crate) fn backend(&self) -> &dyn PathBackend {
82 self.g
83 }
84
85 pub(crate) fn arena(&self) -> &ShapeArena {
87 self.arena
88 }
89}
90
91impl Drop for ShapeEvaluator<'_> {
92 fn drop(&mut self) {
93 let Some(mut sample) = self.state.telemetry else {
94 return;
95 };
96 sample.entries = self.state.memo.len();
97 sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
98 crate::profile::record_shape_cache(sample);
99 }
100}
101
102#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
104pub struct Reason {
105 pub value: Term,
107 pub path: Option<String>,
110 pub shape: ShapeId,
112 #[serde(default)]
114 pub severity: Severity,
115 pub message: String,
116 #[serde(default, skip_serializing_if = "Vec::is_empty")]
119 pub sub_reasons: Vec<Reason>,
120}
121
122#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct Violation {
125 pub focus: Term,
126 pub statement: usize,
128 pub severity: Severity,
130 pub reasons: Vec<Reason>,
131}
132
133#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
135pub struct ValidationOutcome {
136 pub conforms: bool,
137 pub violations: Vec<Violation>,
138}
139
140#[derive(Debug, Clone, PartialEq, Eq)]
142pub struct ValidationOptions {
143 pub minimum_severity: Severity,
146 pub sort_results: bool,
149}
150
151impl Default for ValidationOptions {
152 fn default() -> Self {
153 Self {
154 minimum_severity: Severity::Info,
155 sort_results: true,
156 }
157 }
158}
159
160fn most_severe(reasons: &[Reason]) -> Severity {
161 reasons
162 .iter()
163 .max_by_key(|reason| reason.severity.rank())
164 .map(|reason| reason.severity.clone())
165 .unwrap_or(Severity::Violation)
166}
167
168fn conforms_at_threshold(violations: &[Violation], minimum: &Severity) -> bool {
169 !violations
170 .iter()
171 .flat_map(|violation| &violation.reasons)
172 .any(|reason| reason.severity.meets(minimum))
173}
174
175fn sort_violations(violations: &mut [Violation], enabled: bool) {
176 if enabled {
177 violations.sort_by(|left, right| {
178 right
179 .severity
180 .rank()
181 .cmp(&left.severity.rank())
182 .then_with(|| left.focus.to_string().cmp(&right.focus.to_string()))
183 .then_with(|| left.statement.cmp(&right.statement))
184 });
185 }
186}
187
188#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
190pub enum ValidationGraphMode {
191 Data,
193 #[default]
196 Union,
197 UnionAll,
199}
200
201#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct NonStratifiable {
206 pub components: Vec<Vec<ShapeId>>,
207}
208
209impl fmt::Display for NonStratifiable {
210 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
211 write!(f, "non-stratifiable schema (recursion through negation): ")?;
212 for (i, c) in self.components.iter().enumerate() {
213 if i > 0 {
214 write!(f, "; ")?;
215 }
216 let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
217 write!(f, "{{{}}}", ids.join(" "))?;
218 }
219 Ok(())
220 }
221}
222
223impl std::error::Error for NonStratifiable {}
224
225pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
234 validate_with_options(data, schema, &ValidationOptions::default())
235}
236
237pub fn validate_with_options(
239 data: &Graph,
240 schema: &Schema,
241 options: &ValidationOptions,
242) -> Result<ValidationOutcome, NonStratifiable> {
243 validate_with_context_and_options(data, data, schema, options)
244}
245
246pub fn validate_graphs(
248 data: &Graph,
249 shapes: &Graph,
250 schema: &Schema,
251) -> Result<ValidationOutcome, NonStratifiable> {
252 validate_graphs_with_mode_and_options(
253 data,
254 shapes,
255 schema,
256 ValidationGraphMode::default(),
257 &ValidationOptions::default(),
258 )
259}
260
261pub fn validate_graphs_with_mode(
263 data: &Graph,
264 shapes: &Graph,
265 schema: &Schema,
266 mode: ValidationGraphMode,
267) -> Result<ValidationOutcome, NonStratifiable> {
268 validate_graphs_with_mode_and_options(data, shapes, schema, mode, &ValidationOptions::default())
269}
270
271pub fn validate_graphs_with_mode_and_options(
273 data: &Graph,
274 shapes: &Graph,
275 schema: &Schema,
276 mode: ValidationGraphMode,
277 options: &ValidationOptions,
278) -> Result<ValidationOutcome, NonStratifiable> {
279 match mode {
280 ValidationGraphMode::Data => {
281 let uses_shapes = uses_shapes_graph(&schema.arena);
282 let frozen = if uses_shapes {
283 FrozenIndexedDataset::from_graphs(data, shapes)
284 } else {
285 FrozenIndexedDataset::from_graph(data)
286 };
287 validate_with_frozen(data, schema, frozen, uses_shapes, options)
288 }
289 ValidationGraphMode::Union => {
290 let uses_shapes = uses_shapes_graph(&schema.arena);
291 let frozen = if uses_shapes {
292 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
293 } else {
294 FrozenIndexedDataset::from_graph_union(data, shapes)
295 };
296 validate_with_frozen(data, schema, frozen, uses_shapes, options)
297 }
298 ValidationGraphMode::UnionAll => {
299 let union = graph_union(data, shapes);
300 validate_with_context_and_options(&union, &union, schema, options)
301 }
302 }
303}
304
305pub fn validate_with_context(
309 data: &Graph,
310 context: &Graph,
311 schema: &Schema,
312) -> Result<ValidationOutcome, NonStratifiable> {
313 validate_with_context_and_options(data, context, schema, &ValidationOptions::default())
314}
315
316pub fn validate_with_context_and_options(
318 data: &Graph,
319 context: &Graph,
320 schema: &Schema,
321 options: &ValidationOptions,
322) -> Result<ValidationOutcome, NonStratifiable> {
323 let uses_shapes = uses_shapes_graph(&schema.arena);
324 let frozen = if uses_shapes {
325 FrozenIndexedDataset::from_graphs(context, context)
326 } else {
327 FrozenIndexedDataset::from_graph(context)
328 };
329 validate_with_frozen(data, schema, frozen, uses_shapes, options)
330}
331
332fn validate_with_frozen(
333 data: &Graph,
334 schema: &Schema,
335 frozen: FrozenIndexedDataset,
336 has_shapes_graph: bool,
337 options: &ValidationOptions,
338) -> Result<ValidationOutcome, NonStratifiable> {
339 let strat = analyze(&schema.arena);
340 if !strat.stratifiable {
341 let components = strat
342 .strata
343 .iter()
344 .filter(|s| !s.stratifiable)
345 .map(|s| s.shapes.clone())
346 .collect();
347 return Err(NonStratifiable { components });
348 }
349
350 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
351 let backend = sparql
352 .frozen()
353 .expect("validation executor always has a frozen dataset");
354 let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
355 let mut violations = Vec::new();
356 for (i, st) in schema.statements.iter().enumerate() {
357 let label = schema
358 .names
359 .get(&st.shape)
360 .cloned()
361 .unwrap_or_else(|| format!("@{}", st.shape.0));
362 let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
363 prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
364 for v in foci {
365 let t = std::time::Instant::now();
366 let mut stack = HashSet::new();
367 let mut reasons = explain(
368 &mut evaluator,
369 &v,
370 st.shape,
371 None,
372 &Severity::Violation,
373 &mut stack,
374 );
375 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
376 dedup_reasons(&mut reasons);
377 if !reasons.is_empty() {
378 let severity = most_severe(&reasons);
379 violations.push(Violation {
380 focus: v,
381 statement: i,
382 severity,
383 reasons,
384 });
385 }
386 }
387 }
388 sort_violations(&mut violations, options.sort_results);
389 Ok(ValidationOutcome {
390 conforms: conforms_at_threshold(&violations, &options.minimum_severity),
391 violations,
392 })
393}
394
395pub(crate) fn uses_shapes_graph(arena: &ShapeArena) -> bool {
398 (0..arena.len()).any(|i| {
399 matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
400 })
401}
402
403pub fn graph_union(left: &Graph, right: &Graph) -> Graph {
407 let mut union = left.clone();
408 for triple in right.iter() {
409 union.insert(triple);
410 }
411 union
412}
413
414pub fn validate_plan(
420 data: &Graph,
421 plan: &PhysicalPlan,
422) -> Result<ValidationOutcome, NonStratifiable> {
423 validate_plan_with_options(data, plan, &ValidationOptions::default())
424}
425
426pub fn validate_plan_with_options(
428 data: &Graph,
429 plan: &PhysicalPlan,
430 options: &ValidationOptions,
431) -> Result<ValidationOutcome, NonStratifiable> {
432 validate_plan_with_context_and_options(data, data, plan, options)
433}
434
435pub fn validate_plan_graphs(
437 data: &Graph,
438 shapes: &Graph,
439 plan: &PhysicalPlan,
440) -> Result<ValidationOutcome, NonStratifiable> {
441 validate_plan_graphs_with_mode_and_options(
442 data,
443 shapes,
444 plan,
445 ValidationGraphMode::default(),
446 &ValidationOptions::default(),
447 )
448}
449
450pub fn validate_plan_graphs_with_mode(
452 data: &Graph,
453 shapes: &Graph,
454 plan: &PhysicalPlan,
455 mode: ValidationGraphMode,
456) -> Result<ValidationOutcome, NonStratifiable> {
457 validate_plan_graphs_with_mode_and_options(
458 data,
459 shapes,
460 plan,
461 mode,
462 &ValidationOptions::default(),
463 )
464}
465
466pub fn validate_plan_graphs_with_mode_and_options(
468 data: &Graph,
469 shapes: &Graph,
470 plan: &PhysicalPlan,
471 mode: ValidationGraphMode,
472 options: &ValidationOptions,
473) -> Result<ValidationOutcome, NonStratifiable> {
474 match mode {
475 ValidationGraphMode::Data => {
476 let uses_shapes = uses_shapes_graph(&plan.arena);
477 let frozen = if uses_shapes {
478 FrozenIndexedDataset::from_graphs(data, shapes)
479 } else {
480 FrozenIndexedDataset::from_graph(data)
481 };
482 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
483 }
484 ValidationGraphMode::Union => {
485 let uses_shapes = uses_shapes_graph(&plan.arena);
486 let frozen = if uses_shapes {
487 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
488 } else {
489 FrozenIndexedDataset::from_graph_union(data, shapes)
490 };
491 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
492 }
493 ValidationGraphMode::UnionAll => {
494 let union = graph_union(data, shapes);
495 validate_plan_with_context_and_options(&union, &union, plan, options)
496 }
497 }
498}
499
500pub fn validate_plan_with_context(
502 data: &Graph,
503 context: &Graph,
504 plan: &PhysicalPlan,
505) -> Result<ValidationOutcome, NonStratifiable> {
506 validate_plan_with_context_and_options(data, context, plan, &ValidationOptions::default())
507}
508
509pub fn validate_plan_with_context_and_options(
511 data: &Graph,
512 context: &Graph,
513 plan: &PhysicalPlan,
514 options: &ValidationOptions,
515) -> Result<ValidationOutcome, NonStratifiable> {
516 let uses_shapes = uses_shapes_graph(&plan.arena);
517 let frozen = if uses_shapes {
518 FrozenIndexedDataset::from_graphs(context, context)
519 } else {
520 FrozenIndexedDataset::from_graph(context)
521 };
522 validate_plan_with_frozen(data, plan, frozen, uses_shapes, options)
523}
524
525fn validate_plan_with_frozen(
526 data: &Graph,
527 plan: &PhysicalPlan,
528 frozen: FrozenIndexedDataset,
529 has_shapes_graph: bool,
530 options: &ValidationOptions,
531) -> Result<ValidationOutcome, NonStratifiable> {
532 let strat = analyze(&plan.arena);
533 if !strat.stratifiable {
534 let components = strat
535 .strata
536 .iter()
537 .filter(|s| !s.stratifiable)
538 .map(|s| s.shapes.clone())
539 .collect();
540 return Err(NonStratifiable { components });
541 }
542
543 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
544 let backend = sparql
545 .frozen()
546 .expect("validation executor always has a frozen dataset");
547 let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
548 let mut violations = Vec::new();
549 for (i, sp) in plan.statements.iter().enumerate() {
550 let label = plan
551 .names
552 .get(&sp.shape)
553 .cloned()
554 .unwrap_or_else(|| format!("@{}", sp.shape.0));
555 let foci = focus_for_source(data, &sp.source, &mut evaluator);
556 prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
557 for v in foci {
558 let t = std::time::Instant::now();
559 let mut stack = HashSet::new();
560 let mut reasons = explain(
561 &mut evaluator,
562 &v,
563 sp.shape,
564 None,
565 &Severity::Violation,
566 &mut stack,
567 );
568 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
569 dedup_reasons(&mut reasons);
570 if !reasons.is_empty() {
571 let severity = most_severe(&reasons);
572 violations.push(Violation {
573 focus: v,
574 statement: i,
575 severity,
576 reasons,
577 });
578 }
579 }
580 }
581 sort_violations(&mut violations, options.sort_results);
582 Ok(ValidationOutcome {
583 conforms: conforms_at_threshold(&violations, &options.minimum_severity),
584 violations,
585 })
586}
587
588fn focus_for_source(
590 data: &Graph,
591 source: &FocusSource,
592 evaluator: &mut ShapeEvaluator<'_>,
593) -> Vec<Term> {
594 match source {
595 FocusSource::SubjectsOf(p) => subjects_of(data, p),
596 FocusSource::ObjectsOf(p) => objects_of(data, p),
597 FocusSource::Node(c) => vec![c.clone()],
598 FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
600 .into_iter()
601 .filter(|node| graph_contains_term(data, node))
602 .collect(),
603 FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
604 .into_iter()
605 .filter(|v| {
606 succ(evaluator.g, v, path)
607 .iter()
608 .any(|u| evaluator.holds(u, *qualifier))
609 })
610 .collect(),
611 FocusSource::Sparql(target) => {
612 let candidates = all_nodes(data);
613 evaluator
614 .sparql
615 .target_nodes(&target.query)
616 .unwrap_or_default()
617 .into_iter()
618 .filter(|node| candidates.contains(node))
619 .collect()
620 }
621 }
622}
623
624pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
626 let sparql =
627 SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
628 let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
629 focus_nodes_with_evaluator(data, sel, &mut evaluator)
630}
631
632pub(crate) fn focus_nodes_with(
633 data: &Graph,
634 backend: &dyn PathBackend,
635 sel: &Selector,
636 arena: &ShapeArena,
637 sparql: &SparqlExecutor,
638) -> Vec<Term> {
639 let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
640 focus_nodes_with_evaluator(data, sel, &mut evaluator)
641}
642
643fn focus_nodes_with_evaluator(
644 data: &Graph,
645 sel: &Selector,
646 evaluator: &mut ShapeEvaluator<'_>,
647) -> Vec<Term> {
648 match sel {
649 Selector::HasOut(q) => subjects_of(data, q),
650 Selector::HasIn(q) => objects_of(data, q),
651 Selector::IsConst(c) => vec![c.clone()],
652 Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
653 Shape::TestConst(target) => pred(evaluator.g, target, path)
658 .into_iter()
659 .filter(|node| graph_contains_term(data, node))
660 .collect(),
661 _ => all_nodes(data)
662 .into_iter()
663 .filter(|v| {
664 succ(evaluator.g, v, path)
665 .iter()
666 .any(|u| evaluator.holds(u, *qual))
667 })
668 .collect(),
669 },
670 Selector::Sparql(target) => {
671 let candidates = all_nodes(data);
672 evaluator
673 .sparql
674 .target_nodes(&target.query)
675 .unwrap_or_default()
676 .into_iter()
677 .filter(|node| candidates.contains(node))
678 .collect()
679 }
680 }
681}
682
683fn holds_memoized(
684 g: &dyn PathBackend,
685 arena: &ShapeArena,
686 v: &Term,
687 id: ShapeId,
688 sparql: &SparqlExecutor,
689 state: &mut EvalState,
690) -> EvalResult {
691 let key = (id, v.clone());
692 if let Some(&holds) = state.memo.get(&key) {
693 if let Some(telemetry) = state.telemetry.as_mut() {
694 telemetry.hits += 1;
695 }
696 return EvalResult {
697 holds,
698 cacheable: true,
699 };
700 }
701 if let Some(telemetry) = state.telemetry.as_mut() {
702 telemetry.misses += 1;
703 }
704 if !state.active.insert(key.clone()) {
705 if let Some(telemetry) = state.telemetry.as_mut() {
706 telemetry.recursion_back_edges += 1;
707 }
708 return EvalResult {
709 holds: true,
710 cacheable: false,
711 }; }
713 let result = match arena.get(id) {
714 Shape::Annotated { shape, .. } => holds_memoized(g, arena, v, *shape, sparql, state),
715 Shape::Top | Shape::Pending => EvalResult {
716 holds: true,
717 cacheable: true,
718 },
719 Shape::Sparql(constraint) => EvalResult {
720 holds: sparql
721 .constraint_violations(constraint, v)
722 .is_ok_and(|violations| violations.is_empty()),
723 cacheable: true,
724 },
725 Shape::TestConst(c) => EvalResult {
726 holds: v == c,
727 cacheable: true,
728 },
729 Shape::TestType(t) => EvalResult {
730 holds: value_type_holds(t, v),
731 cacheable: true,
732 },
733 Shape::TestKind(k) => EvalResult {
734 holds: k.matches(v),
735 cacheable: true,
736 },
737 Shape::Closed(q) => EvalResult {
738 holds: closed_offenders(g, v, q).is_empty(),
739 cacheable: true,
740 },
741 Shape::Eq(path, p) => EvalResult {
742 holds: succ(g, v, path) == objects(g, v, p),
743 cacheable: true,
744 },
745 Shape::Disj(path, p) => EvalResult {
746 holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
747 cacheable: true,
748 },
749 Shape::Lt(path, p) => EvalResult {
750 holds: all_pairs_ordered(g, v, path, p, false),
751 cacheable: true,
752 },
753 Shape::Le(path, p) => EvalResult {
754 holds: all_pairs_ordered(g, v, path, p, true),
755 cacheable: true,
756 },
757 Shape::UniqueLang(path) => EvalResult {
758 holds: unique_lang(&succ(g, v, path)),
759 cacheable: true,
760 },
761 Shape::Not(c) => {
762 let child = holds_memoized(g, arena, v, *c, sparql, state);
763 EvalResult {
764 holds: !child.holds,
765 cacheable: child.cacheable,
766 }
767 }
768 Shape::And(cs) => {
769 let mut result = EvalResult {
770 holds: true,
771 cacheable: true,
772 };
773 for child in cs {
774 let child = holds_memoized(g, arena, v, *child, sparql, state);
775 result.cacheable &= child.cacheable;
776 if !child.holds {
777 result.holds = false;
778 break;
779 }
780 }
781 result
782 }
783 Shape::Or(cs) => {
784 let mut result = EvalResult {
785 holds: false,
786 cacheable: true,
787 };
788 for child in cs {
789 let child = holds_memoized(g, arena, v, *child, sparql, state);
790 result.cacheable &= child.cacheable;
791 if child.holds {
792 result.holds = true;
793 break;
794 }
795 }
796 result
797 }
798 Shape::Count {
799 path,
800 min,
801 max,
802 qualifier,
803 } => {
804 let mut n = 0;
805 let mut cacheable = true;
806 for value in succ(g, v, path) {
807 let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
808 cacheable &= qualified.cacheable;
809 n += u64::from(qualified.holds);
810 }
811 EvalResult {
812 holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
813 cacheable,
814 }
815 }
816 };
817 state.active.remove(&key);
818 if result.cacheable {
819 state.memo.insert(key, result.holds);
820 if let Some(telemetry) = state.telemetry.as_mut() {
821 telemetry.insertions += 1;
822 }
823 } else if let Some(telemetry) = state.telemetry.as_mut() {
824 telemetry.non_cacheable_results += 1;
825 }
826 result
827}
828
829fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
830 const CONTROL_BYTE_ESTIMATE: usize = 1;
831 let bucket_bytes =
832 memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
833 bucket_bytes
834 + memo
835 .keys()
836 .map(|(_, term)| estimated_term_heap_bytes(term))
837 .sum::<usize>()
838}
839
840fn estimated_term_heap_bytes(term: &Term) -> usize {
841 match term {
842 Term::NamedNode(node) => node.as_str().len(),
843 Term::BlankNode(node) => node.as_str().len(),
844 Term::Literal(literal) => {
845 literal.value().len()
846 + literal.language().map_or_else(
847 || {
848 let datatype = literal.datatype();
849 if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
850 0
851 } else {
852 datatype.as_str().len()
853 }
854 },
855 str::len,
856 )
857 }
858 }
859}
860
861fn prefetch_sparql_constraints(
870 arena: &ShapeArena,
871 root: ShapeId,
872 foci: &[Term],
873 sparql: &SparqlExecutor,
874) {
875 if foci.len() < 2 {
876 return;
877 }
878 let mut constraints = Vec::new();
879 let mut seen = HashSet::new();
880 collect_focus_sparql(arena, root, &mut seen, &mut constraints);
881 for constraint in constraints {
882 let _ = sparql.prefetch_constraint(constraint, foci);
883 }
884}
885
886fn collect_focus_sparql<'a>(
887 arena: &'a ShapeArena,
888 id: ShapeId,
889 seen: &mut HashSet<ShapeId>,
890 out: &mut Vec<&'a SparqlConstraint>,
891) {
892 if !seen.insert(id) {
893 return; }
895 match arena.get(id) {
896 Shape::Annotated { shape, .. } => collect_focus_sparql(arena, *shape, seen, out),
897 Shape::Sparql(constraint) => out.push(constraint),
898 Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
899 Shape::And(ids) | Shape::Or(ids) => {
900 for &child in ids {
901 collect_focus_sparql(arena, child, seen, out);
902 }
903 }
904 _ => {}
906 }
907}
908
909fn explain(
912 evaluator: &mut ShapeEvaluator<'_>,
913 node: &Term,
914 id: ShapeId,
915 path_ctx: Option<&str>,
916 severity: &Severity,
917 stack: &mut HashSet<(ShapeId, Term)>,
918) -> Vec<Reason> {
919 let key = (id, node.clone());
920 if !stack.insert(key.clone()) {
921 return Vec::new(); }
923 if evaluator.holds(node, id) {
924 stack.remove(&key);
925 return Vec::new();
926 }
927 let reasons = match evaluator.arena.get(id).clone() {
928 Shape::Annotated {
929 severity: source_severity,
930 shape,
931 } => explain(evaluator, node, shape, path_ctx, &source_severity, stack),
932 Shape::Top | Shape::Pending => Vec::new(),
933 Shape::Sparql(constraint) => {
934 match evaluator.sparql.constraint_violations(&constraint, node) {
935 Ok(violations) => violations
936 .into_iter()
937 .map(|violation| {
938 let message = sparql_violation_message(&violation, &constraint, node);
941 Reason {
942 value: violation.value.unwrap_or_else(|| node.clone()),
943 path: violation
944 .path
945 .map(|path| path.to_string())
946 .or_else(|| path_ctx.map(str::to_string))
947 .or_else(|| constraint.path.as_ref().map(path_to_string)),
948 message,
949 shape: id,
950 severity: severity.clone(),
951 sub_reasons: Vec::new(),
952 }
953 })
954 .collect(),
955 Err(error) => vec![Reason {
956 value: node.clone(),
957 path: path_ctx.map(str::to_string),
958 shape: id,
959 severity: severity.clone(),
960 message: format!("SPARQL constraint evaluation failed: {error}"),
961 sub_reasons: Vec::new(),
962 }],
963 }
964 }
965 Shape::TestConst(_)
966 | Shape::TestType(_)
967 | Shape::TestKind(_)
968 | Shape::Eq(..)
969 | Shape::Disj(..)
970 | Shape::Lt(..)
971 | Shape::Le(..)
972 | Shape::UniqueLang(_) => leaf(
973 evaluator.holds(node, id),
974 node,
975 id,
976 path_ctx,
977 severity,
978 format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
979 ),
980 Shape::Closed(q) => {
981 let bad = closed_offenders(evaluator.g, node, &q);
982 if bad.is_empty() {
983 Vec::new()
984 } else {
985 let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
986 vec![Reason {
987 value: node.clone(),
988 path: path_ctx.map(str::to_string),
989 shape: id,
990 severity: severity.clone(),
991 message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
992 sub_reasons: Vec::new(),
993 }]
994 }
995 }
996 Shape::Not(c) => {
997 if explain(evaluator, node, c, path_ctx, severity, stack).is_empty() {
998 vec![Reason {
999 value: node.clone(),
1000 path: path_ctx.map(str::to_string),
1001 shape: id,
1002 severity: severity.clone(),
1003 message: "negated shape unexpectedly held".to_string(),
1004 sub_reasons: Vec::new(),
1005 }]
1006 } else {
1007 Vec::new()
1008 }
1009 }
1010 Shape::And(cs) => cs
1011 .iter()
1012 .flat_map(|c| explain(evaluator, node, *c, path_ctx, severity, stack))
1013 .collect(),
1014 Shape::Or(cs) => {
1015 let mut sub_reasons = Vec::new();
1016 let mut satisfied = false;
1017 for c in &cs {
1018 let sub = explain(evaluator, node, *c, path_ctx, severity, stack);
1019 if sub.is_empty() {
1020 satisfied = true;
1021 break;
1022 }
1023 sub_reasons.extend(sub);
1024 }
1025 if satisfied {
1026 Vec::new()
1027 } else {
1028 vec![Reason {
1029 value: node.clone(),
1030 path: path_ctx.map(str::to_string),
1031 shape: id,
1032 severity: severity.clone(),
1033 message: format!("none of {} alternative(s) satisfied", cs.len()),
1034 sub_reasons,
1035 }]
1036 }
1037 }
1038 Shape::Count {
1039 path,
1040 min,
1041 max,
1042 qualifier,
1043 } => explain_count(
1044 evaluator, node, id, &path, min, max, qualifier, severity, stack,
1045 ),
1046 };
1047 stack.remove(&key);
1048 reasons
1049}
1050
1051#[allow(clippy::too_many_arguments)]
1052fn explain_count(
1053 evaluator: &mut ShapeEvaluator<'_>,
1054 node: &Term,
1055 id: ShapeId,
1056 path: &Path,
1057 min: Option<u64>,
1058 max: Option<u64>,
1059 qualifier: ShapeId,
1060 severity: &Severity,
1061 stack: &mut HashSet<(ShapeId, Term)>,
1062) -> Vec<Reason> {
1063 let path_str = path_to_string(path);
1064 let matched: Vec<Term> = succ(evaluator.g, node, path)
1065 .into_iter()
1066 .filter(|u| evaluator.holds(u, qualifier))
1067 .collect();
1068 let n = matched.len() as u64;
1069 let mut reasons = Vec::new();
1070
1071 if let Some(mx) = max
1072 && n > mx
1073 {
1074 match evaluator.arena.get(qualifier).clone() {
1075 Shape::Not(inner) if mx == 0 => {
1077 for u in &matched {
1078 reasons.extend(explain(
1079 evaluator,
1080 u,
1081 inner,
1082 Some(&path_str),
1083 severity,
1084 stack,
1085 ));
1086 }
1087 }
1088 _ => reasons.push(Reason {
1089 value: node.clone(),
1090 path: Some(path_str.clone()),
1091 shape: id,
1092 severity: severity.clone(),
1093 message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
1094 sub_reasons: Vec::new(),
1095 }),
1096 }
1097 }
1098
1099 if let Some(mn) = min
1100 && n < mn
1101 {
1102 reasons.push(Reason {
1103 value: node.clone(),
1104 path: Some(path_str.clone()),
1105 shape: id,
1106 severity: severity.clone(),
1107 message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
1108 sub_reasons: Vec::new(),
1109 });
1110 }
1111
1112 reasons
1113}
1114
1115fn sparql_violation_message(
1120 violation: &SparqlViolation,
1121 constraint: &SparqlConstraint,
1122 node: &Term,
1123) -> String {
1124 if let Some(message) = &violation.message {
1125 return term_text(message);
1126 }
1127 if !constraint.messages.is_empty() {
1128 return constraint
1129 .messages
1130 .iter()
1131 .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
1132 .collect::<Vec<_>>()
1133 .join("; ");
1134 }
1135 let mut message = match &constraint.shape {
1136 Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
1137 None => "SPARQL constraint not satisfied".to_string(),
1138 };
1139 if let Some(value) = &violation.value {
1140 message.push_str(&format!(" (value: {value})"));
1141 }
1142 message
1143}
1144
1145pub(crate) fn apply_message_template(
1150 template: &str,
1151 focus: &Term,
1152 bindings: &HashMap<String, Term>,
1153) -> String {
1154 static RE: OnceLock<Regex> = OnceLock::new();
1155 let re = RE
1156 .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
1157 re.replace_all(template, |caps: ®ex::Captures| {
1158 let placeholder = &caps[1];
1159 let name = &placeholder[1..]; let term = if name == "this" {
1161 Some(focus)
1162 } else {
1163 bindings.get(name)
1164 };
1165 term.map(|t| match t {
1166 Term::NamedNode(n) => format!("<{}>", n.as_str()),
1167 Term::BlankNode(b) => format!("_:{}", b.as_str()),
1168 Term::Literal(l) => l.value().to_string(),
1169 })
1170 .unwrap_or_else(|| placeholder.to_string())
1171 })
1172 .to_string()
1173}
1174
1175fn term_text(term: &Term) -> String {
1178 match term {
1179 Term::Literal(literal) => literal.value().to_string(),
1180 other => other.to_string(),
1181 }
1182}
1183
1184fn leaf(
1185 ok: bool,
1186 node: &Term,
1187 id: ShapeId,
1188 path_ctx: Option<&str>,
1189 severity: &Severity,
1190 message: String,
1191) -> Vec<Reason> {
1192 if ok {
1193 Vec::new()
1194 } else {
1195 vec![Reason {
1196 value: node.clone(),
1197 path: path_ctx.map(str::to_string),
1198 shape: id,
1199 severity: severity.clone(),
1200 message,
1201 sub_reasons: Vec::new(),
1202 }]
1203 }
1204}
1205
1206fn all_pairs_ordered(
1207 g: &dyn PathBackend,
1208 v: &Term,
1209 path: &Path,
1210 p: &NamedNode,
1211 allow_eq: bool,
1212) -> bool {
1213 let lhs = succ(g, v, path);
1214 let rhs = objects(g, v, p);
1215 for a in &lhs {
1216 for b in &rhs {
1217 match compare_terms(a, b) {
1218 Some(Ordering::Less) => {}
1219 Some(Ordering::Equal) if allow_eq => {}
1220 _ => return false,
1221 }
1222 }
1223 }
1224 true
1225}
1226
1227fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1228 succ(g, v, &Path::Pred(p.clone()))
1229}
1230
1231fn closed_offenders(
1233 g: &dyn PathBackend,
1234 node: &Term,
1235 q: &BTreeSet<NamedNode>,
1236) -> BTreeSet<NamedNode> {
1237 g.out_predicates(node)
1238 .into_iter()
1239 .filter(|p| !q.contains(p))
1240 .collect()
1241}
1242
1243fn unique_lang(values: &HashSet<Term>) -> bool {
1244 let mut seen = HashSet::new();
1245 for term in values {
1246 if let Term::Literal(l) = term
1247 && let Some(lang) = l.language()
1248 && !seen.insert(lang.to_ascii_lowercase())
1249 {
1250 return false;
1251 }
1252 }
1253 true
1254}
1255
1256fn dedup_reasons(reasons: &mut Vec<Reason>) {
1257 let mut seen = HashSet::new();
1258 reasons.retain(|r| {
1259 seen.insert((
1260 r.value.to_string(),
1261 r.message.clone(),
1262 r.severity.as_str().to_string(),
1263 ))
1264 });
1265}
1266
1267fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1268 crate::path::term_of(s.into_owned())
1269}
1270
1271fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1273 let mut seen = HashSet::new();
1274 data.triples_for_predicate(p.as_ref())
1275 .filter_map(|t| {
1276 let term = subject_term(t.subject);
1277 seen.insert(term.clone()).then_some(term)
1278 })
1279 .collect()
1280}
1281
1282fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1284 let mut seen = HashSet::new();
1285 data.triples_for_predicate(p.as_ref())
1286 .filter_map(|t| {
1287 let term = t.object.into_owned();
1288 seen.insert(term.clone()).then_some(term)
1289 })
1290 .collect()
1291}
1292
1293fn all_nodes(g: &Graph) -> HashSet<Term> {
1295 let mut nodes = HashSet::new();
1296 for t in g.iter() {
1297 nodes.insert(subject_term(t.subject));
1298 nodes.insert(t.object.into_owned());
1299 }
1300 nodes
1301}
1302
1303fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1305 node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1306 || g.triples_for_object(term).next().is_some()
1307}
1308
1309#[cfg(test)]
1310mod tests {
1311 use super::*;
1312 use oxrdf::{NamedNode, Triple};
1313
1314 fn iri(local: &str) -> NamedNode {
1315 NamedNode::new(format!("http://ex/{local}")).unwrap()
1316 }
1317
1318 fn term(local: &str) -> Term {
1319 Term::NamedNode(iri(local))
1320 }
1321
1322 #[test]
1323 fn memoizes_shared_value_checks_across_focus_nodes() {
1324 let p = iri("p");
1325 let shared = term("shared");
1326 let mut graph = Graph::new();
1327 graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1328 graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1329
1330 let mut arena = ShapeArena::new();
1331 let qualifier = arena.insert(Shape::TestConst(shared));
1332 let root = arena.insert(Shape::Count {
1333 path: Path::Pred(p),
1334 min: Some(1),
1335 max: None,
1336 qualifier,
1337 });
1338 let sparql = SparqlExecutor::new(&graph).unwrap();
1339 crate::profile::enable();
1340 {
1341 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1342 assert!(evaluator.holds(&term("a"), root));
1343 assert!(evaluator.holds(&term("b"), root));
1344 }
1345 let profile = crate::profile::take().unwrap();
1346 let cache = profile.shape_cache();
1347 assert_eq!(cache.evaluators, 1);
1348 assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1349 assert_eq!(cache.peak_entries, 3);
1350 assert!(cache.estimated_peak_bytes > 0);
1351 }
1352
1353 #[test]
1354 fn does_not_cache_cycle_dependent_results() {
1355 let mut arena = ShapeArena::new();
1359 let a = arena.reserve();
1360 let b = arena.reserve();
1361 let bottom = arena.insert(Shape::Or(Vec::new()));
1362 arena.set(a, Shape::And(vec![b, bottom]));
1363 arena.set(b, Shape::And(vec![a]));
1364
1365 let graph = Graph::new();
1366 let sparql = SparqlExecutor::new(&graph).unwrap();
1367 let node = term("x");
1368
1369 crate::profile::enable();
1370 {
1371 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1372 assert!(!evaluator.holds(&node, a));
1373 assert!(!evaluator.holds(&node, b));
1374 }
1375 let profile = crate::profile::take().unwrap();
1376 let cache = profile.shape_cache();
1377 assert!(cache.recursion_back_edges > 0);
1378 assert!(cache.non_cacheable_results > 0);
1379 }
1380}