1use crate::frozen::FrozenIndexedDataset;
13use crate::path::{PathBackend, node_of, pred, succ};
14use crate::profile::ShapeCacheSample;
15use crate::sparql::{SparqlExecutor, SparqlViolation};
16use crate::value::{compare_terms, value_type_holds};
17use oxrdf::{Graph, NamedNode, Term};
18use regex::Regex;
19use serde::{Deserialize, Serialize};
20use shifty_algebra::render::{path_to_string, shape_to_string};
21use shifty_algebra::{Path, Schema, Selector, Shape, ShapeArena, ShapeId, SparqlConstraint};
22use shifty_opt::{FocusSource, PhysicalPlan, analyze};
23use std::cmp::Ordering;
24use std::collections::{BTreeSet, HashMap, HashSet};
25use std::fmt;
26use std::sync::OnceLock;
27
28#[derive(Debug, Clone, Copy)]
29struct EvalResult {
30 holds: bool,
31 cacheable: bool,
32}
33
34#[derive(Default)]
35struct EvalState {
36 memo: HashMap<(ShapeId, Term), bool>,
37 active: HashSet<(ShapeId, Term)>,
38 telemetry: Option<ShapeCacheSample>,
39}
40
41pub(crate) struct ShapeEvaluator<'a> {
47 g: &'a dyn PathBackend,
48 arena: &'a ShapeArena,
49 sparql: &'a SparqlExecutor,
50 state: EvalState,
51}
52
53impl<'a> ShapeEvaluator<'a> {
54 pub(crate) fn new(
55 g: &'a dyn PathBackend,
56 arena: &'a ShapeArena,
57 sparql: &'a SparqlExecutor,
58 ) -> Self {
59 Self {
60 g,
61 arena,
62 sparql,
63 state: EvalState {
64 telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
65 ..EvalState::default()
66 },
67 }
68 }
69
70 pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
71 holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
72 }
73
74 pub(crate) fn sparql(&self) -> &SparqlExecutor {
75 self.sparql
76 }
77}
78
79impl Drop for ShapeEvaluator<'_> {
80 fn drop(&mut self) {
81 let Some(mut sample) = self.state.telemetry else {
82 return;
83 };
84 sample.entries = self.state.memo.len();
85 sample.estimated_bytes = estimated_memo_bytes(&self.state.memo);
86 crate::profile::record_shape_cache(sample);
87 }
88}
89
90#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
92pub struct Reason {
93 pub value: Term,
95 pub path: Option<String>,
98 pub shape: ShapeId,
100 pub message: String,
101 #[serde(default, skip_serializing_if = "Vec::is_empty")]
104 pub sub_reasons: Vec<Reason>,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
109pub struct Violation {
110 pub focus: Term,
111 pub statement: usize,
113 pub reasons: Vec<Reason>,
114}
115
116#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
118pub struct ValidationOutcome {
119 pub conforms: bool,
120 pub violations: Vec<Violation>,
121}
122
123#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
125pub enum ValidationGraphMode {
126 Data,
128 #[default]
131 Union,
132 UnionAll,
134}
135
136#[derive(Debug, Clone, PartialEq, Eq)]
140pub struct NonStratifiable {
141 pub components: Vec<Vec<ShapeId>>,
142}
143
144impl fmt::Display for NonStratifiable {
145 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146 write!(f, "non-stratifiable schema (recursion through negation): ")?;
147 for (i, c) in self.components.iter().enumerate() {
148 if i > 0 {
149 write!(f, "; ")?;
150 }
151 let ids: Vec<String> = c.iter().map(|s| format!("@{}", s.0)).collect();
152 write!(f, "{{{}}}", ids.join(" "))?;
153 }
154 Ok(())
155 }
156}
157
158impl std::error::Error for NonStratifiable {}
159
160pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
169 validate_with_context(data, data, schema)
170}
171
172pub fn validate_graphs(
174 data: &Graph,
175 shapes: &Graph,
176 schema: &Schema,
177) -> Result<ValidationOutcome, NonStratifiable> {
178 validate_graphs_with_mode(data, shapes, schema, ValidationGraphMode::default())
179}
180
181pub fn validate_graphs_with_mode(
183 data: &Graph,
184 shapes: &Graph,
185 schema: &Schema,
186 mode: ValidationGraphMode,
187) -> Result<ValidationOutcome, NonStratifiable> {
188 match mode {
189 ValidationGraphMode::Data => {
190 let uses_shapes = uses_shapes_graph(&schema.arena);
191 let frozen = if uses_shapes {
192 FrozenIndexedDataset::from_graphs(data, shapes)
193 } else {
194 FrozenIndexedDataset::from_graph(data)
195 };
196 validate_with_frozen(data, schema, frozen, uses_shapes)
197 }
198 ValidationGraphMode::Union => {
199 let uses_shapes = uses_shapes_graph(&schema.arena);
200 let frozen = if uses_shapes {
201 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
202 } else {
203 FrozenIndexedDataset::from_graph_union(data, shapes)
204 };
205 validate_with_frozen(data, schema, frozen, uses_shapes)
206 }
207 ValidationGraphMode::UnionAll => {
208 let union = graph_union(data, shapes);
209 validate_with_context(&union, &union, schema)
210 }
211 }
212}
213
214pub fn validate_with_context(
218 data: &Graph,
219 context: &Graph,
220 schema: &Schema,
221) -> Result<ValidationOutcome, NonStratifiable> {
222 let uses_shapes = uses_shapes_graph(&schema.arena);
223 let frozen = if uses_shapes {
224 FrozenIndexedDataset::from_graphs(context, context)
225 } else {
226 FrozenIndexedDataset::from_graph(context)
227 };
228 validate_with_frozen(data, schema, frozen, uses_shapes)
229}
230
231fn validate_with_frozen(
232 data: &Graph,
233 schema: &Schema,
234 frozen: FrozenIndexedDataset,
235 has_shapes_graph: bool,
236) -> Result<ValidationOutcome, NonStratifiable> {
237 let strat = analyze(&schema.arena);
238 if !strat.stratifiable {
239 let components = strat
240 .strata
241 .iter()
242 .filter(|s| !s.stratifiable)
243 .map(|s| s.shapes.clone())
244 .collect();
245 return Err(NonStratifiable { components });
246 }
247
248 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
249 let backend = sparql
250 .frozen()
251 .expect("validation executor always has a frozen dataset");
252 let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &sparql);
253 let mut violations = Vec::new();
254 for (i, st) in schema.statements.iter().enumerate() {
255 let label = schema
256 .names
257 .get(&st.shape)
258 .cloned()
259 .unwrap_or_else(|| format!("@{}", st.shape.0));
260 let foci = focus_nodes_with_evaluator(data, &st.selector, &mut evaluator);
261 prefetch_sparql_constraints(&schema.arena, st.shape, &foci, &sparql);
262 for v in foci {
263 let t = std::time::Instant::now();
264 let mut stack = HashSet::new();
265 let mut reasons = explain(&mut evaluator, &v, st.shape, None, &mut stack);
266 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
267 dedup_reasons(&mut reasons);
268 if !reasons.is_empty() {
269 violations.push(Violation {
270 focus: v,
271 statement: i,
272 reasons,
273 });
274 }
275 }
276 }
277 Ok(ValidationOutcome {
278 conforms: violations.is_empty(),
279 violations,
280 })
281}
282
283fn uses_shapes_graph(arena: &ShapeArena) -> bool {
286 (0..arena.len()).any(|i| {
287 matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
288 })
289}
290
291pub(crate) fn graph_union(left: &Graph, right: &Graph) -> Graph {
292 let mut union = left.clone();
293 for triple in right.iter() {
294 union.insert(triple);
295 }
296 union
297}
298
299pub fn validate_plan(
305 data: &Graph,
306 plan: &PhysicalPlan,
307) -> Result<ValidationOutcome, NonStratifiable> {
308 validate_plan_with_context(data, data, plan)
309}
310
311pub fn validate_plan_graphs(
313 data: &Graph,
314 shapes: &Graph,
315 plan: &PhysicalPlan,
316) -> Result<ValidationOutcome, NonStratifiable> {
317 validate_plan_graphs_with_mode(data, shapes, plan, ValidationGraphMode::default())
318}
319
320pub fn validate_plan_graphs_with_mode(
322 data: &Graph,
323 shapes: &Graph,
324 plan: &PhysicalPlan,
325 mode: ValidationGraphMode,
326) -> Result<ValidationOutcome, NonStratifiable> {
327 match mode {
328 ValidationGraphMode::Data => {
329 let uses_shapes = uses_shapes_graph(&plan.arena);
330 let frozen = if uses_shapes {
331 FrozenIndexedDataset::from_graphs(data, shapes)
332 } else {
333 FrozenIndexedDataset::from_graph(data)
334 };
335 validate_plan_with_frozen(data, plan, frozen, uses_shapes)
336 }
337 ValidationGraphMode::Union => {
338 let uses_shapes = uses_shapes_graph(&plan.arena);
339 let frozen = if uses_shapes {
340 FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes)
341 } else {
342 FrozenIndexedDataset::from_graph_union(data, shapes)
343 };
344 validate_plan_with_frozen(data, plan, frozen, uses_shapes)
345 }
346 ValidationGraphMode::UnionAll => {
347 let union = graph_union(data, shapes);
348 validate_plan_with_context(&union, &union, plan)
349 }
350 }
351}
352
353pub fn validate_plan_with_context(
355 data: &Graph,
356 context: &Graph,
357 plan: &PhysicalPlan,
358) -> Result<ValidationOutcome, NonStratifiable> {
359 let uses_shapes = uses_shapes_graph(&plan.arena);
360 let frozen = if uses_shapes {
361 FrozenIndexedDataset::from_graphs(context, context)
362 } else {
363 FrozenIndexedDataset::from_graph(context)
364 };
365 validate_plan_with_frozen(data, plan, frozen, uses_shapes)
366}
367
368fn validate_plan_with_frozen(
369 data: &Graph,
370 plan: &PhysicalPlan,
371 frozen: FrozenIndexedDataset,
372 has_shapes_graph: bool,
373) -> Result<ValidationOutcome, NonStratifiable> {
374 let strat = analyze(&plan.arena);
375 if !strat.stratifiable {
376 let components = strat
377 .strata
378 .iter()
379 .filter(|s| !s.stratifiable)
380 .map(|s| s.shapes.clone())
381 .collect();
382 return Err(NonStratifiable { components });
383 }
384
385 let sparql = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
386 let backend = sparql
387 .frozen()
388 .expect("validation executor always has a frozen dataset");
389 let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &sparql);
390 let mut violations = Vec::new();
391 for (i, sp) in plan.statements.iter().enumerate() {
392 let label = plan
393 .names
394 .get(&sp.shape)
395 .cloned()
396 .unwrap_or_else(|| format!("@{}", sp.shape.0));
397 let foci = focus_for_source(data, &sp.source, &mut evaluator);
398 prefetch_sparql_constraints(&plan.arena, sp.shape, &foci, &sparql);
399 for v in foci {
400 let t = std::time::Instant::now();
401 let mut stack = HashSet::new();
402 let mut reasons = explain(&mut evaluator, &v, sp.shape, None, &mut stack);
403 crate::profile::record_shape(&label, t.elapsed().as_micros() as u64);
404 dedup_reasons(&mut reasons);
405 if !reasons.is_empty() {
406 violations.push(Violation {
407 focus: v,
408 statement: i,
409 reasons,
410 });
411 }
412 }
413 }
414 Ok(ValidationOutcome {
415 conforms: violations.is_empty(),
416 violations,
417 })
418}
419
420fn focus_for_source(
422 data: &Graph,
423 source: &FocusSource,
424 evaluator: &mut ShapeEvaluator<'_>,
425) -> Vec<Term> {
426 match source {
427 FocusSource::SubjectsOf(p) => subjects_of(data, p),
428 FocusSource::ObjectsOf(p) => objects_of(data, p),
429 FocusSource::Node(c) => vec![c.clone()],
430 FocusSource::PathToConst { path, target } => pred(evaluator.g, target, path)
432 .into_iter()
433 .filter(|node| graph_contains_term(data, node))
434 .collect(),
435 FocusSource::ScanFilter { path, qualifier } => all_nodes(data)
436 .into_iter()
437 .filter(|v| {
438 succ(evaluator.g, v, path)
439 .iter()
440 .any(|u| evaluator.holds(u, *qualifier))
441 })
442 .collect(),
443 FocusSource::Sparql(target) => {
444 let candidates = all_nodes(data);
445 evaluator
446 .sparql
447 .target_nodes(&target.query)
448 .unwrap_or_default()
449 .into_iter()
450 .filter(|node| candidates.contains(node))
451 .collect()
452 }
453 }
454}
455
456pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
458 let sparql =
459 SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
460 let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
461 focus_nodes_with_evaluator(data, sel, &mut evaluator)
462}
463
464pub(crate) fn focus_nodes_with(
465 data: &Graph,
466 backend: &dyn PathBackend,
467 sel: &Selector,
468 arena: &ShapeArena,
469 sparql: &SparqlExecutor,
470) -> Vec<Term> {
471 let mut evaluator = ShapeEvaluator::new(backend, arena, sparql);
472 focus_nodes_with_evaluator(data, sel, &mut evaluator)
473}
474
475fn focus_nodes_with_evaluator(
476 data: &Graph,
477 sel: &Selector,
478 evaluator: &mut ShapeEvaluator<'_>,
479) -> Vec<Term> {
480 match sel {
481 Selector::HasOut(q) => subjects_of(data, q),
482 Selector::HasIn(q) => objects_of(data, q),
483 Selector::IsConst(c) => vec![c.clone()],
484 Selector::HasPath(path, qual) => match evaluator.arena.get(*qual) {
485 Shape::TestConst(target) => pred(evaluator.g, target, path)
490 .into_iter()
491 .filter(|node| graph_contains_term(data, node))
492 .collect(),
493 _ => all_nodes(data)
494 .into_iter()
495 .filter(|v| {
496 succ(evaluator.g, v, path)
497 .iter()
498 .any(|u| evaluator.holds(u, *qual))
499 })
500 .collect(),
501 },
502 Selector::Sparql(target) => {
503 let candidates = all_nodes(data);
504 evaluator
505 .sparql
506 .target_nodes(&target.query)
507 .unwrap_or_default()
508 .into_iter()
509 .filter(|node| candidates.contains(node))
510 .collect()
511 }
512 }
513}
514
515fn holds_memoized(
516 g: &dyn PathBackend,
517 arena: &ShapeArena,
518 v: &Term,
519 id: ShapeId,
520 sparql: &SparqlExecutor,
521 state: &mut EvalState,
522) -> EvalResult {
523 let key = (id, v.clone());
524 if let Some(&holds) = state.memo.get(&key) {
525 if let Some(telemetry) = state.telemetry.as_mut() {
526 telemetry.hits += 1;
527 }
528 return EvalResult {
529 holds,
530 cacheable: true,
531 };
532 }
533 if let Some(telemetry) = state.telemetry.as_mut() {
534 telemetry.misses += 1;
535 }
536 if !state.active.insert(key.clone()) {
537 if let Some(telemetry) = state.telemetry.as_mut() {
538 telemetry.recursion_back_edges += 1;
539 }
540 return EvalResult {
541 holds: true,
542 cacheable: false,
543 }; }
545 let result = match arena.get(id) {
546 Shape::Top | Shape::Pending => EvalResult {
547 holds: true,
548 cacheable: true,
549 },
550 Shape::Sparql(constraint) => EvalResult {
551 holds: sparql
552 .constraint_violations(constraint, v)
553 .is_ok_and(|violations| violations.is_empty()),
554 cacheable: true,
555 },
556 Shape::TestConst(c) => EvalResult {
557 holds: v == c,
558 cacheable: true,
559 },
560 Shape::TestType(t) => EvalResult {
561 holds: value_type_holds(t, v),
562 cacheable: true,
563 },
564 Shape::TestKind(k) => EvalResult {
565 holds: k.matches(v),
566 cacheable: true,
567 },
568 Shape::Closed(q) => EvalResult {
569 holds: closed_offenders(g, v, q).is_empty(),
570 cacheable: true,
571 },
572 Shape::Eq(path, p) => EvalResult {
573 holds: succ(g, v, path) == objects(g, v, p),
574 cacheable: true,
575 },
576 Shape::Disj(path, p) => EvalResult {
577 holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
578 cacheable: true,
579 },
580 Shape::Lt(path, p) => EvalResult {
581 holds: all_pairs_ordered(g, v, path, p, false),
582 cacheable: true,
583 },
584 Shape::Le(path, p) => EvalResult {
585 holds: all_pairs_ordered(g, v, path, p, true),
586 cacheable: true,
587 },
588 Shape::UniqueLang(path) => EvalResult {
589 holds: unique_lang(&succ(g, v, path)),
590 cacheable: true,
591 },
592 Shape::Not(c) => {
593 let child = holds_memoized(g, arena, v, *c, sparql, state);
594 EvalResult {
595 holds: !child.holds,
596 cacheable: child.cacheable,
597 }
598 }
599 Shape::And(cs) => {
600 let mut result = EvalResult {
601 holds: true,
602 cacheable: true,
603 };
604 for child in cs {
605 let child = holds_memoized(g, arena, v, *child, sparql, state);
606 result.cacheable &= child.cacheable;
607 if !child.holds {
608 result.holds = false;
609 break;
610 }
611 }
612 result
613 }
614 Shape::Or(cs) => {
615 let mut result = EvalResult {
616 holds: false,
617 cacheable: true,
618 };
619 for child in cs {
620 let child = holds_memoized(g, arena, v, *child, sparql, state);
621 result.cacheable &= child.cacheable;
622 if child.holds {
623 result.holds = true;
624 break;
625 }
626 }
627 result
628 }
629 Shape::Count {
630 path,
631 min,
632 max,
633 qualifier,
634 } => {
635 let mut n = 0;
636 let mut cacheable = true;
637 for value in succ(g, v, path) {
638 let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
639 cacheable &= qualified.cacheable;
640 n += u64::from(qualified.holds);
641 }
642 EvalResult {
643 holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
644 cacheable,
645 }
646 }
647 };
648 state.active.remove(&key);
649 if result.cacheable {
650 state.memo.insert(key, result.holds);
651 if let Some(telemetry) = state.telemetry.as_mut() {
652 telemetry.insertions += 1;
653 }
654 } else if let Some(telemetry) = state.telemetry.as_mut() {
655 telemetry.non_cacheable_results += 1;
656 }
657 result
658}
659
660fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
661 const CONTROL_BYTE_ESTIMATE: usize = 1;
662 let bucket_bytes =
663 memo.capacity() * (std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE);
664 bucket_bytes
665 + memo
666 .keys()
667 .map(|(_, term)| estimated_term_heap_bytes(term))
668 .sum::<usize>()
669}
670
671fn estimated_term_heap_bytes(term: &Term) -> usize {
672 match term {
673 Term::NamedNode(node) => node.as_str().len(),
674 Term::BlankNode(node) => node.as_str().len(),
675 Term::Literal(literal) => {
676 literal.value().len()
677 + literal.language().map_or_else(
678 || {
679 let datatype = literal.datatype();
680 if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
681 0
682 } else {
683 datatype.as_str().len()
684 }
685 },
686 str::len,
687 )
688 }
689 }
690}
691
692fn prefetch_sparql_constraints(
701 arena: &ShapeArena,
702 root: ShapeId,
703 foci: &[Term],
704 sparql: &SparqlExecutor,
705) {
706 if foci.len() < 2 {
707 return;
708 }
709 let mut constraints = Vec::new();
710 let mut seen = HashSet::new();
711 collect_focus_sparql(arena, root, &mut seen, &mut constraints);
712 for constraint in constraints {
713 let _ = sparql.prefetch_constraint(constraint, foci);
714 }
715}
716
717fn collect_focus_sparql<'a>(
718 arena: &'a ShapeArena,
719 id: ShapeId,
720 seen: &mut HashSet<ShapeId>,
721 out: &mut Vec<&'a SparqlConstraint>,
722) {
723 if !seen.insert(id) {
724 return; }
726 match arena.get(id) {
727 Shape::Sparql(constraint) => out.push(constraint),
728 Shape::Not(inner) => collect_focus_sparql(arena, *inner, seen, out),
729 Shape::And(ids) | Shape::Or(ids) => {
730 for &child in ids {
731 collect_focus_sparql(arena, child, seen, out);
732 }
733 }
734 _ => {}
736 }
737}
738
739fn explain(
742 evaluator: &mut ShapeEvaluator<'_>,
743 node: &Term,
744 id: ShapeId,
745 path_ctx: Option<&str>,
746 stack: &mut HashSet<(ShapeId, Term)>,
747) -> Vec<Reason> {
748 let key = (id, node.clone());
749 if !stack.insert(key.clone()) {
750 return Vec::new(); }
752 if evaluator.holds(node, id) {
753 stack.remove(&key);
754 return Vec::new();
755 }
756 let reasons = match evaluator.arena.get(id).clone() {
757 Shape::Top | Shape::Pending => Vec::new(),
758 Shape::Sparql(constraint) => {
759 match evaluator.sparql.constraint_violations(&constraint, node) {
760 Ok(violations) => violations
761 .into_iter()
762 .map(|violation| {
763 let message = sparql_violation_message(&violation, &constraint, node);
766 Reason {
767 value: violation.value.unwrap_or_else(|| node.clone()),
768 path: violation
769 .path
770 .map(|path| path.to_string())
771 .or_else(|| path_ctx.map(str::to_string))
772 .or_else(|| constraint.path.as_ref().map(path_to_string)),
773 message,
774 shape: id,
775 sub_reasons: Vec::new(),
776 }
777 })
778 .collect(),
779 Err(error) => vec![Reason {
780 value: node.clone(),
781 path: path_ctx.map(str::to_string),
782 shape: id,
783 message: format!("SPARQL constraint evaluation failed: {error}"),
784 sub_reasons: Vec::new(),
785 }],
786 }
787 }
788 Shape::TestConst(_)
789 | Shape::TestType(_)
790 | Shape::TestKind(_)
791 | Shape::Eq(..)
792 | Shape::Disj(..)
793 | Shape::Lt(..)
794 | Shape::Le(..)
795 | Shape::UniqueLang(_) => leaf(
796 evaluator.holds(node, id),
797 node,
798 id,
799 path_ctx,
800 format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
801 ),
802 Shape::Closed(q) => {
803 let bad = closed_offenders(evaluator.g, node, &q);
804 if bad.is_empty() {
805 Vec::new()
806 } else {
807 let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
808 vec![Reason {
809 value: node.clone(),
810 path: path_ctx.map(str::to_string),
811 shape: id,
812 message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
813 sub_reasons: Vec::new(),
814 }]
815 }
816 }
817 Shape::Not(c) => {
818 if explain(evaluator, node, c, path_ctx, stack).is_empty() {
819 vec![Reason {
820 value: node.clone(),
821 path: path_ctx.map(str::to_string),
822 shape: id,
823 message: "negated shape unexpectedly held".to_string(),
824 sub_reasons: Vec::new(),
825 }]
826 } else {
827 Vec::new()
828 }
829 }
830 Shape::And(cs) => cs
831 .iter()
832 .flat_map(|c| explain(evaluator, node, *c, path_ctx, stack))
833 .collect(),
834 Shape::Or(cs) => {
835 let mut sub_reasons = Vec::new();
836 let mut satisfied = false;
837 for c in &cs {
838 let sub = explain(evaluator, node, *c, path_ctx, stack);
839 if sub.is_empty() {
840 satisfied = true;
841 break;
842 }
843 sub_reasons.extend(sub);
844 }
845 if satisfied {
846 Vec::new()
847 } else {
848 vec![Reason {
849 value: node.clone(),
850 path: path_ctx.map(str::to_string),
851 shape: id,
852 message: format!("none of {} alternative(s) satisfied", cs.len()),
853 sub_reasons,
854 }]
855 }
856 }
857 Shape::Count {
858 path,
859 min,
860 max,
861 qualifier,
862 } => explain_count(evaluator, node, id, &path, min, max, qualifier, stack),
863 };
864 stack.remove(&key);
865 reasons
866}
867
868#[allow(clippy::too_many_arguments)]
869fn explain_count(
870 evaluator: &mut ShapeEvaluator<'_>,
871 node: &Term,
872 id: ShapeId,
873 path: &Path,
874 min: Option<u64>,
875 max: Option<u64>,
876 qualifier: ShapeId,
877 stack: &mut HashSet<(ShapeId, Term)>,
878) -> Vec<Reason> {
879 let path_str = path_to_string(path);
880 let matched: Vec<Term> = succ(evaluator.g, node, path)
881 .into_iter()
882 .filter(|u| evaluator.holds(u, qualifier))
883 .collect();
884 let n = matched.len() as u64;
885 let mut reasons = Vec::new();
886
887 if let Some(mx) = max
888 && n > mx
889 {
890 match evaluator.arena.get(qualifier).clone() {
891 Shape::Not(inner) if mx == 0 => {
893 for u in &matched {
894 reasons.extend(explain(evaluator, u, inner, Some(&path_str), stack));
895 }
896 }
897 _ => reasons.push(Reason {
898 value: node.clone(),
899 path: Some(path_str.clone()),
900 shape: id,
901 message: format!("at most {mx} value(s) may match along {path_str}, found {n}"),
902 sub_reasons: Vec::new(),
903 }),
904 }
905 }
906
907 if let Some(mn) = min
908 && n < mn
909 {
910 reasons.push(Reason {
911 value: node.clone(),
912 path: Some(path_str.clone()),
913 shape: id,
914 message: format!("at least {mn} value(s) required along {path_str}, found {n}"),
915 sub_reasons: Vec::new(),
916 });
917 }
918
919 reasons
920}
921
922fn sparql_violation_message(
927 violation: &SparqlViolation,
928 constraint: &SparqlConstraint,
929 node: &Term,
930) -> String {
931 if let Some(message) = &violation.message {
932 return term_text(message);
933 }
934 if !constraint.messages.is_empty() {
935 return constraint
936 .messages
937 .iter()
938 .map(|m| apply_message_template(&term_text(m), node, &violation.bindings))
939 .collect::<Vec<_>>()
940 .join("; ");
941 }
942 let mut message = match &constraint.shape {
943 Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
944 None => "SPARQL constraint not satisfied".to_string(),
945 };
946 if let Some(value) = &violation.value {
947 message.push_str(&format!(" (value: {value})"));
948 }
949 message
950}
951
952pub(crate) fn apply_message_template(
957 template: &str,
958 focus: &Term,
959 bindings: &HashMap<String, Term>,
960) -> String {
961 static RE: OnceLock<Regex> = OnceLock::new();
962 let re = RE
963 .get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
964 re.replace_all(template, |caps: ®ex::Captures| {
965 let placeholder = &caps[1];
966 let name = &placeholder[1..]; let term = if name == "this" {
968 Some(focus)
969 } else {
970 bindings.get(name)
971 };
972 term.map(|t| match t {
973 Term::NamedNode(n) => format!("<{}>", n.as_str()),
974 Term::BlankNode(b) => format!("_:{}", b.as_str()),
975 Term::Literal(l) => l.value().to_string(),
976 })
977 .unwrap_or_else(|| placeholder.to_string())
978 })
979 .to_string()
980}
981
982fn term_text(term: &Term) -> String {
985 match term {
986 Term::Literal(literal) => literal.value().to_string(),
987 other => other.to_string(),
988 }
989}
990
991fn leaf(
992 ok: bool,
993 node: &Term,
994 id: ShapeId,
995 path_ctx: Option<&str>,
996 message: String,
997) -> Vec<Reason> {
998 if ok {
999 Vec::new()
1000 } else {
1001 vec![Reason {
1002 value: node.clone(),
1003 path: path_ctx.map(str::to_string),
1004 shape: id,
1005 message,
1006 sub_reasons: Vec::new(),
1007 }]
1008 }
1009}
1010
1011fn all_pairs_ordered(
1012 g: &dyn PathBackend,
1013 v: &Term,
1014 path: &Path,
1015 p: &NamedNode,
1016 allow_eq: bool,
1017) -> bool {
1018 let lhs = succ(g, v, path);
1019 let rhs = objects(g, v, p);
1020 for a in &lhs {
1021 for b in &rhs {
1022 match compare_terms(a, b) {
1023 Some(Ordering::Less) => {}
1024 Some(Ordering::Equal) if allow_eq => {}
1025 _ => return false,
1026 }
1027 }
1028 }
1029 true
1030}
1031
1032fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
1033 succ(g, v, &Path::Pred(p.clone()))
1034}
1035
1036fn closed_offenders(
1038 g: &dyn PathBackend,
1039 node: &Term,
1040 q: &BTreeSet<NamedNode>,
1041) -> BTreeSet<NamedNode> {
1042 g.out_predicates(node)
1043 .into_iter()
1044 .filter(|p| !q.contains(p))
1045 .collect()
1046}
1047
1048fn unique_lang(values: &HashSet<Term>) -> bool {
1049 let mut seen = HashSet::new();
1050 for term in values {
1051 if let Term::Literal(l) = term
1052 && let Some(lang) = l.language()
1053 && !seen.insert(lang.to_ascii_lowercase())
1054 {
1055 return false;
1056 }
1057 }
1058 true
1059}
1060
1061fn dedup_reasons(reasons: &mut Vec<Reason>) {
1062 let mut seen = HashSet::new();
1063 reasons.retain(|r| seen.insert((r.value.to_string(), r.message.clone())));
1064}
1065
1066fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
1067 crate::path::term_of(s.into_owned())
1068}
1069
1070fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1072 let mut seen = HashSet::new();
1073 data.triples_for_predicate(p.as_ref())
1074 .filter_map(|t| {
1075 let term = subject_term(t.subject);
1076 seen.insert(term.clone()).then_some(term)
1077 })
1078 .collect()
1079}
1080
1081fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
1083 let mut seen = HashSet::new();
1084 data.triples_for_predicate(p.as_ref())
1085 .filter_map(|t| {
1086 let term = t.object.into_owned();
1087 seen.insert(term.clone()).then_some(term)
1088 })
1089 .collect()
1090}
1091
1092fn all_nodes(g: &Graph) -> HashSet<Term> {
1094 let mut nodes = HashSet::new();
1095 for t in g.iter() {
1096 nodes.insert(subject_term(t.subject));
1097 nodes.insert(t.object.into_owned());
1098 }
1099 nodes
1100}
1101
1102fn graph_contains_term(g: &Graph, term: &Term) -> bool {
1104 node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
1105 || g.triples_for_object(term).next().is_some()
1106}
1107
1108#[cfg(test)]
1109mod tests {
1110 use super::*;
1111 use oxrdf::{NamedNode, Triple};
1112
1113 fn iri(local: &str) -> NamedNode {
1114 NamedNode::new(format!("http://ex/{local}")).unwrap()
1115 }
1116
1117 fn term(local: &str) -> Term {
1118 Term::NamedNode(iri(local))
1119 }
1120
1121 #[test]
1122 fn memoizes_shared_value_checks_across_focus_nodes() {
1123 let p = iri("p");
1124 let shared = term("shared");
1125 let mut graph = Graph::new();
1126 graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
1127 graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
1128
1129 let mut arena = ShapeArena::new();
1130 let qualifier = arena.insert(Shape::TestConst(shared));
1131 let root = arena.insert(Shape::Count {
1132 path: Path::Pred(p),
1133 min: Some(1),
1134 max: None,
1135 qualifier,
1136 });
1137 let sparql = SparqlExecutor::new(&graph).unwrap();
1138 crate::profile::enable();
1139 {
1140 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1141 assert!(evaluator.holds(&term("a"), root));
1142 assert!(evaluator.holds(&term("b"), root));
1143 }
1144 let profile = crate::profile::take().unwrap();
1145 let cache = profile.shape_cache();
1146 assert_eq!(cache.evaluators, 1);
1147 assert!(cache.hits >= 1, "shared qualifier should hit the cache");
1148 assert_eq!(cache.peak_entries, 3);
1149 assert!(cache.estimated_peak_bytes > 0);
1150 }
1151
1152 #[test]
1153 fn does_not_cache_cycle_dependent_results() {
1154 let mut arena = ShapeArena::new();
1158 let a = arena.reserve();
1159 let b = arena.reserve();
1160 let bottom = arena.insert(Shape::Or(Vec::new()));
1161 arena.set(a, Shape::And(vec![b, bottom]));
1162 arena.set(b, Shape::And(vec![a]));
1163
1164 let graph = Graph::new();
1165 let sparql = SparqlExecutor::new(&graph).unwrap();
1166 let node = term("x");
1167
1168 crate::profile::enable();
1169 {
1170 let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
1171 assert!(!evaluator.holds(&node, a));
1172 assert!(!evaluator.holds(&node, b));
1173 }
1174 let profile = crate::profile::take().unwrap();
1175 let cache = profile.shape_cache();
1176 assert!(cache.recursion_back_edges > 0);
1177 assert!(cache.non_cacheable_results > 0);
1178 }
1179}