use crate::frozen::FrozenIndexedDataset;
use crate::path::{PathBackend, node_of, pred, succ};
use crate::profile::ShapeCacheSample;
use crate::sparql::{SparqlExecutor, SparqlViolation};
use crate::value::{compare_terms, value_type_holds};
use oxrdf::{Graph, NamedNode, Term};
use regex::Regex;
use serde::{Deserialize, Serialize};
use shifty_algebra::render::{path_to_string, shape_to_string};
use shifty_algebra::{Path, Schema, Selector, Shape, ShapeArena, ShapeId, SparqlConstraint};
use shifty_opt::{FocusSource, PhysicalPlan, analyze};
use std::cmp::Ordering;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fmt;
use std::sync::OnceLock;
/// Outcome of evaluating one shape against one node.
#[derive(Debug, Clone, Copy)]
struct EvalResult {
/// Whether the shape held for the node.
holds: bool,
/// Whether the outcome may be stored in the memo table. `holds_memoized`
/// clears this when the evaluation ran into a recursion back edge.
cacheable: bool,
}
/// Mutable bookkeeping shared by all evaluations run through one evaluator.
#[derive(Default)]
struct EvalState {
/// Cached `holds` results keyed by `(shape, node)`.
memo: HashMap<(ShapeId, Term), bool>,
/// `(shape, node)` pairs whose evaluation is currently in progress; used to
/// detect recursion back edges.
active: HashSet<(ShapeId, Term)>,
/// Cache counters; `Some` only when the evaluator was created while
/// profiling was enabled.
telemetry: Option<ShapeCacheSample>,
}
/// Evaluates shapes from an arena against nodes, memoizing results for the
/// lifetime of the evaluator.
pub(crate) struct ShapeEvaluator<'a> {
/// Backend used to follow paths and list outgoing predicates.
g: &'a dyn PathBackend,
/// Arena that resolves `ShapeId`s to shape definitions.
arena: &'a ShapeArena,
/// Executor used for SPARQL constraints and SPARQL targets.
sparql: &'a SparqlExecutor,
/// Memo table, in-progress set and optional cache telemetry.
state: EvalState,
}
impl<'a> ShapeEvaluator<'a> {
pub(crate) fn new(
g: &'a dyn PathBackend,
arena: &'a ShapeArena,
sparql: &'a SparqlExecutor,
) -> Self {
Self {
g,
arena,
sparql,
state: EvalState {
telemetry: crate::profile::is_enabled().then(ShapeCacheSample::default),
..EvalState::default()
},
}
}
pub(crate) fn holds(&mut self, node: &Term, id: ShapeId) -> bool {
holds_memoized(self.g, self.arena, node, id, self.sparql, &mut self.state).holds
}
pub(crate) fn sparql(&self) -> &SparqlExecutor {
self.sparql
}
}
impl Drop for ShapeEvaluator<'_> {
    /// Reports the final memo size to the profiler when telemetry was
    /// collected; does nothing otherwise.
    fn drop(&mut self) {
        if let Some(mut sample) = self.state.telemetry {
            let memo = &self.state.memo;
            sample.entries = memo.len();
            sample.estimated_bytes = estimated_memo_bytes(memo);
            crate::profile::record_shape_cache(sample);
        }
    }
}
/// One explanation of why a shape did not hold for a value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Reason {
/// The term the reason is about.
pub value: Term,
/// Rendered path associated with the reason, when one is known.
pub path: Option<String>,
/// The shape that produced this reason.
pub shape: ShapeId,
/// Human-readable description of the failure.
pub message: String,
/// Nested reasons; omitted from serialized output when empty and
/// defaulted to empty when absent on input.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub sub_reasons: Vec<Reason>,
}
/// All reasons a single focus node failed a single statement.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Violation {
/// The focus node that failed.
pub focus: Term,
/// Index of the failing statement within the schema's or plan's statement list.
pub statement: usize,
/// Deduplicated reasons explaining the failure.
pub reasons: Vec<Reason>,
}
/// Result of a validation run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ValidationOutcome {
/// `true` exactly when `violations` is empty.
pub conforms: bool,
/// Every violation found, in statement order.
pub violations: Vec<Violation>,
}
/// Selects how the data graph and shapes graph are combined for validation.
///
/// In every mode except `UnionAll`, focus nodes are selected from the data
/// graph only.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ValidationGraphMode {
/// Evaluate against a dataset built from the data graph; the shapes graph
/// is only supplied when a SPARQL constraint mentions `shapesGraph`.
Data,
/// Evaluate against a dataset built with the `from_graph_union*`
/// constructors over both graphs. This is the default mode.
#[default]
Union,
/// Merge both graphs into one graph and use it both for focus selection
/// and as the evaluation context.
UnionAll,
}
/// Error returned when the schema's shapes cannot be stratified.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NonStratifiable {
/// The shape ids of each stratum that was reported as non-stratifiable.
pub components: Vec<Vec<ShapeId>>,
}
impl fmt::Display for NonStratifiable {
    /// Renders the offending components as `{@1 @2}; {@3}` after a fixed
    /// prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("non-stratifiable schema (recursion through negation): ")?;
        for (index, component) in self.components.iter().enumerate() {
            if index != 0 {
                f.write_str("; ")?;
            }
            let members = component
                .iter()
                .map(|shape| format!("@{}", shape.0))
                .collect::<Vec<String>>()
                .join(" ");
            write!(f, "{{{}}}", members)?;
        }
        Ok(())
    }
}
// Marker impl so `NonStratifiable` can be used as a standard error type; it
// relies on the derived `Debug` and the manual `Display` impl.
impl std::error::Error for NonStratifiable {}
/// Validates `data` against `schema`, using `data` itself as the evaluation
/// context.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the schema's shapes cannot be stratified.
pub fn validate(data: &Graph, schema: &Schema) -> Result<ValidationOutcome, NonStratifiable> {
    let context = data;
    validate_with_context(data, context, schema)
}
/// Validates `data` against `schema` with a separate shapes graph, using the
/// default [`ValidationGraphMode`].
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the schema's shapes cannot be stratified.
pub fn validate_graphs(
    data: &Graph,
    shapes: &Graph,
    schema: &Schema,
) -> Result<ValidationOutcome, NonStratifiable> {
    let mode = ValidationGraphMode::default();
    validate_graphs_with_mode(data, shapes, schema, mode)
}
/// Validates `data` against `schema`, combining the data and shapes graphs
/// as selected by `mode`.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the schema's shapes cannot be stratified.
pub fn validate_graphs_with_mode(
    data: &Graph,
    shapes: &Graph,
    schema: &Schema,
    mode: ValidationGraphMode,
) -> Result<ValidationOutcome, NonStratifiable> {
    // `UnionAll` validates the merged graph against itself and is handled
    // entirely by `validate_with_context`.
    let union_dataset = match mode {
        ValidationGraphMode::UnionAll => {
            let merged = graph_union(data, shapes);
            return validate_with_context(&merged, &merged, schema);
        }
        ValidationGraphMode::Data => false,
        ValidationGraphMode::Union => true,
    };

    let needs_shapes = uses_shapes_graph(&schema.arena);
    let dataset = match (union_dataset, needs_shapes) {
        (false, true) => FrozenIndexedDataset::from_graphs(data, shapes),
        (false, false) => FrozenIndexedDataset::from_graph(data),
        (true, true) => FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes),
        (true, false) => FrozenIndexedDataset::from_graph_union(data, shapes),
    };
    validate_with_frozen(data, schema, dataset, needs_shapes)
}
/// Validates the focus nodes found in `data`, evaluating shapes against
/// `context`.
///
/// When a SPARQL constraint mentions `shapesGraph`, `context` is also used as
/// the shapes graph of the dataset.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the schema's shapes cannot be stratified.
pub fn validate_with_context(
    data: &Graph,
    context: &Graph,
    schema: &Schema,
) -> Result<ValidationOutcome, NonStratifiable> {
    let needs_shapes = uses_shapes_graph(&schema.arena);
    let dataset = if !needs_shapes {
        FrozenIndexedDataset::from_graph(context)
    } else {
        FrozenIndexedDataset::from_graphs(context, context)
    };
    validate_with_frozen(data, schema, dataset, needs_shapes)
}
/// Runs every schema statement over its focus nodes using a prepared frozen
/// dataset and collects the violations.
///
/// Focus nodes are selected from `data`; shapes are evaluated against
/// `frozen`. Each focus node's explanation time is reported to the profiler
/// under the statement's shape name (or `@<id>` when unnamed).
///
/// # Errors
///
/// Returns [`NonStratifiable`] listing the offending strata when `analyze`
/// reports the arena as non-stratifiable.
fn validate_with_frozen(
    data: &Graph,
    schema: &Schema,
    frozen: FrozenIndexedDataset,
    has_shapes_graph: bool,
) -> Result<ValidationOutcome, NonStratifiable> {
    let analysis = analyze(&schema.arena);
    if !analysis.stratifiable {
        let mut components = Vec::new();
        for stratum in analysis.strata.iter() {
            if !stratum.stratifiable {
                components.push(stratum.shapes.clone());
            }
        }
        return Err(NonStratifiable { components });
    }

    let executor = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
    let backend = executor
        .frozen()
        .expect("validation executor always has a frozen dataset");
    let mut evaluator = ShapeEvaluator::new(backend, &schema.arena, &executor);

    let mut violations = Vec::new();
    for (statement, entry) in schema.statements.iter().enumerate() {
        let label = match schema.names.get(&entry.shape) {
            Some(name) => name.clone(),
            None => format!("@{}", entry.shape.0),
        };
        let foci = focus_nodes_with_evaluator(data, &entry.selector, &mut evaluator);
        prefetch_sparql_constraints(&schema.arena, entry.shape, &foci, &executor);
        for focus in foci {
            let started = std::time::Instant::now();
            let mut stack = HashSet::new();
            let mut reasons = explain(&mut evaluator, &focus, entry.shape, None, &mut stack);
            let elapsed_micros = started.elapsed().as_micros() as u64;
            crate::profile::record_shape(&label, elapsed_micros);
            dedup_reasons(&mut reasons);
            if reasons.is_empty() {
                continue;
            }
            violations.push(Violation {
                focus,
                statement,
                reasons,
            });
        }
    }

    let conforms = violations.is_empty();
    Ok(ValidationOutcome {
        conforms,
        violations,
    })
}
fn uses_shapes_graph(arena: &ShapeArena) -> bool {
(0..arena.len()).any(|i| {
matches!(arena.get(ShapeId(i as u32)), Shape::Sparql(c) if c.query.contains("shapesGraph"))
})
}
/// Returns a new graph containing every triple of `left` and `right`.
pub(crate) fn graph_union(left: &Graph, right: &Graph) -> Graph {
    let mut merged = left.clone();
    right.iter().for_each(|triple| {
        merged.insert(triple);
    });
    merged
}
/// Validates `data` against a physical plan, using `data` itself as the
/// evaluation context.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the plan's shapes cannot be stratified.
pub fn validate_plan(
    data: &Graph,
    plan: &PhysicalPlan,
) -> Result<ValidationOutcome, NonStratifiable> {
    let context = data;
    validate_plan_with_context(data, context, plan)
}
/// Validates `data` against a physical plan with a separate shapes graph,
/// using the default [`ValidationGraphMode`].
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the plan's shapes cannot be stratified.
pub fn validate_plan_graphs(
    data: &Graph,
    shapes: &Graph,
    plan: &PhysicalPlan,
) -> Result<ValidationOutcome, NonStratifiable> {
    let mode = ValidationGraphMode::default();
    validate_plan_graphs_with_mode(data, shapes, plan, mode)
}
/// Validates `data` against a physical plan, combining the data and shapes
/// graphs as selected by `mode`.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the plan's shapes cannot be stratified.
pub fn validate_plan_graphs_with_mode(
    data: &Graph,
    shapes: &Graph,
    plan: &PhysicalPlan,
    mode: ValidationGraphMode,
) -> Result<ValidationOutcome, NonStratifiable> {
    // `UnionAll` validates the merged graph against itself and is handled
    // entirely by `validate_plan_with_context`.
    let union_dataset = match mode {
        ValidationGraphMode::UnionAll => {
            let merged = graph_union(data, shapes);
            return validate_plan_with_context(&merged, &merged, plan);
        }
        ValidationGraphMode::Data => false,
        ValidationGraphMode::Union => true,
    };

    let needs_shapes = uses_shapes_graph(&plan.arena);
    let dataset = match (union_dataset, needs_shapes) {
        (false, true) => FrozenIndexedDataset::from_graphs(data, shapes),
        (false, false) => FrozenIndexedDataset::from_graph(data),
        (true, true) => FrozenIndexedDataset::from_graph_union_with_shapes(data, shapes),
        (true, false) => FrozenIndexedDataset::from_graph_union(data, shapes),
    };
    validate_plan_with_frozen(data, plan, dataset, needs_shapes)
}
/// Validates the focus nodes found in `data` against a physical plan,
/// evaluating shapes against `context`.
///
/// When a SPARQL constraint mentions `shapesGraph`, `context` is also used as
/// the shapes graph of the dataset.
///
/// # Errors
///
/// Returns [`NonStratifiable`] when the plan's shapes cannot be stratified.
pub fn validate_plan_with_context(
    data: &Graph,
    context: &Graph,
    plan: &PhysicalPlan,
) -> Result<ValidationOutcome, NonStratifiable> {
    let needs_shapes = uses_shapes_graph(&plan.arena);
    let dataset = if !needs_shapes {
        FrozenIndexedDataset::from_graph(context)
    } else {
        FrozenIndexedDataset::from_graphs(context, context)
    };
    validate_plan_with_frozen(data, plan, dataset, needs_shapes)
}
/// Runs every plan statement over its focus nodes using a prepared frozen
/// dataset and collects the violations.
///
/// Focus nodes are selected from `data`; shapes are evaluated against
/// `frozen`. Each focus node's explanation time is reported to the profiler
/// under the statement's shape name (or `@<id>` when unnamed).
///
/// # Errors
///
/// Returns [`NonStratifiable`] listing the offending strata when `analyze`
/// reports the arena as non-stratifiable.
fn validate_plan_with_frozen(
    data: &Graph,
    plan: &PhysicalPlan,
    frozen: FrozenIndexedDataset,
    has_shapes_graph: bool,
) -> Result<ValidationOutcome, NonStratifiable> {
    let analysis = analyze(&plan.arena);
    if !analysis.stratifiable {
        let mut components = Vec::new();
        for stratum in analysis.strata.iter() {
            if !stratum.stratifiable {
                components.push(stratum.shapes.clone());
            }
        }
        return Err(NonStratifiable { components });
    }

    let executor = SparqlExecutor::from_frozen(frozen, has_shapes_graph);
    let backend = executor
        .frozen()
        .expect("validation executor always has a frozen dataset");
    let mut evaluator = ShapeEvaluator::new(backend, &plan.arena, &executor);

    let mut violations = Vec::new();
    for (statement, entry) in plan.statements.iter().enumerate() {
        let label = match plan.names.get(&entry.shape) {
            Some(name) => name.clone(),
            None => format!("@{}", entry.shape.0),
        };
        let foci = focus_for_source(data, &entry.source, &mut evaluator);
        prefetch_sparql_constraints(&plan.arena, entry.shape, &foci, &executor);
        for focus in foci {
            let started = std::time::Instant::now();
            let mut stack = HashSet::new();
            let mut reasons = explain(&mut evaluator, &focus, entry.shape, None, &mut stack);
            let elapsed_micros = started.elapsed().as_micros() as u64;
            crate::profile::record_shape(&label, elapsed_micros);
            dedup_reasons(&mut reasons);
            if reasons.is_empty() {
                continue;
            }
            violations.push(Violation {
                focus,
                statement,
                reasons,
            });
        }
    }

    let conforms = violations.is_empty();
    Ok(ValidationOutcome {
        conforms,
        violations,
    })
}
/// Resolves a plan's focus source to the list of focus nodes.
///
/// Except for `FocusSource::Node`, which yields its constant unconditionally,
/// every result is restricted to terms that occur in `data`. A failing
/// SPARQL target query yields no focus nodes.
fn focus_for_source(
    data: &Graph,
    source: &FocusSource,
    evaluator: &mut ShapeEvaluator<'_>,
) -> Vec<Term> {
    match source {
        FocusSource::Node(constant) => vec![constant.clone()],
        FocusSource::SubjectsOf(predicate) => subjects_of(data, predicate),
        FocusSource::ObjectsOf(predicate) => objects_of(data, predicate),
        FocusSource::PathToConst { path, target } => {
            // Walk the path backwards from the constant, then keep only the
            // nodes that actually appear in the data graph.
            let mut foci = Vec::new();
            for candidate in pred(evaluator.g, target, path) {
                if graph_contains_term(data, &candidate) {
                    foci.push(candidate);
                }
            }
            foci
        }
        FocusSource::ScanFilter { path, qualifier } => {
            let mut foci = Vec::new();
            for candidate in all_nodes(data) {
                let reachable = succ(evaluator.g, &candidate, path);
                let qualifies = reachable
                    .iter()
                    .any(|value| evaluator.holds(value, *qualifier));
                if qualifies {
                    foci.push(candidate);
                }
            }
            foci
        }
        FocusSource::Sparql(target) => {
            let known = all_nodes(data);
            let selected = evaluator
                .sparql
                .target_nodes(&target.query)
                .unwrap_or_default();
            selected
                .into_iter()
                .filter(|node| known.contains(node))
                .collect()
        }
    }
}
pub fn focus_nodes(data: &Graph, sel: &Selector, arena: &ShapeArena) -> Vec<Term> {
let sparql =
SparqlExecutor::new(data).expect("building an in-memory Oxigraph store should succeed");
let mut evaluator = ShapeEvaluator::new(data, arena, &sparql);
focus_nodes_with_evaluator(data, sel, &mut evaluator)
}
/// Returns the focus nodes that `sel` selects from `data`, evaluating paths
/// and qualifier shapes through the supplied backend and executor.
///
/// A fresh evaluator (and memo table) is created for the call.
pub(crate) fn focus_nodes_with(
    data: &Graph,
    backend: &dyn PathBackend,
    sel: &Selector,
    arena: &ShapeArena,
    sparql: &SparqlExecutor,
) -> Vec<Term> {
    let mut scratch = ShapeEvaluator::new(backend, arena, sparql);
    let foci = focus_nodes_with_evaluator(data, sel, &mut scratch);
    foci
}
/// Resolves a selector to the list of focus nodes using an existing
/// evaluator.
///
/// Except for `Selector::IsConst`, which yields its constant unconditionally,
/// every result is restricted to terms that occur in `data`. A failing
/// SPARQL target query yields no focus nodes.
fn focus_nodes_with_evaluator(
    data: &Graph,
    sel: &Selector,
    evaluator: &mut ShapeEvaluator<'_>,
) -> Vec<Term> {
    match sel {
        Selector::IsConst(constant) => vec![constant.clone()],
        Selector::HasOut(predicate) => subjects_of(data, predicate),
        Selector::HasIn(predicate) => objects_of(data, predicate),
        Selector::HasPath(path, qual) => {
            let arena = evaluator.arena;
            if let Shape::TestConst(target) = arena.get(*qual) {
                // Constant qualifier: walk the path backwards from the
                // constant instead of scanning every node.
                let mut foci = Vec::new();
                for candidate in pred(evaluator.g, target, path) {
                    if graph_contains_term(data, &candidate) {
                        foci.push(candidate);
                    }
                }
                foci
            } else {
                let mut foci = Vec::new();
                for candidate in all_nodes(data) {
                    let reachable = succ(evaluator.g, &candidate, path);
                    let qualifies = reachable.iter().any(|value| evaluator.holds(value, *qual));
                    if qualifies {
                        foci.push(candidate);
                    }
                }
                foci
            }
        }
        Selector::Sparql(target) => {
            let known = all_nodes(data);
            let selected = evaluator
                .sparql
                .target_nodes(&target.query)
                .unwrap_or_default();
            selected
                .into_iter()
                .filter(|node| known.contains(node))
                .collect()
        }
    }
}
/// Evaluates whether shape `id` holds for node `v`, with memoization and
/// recursion detection.
///
/// Results are looked up in and stored to `state.memo`. While a `(id, v)`
/// pair is being evaluated it is kept in `state.active`; meeting the same
/// pair again is treated as a recursion back edge, which is answered with
/// `holds: true` and marked non-cacheable. Non-cacheability propagates to
/// every enclosing result that consulted it, so such results are never
/// memoized. Telemetry counters are updated when telemetry is present.
fn holds_memoized(
g: &dyn PathBackend,
arena: &ShapeArena,
v: &Term,
id: ShapeId,
sparql: &SparqlExecutor,
state: &mut EvalState,
) -> EvalResult {
let key = (id, v.clone());
// Fast path: a previously memoized answer.
if let Some(&holds) = state.memo.get(&key) {
if let Some(telemetry) = state.telemetry.as_mut() {
telemetry.hits += 1;
}
return EvalResult {
holds,
cacheable: true,
};
}
// Every lookup that is not a hit counts as a miss, including back edges.
if let Some(telemetry) = state.telemetry.as_mut() {
telemetry.misses += 1;
}
// `insert` returns false when the pair is already being evaluated further
// up the call stack: assume it holds and forbid caching of anything that
// depends on this assumption.
if !state.active.insert(key.clone()) {
if let Some(telemetry) = state.telemetry.as_mut() {
telemetry.recursion_back_edges += 1;
}
return EvalResult {
holds: true,
cacheable: false,
}; }
let result = match arena.get(id) {
Shape::Top | Shape::Pending => EvalResult {
holds: true,
cacheable: true,
},
// A SPARQL constraint holds only if evaluation succeeds and reports no
// violations; an evaluation error counts as not holding.
Shape::Sparql(constraint) => EvalResult {
holds: sparql
.constraint_violations(constraint, v)
.is_ok_and(|violations| violations.is_empty()),
cacheable: true,
},
Shape::TestConst(c) => EvalResult {
holds: v == c,
cacheable: true,
},
Shape::TestType(t) => EvalResult {
holds: value_type_holds(t, v),
cacheable: true,
},
Shape::TestKind(k) => EvalResult {
holds: k.matches(v),
cacheable: true,
},
Shape::Closed(q) => EvalResult {
holds: closed_offenders(g, v, q).is_empty(),
cacheable: true,
},
// The value set along `path` must equal the objects of predicate `p`.
Shape::Eq(path, p) => EvalResult {
holds: succ(g, v, path) == objects(g, v, p),
cacheable: true,
},
// The value set along `path` must share no term with the objects of `p`.
Shape::Disj(path, p) => EvalResult {
holds: succ(g, v, path).is_disjoint(&objects(g, v, p)),
cacheable: true,
},
Shape::Lt(path, p) => EvalResult {
holds: all_pairs_ordered(g, v, path, p, false),
cacheable: true,
},
Shape::Le(path, p) => EvalResult {
holds: all_pairs_ordered(g, v, path, p, true),
cacheable: true,
},
Shape::UniqueLang(path) => EvalResult {
holds: unique_lang(&succ(g, v, path)),
cacheable: true,
},
Shape::Not(c) => {
let child = holds_memoized(g, arena, v, *c, sparql, state);
EvalResult {
holds: !child.holds,
cacheable: child.cacheable,
}
}
// Conjunction: stops at the first child that does not hold. Only the
// children actually evaluated contribute to `cacheable`.
Shape::And(cs) => {
let mut result = EvalResult {
holds: true,
cacheable: true,
};
for child in cs {
let child = holds_memoized(g, arena, v, *child, sparql, state);
result.cacheable &= child.cacheable;
if !child.holds {
result.holds = false;
break;
}
}
result
}
// Disjunction: stops at the first child that holds; an empty list never
// holds.
Shape::Or(cs) => {
let mut result = EvalResult {
holds: false,
cacheable: true,
};
for child in cs {
let child = holds_memoized(g, arena, v, *child, sparql, state);
result.cacheable &= child.cacheable;
if child.holds {
result.holds = true;
break;
}
}
result
}
// Counts the values along `path` that satisfy `qualifier` and checks the
// count against the optional inclusive bounds.
Shape::Count {
path,
min,
max,
qualifier,
} => {
let mut n = 0;
let mut cacheable = true;
for value in succ(g, v, path) {
let qualified = holds_memoized(g, arena, &value, *qualifier, sparql, state);
cacheable &= qualified.cacheable;
n += u64::from(qualified.holds);
}
EvalResult {
holds: min.is_none_or(|m| n >= m) && max.is_none_or(|m| n <= m),
cacheable,
}
}
};
// Evaluation of this pair is finished; it is no longer a back-edge target.
state.active.remove(&key);
if result.cacheable {
state.memo.insert(key, result.holds);
if let Some(telemetry) = state.telemetry.as_mut() {
telemetry.insertions += 1;
}
} else if let Some(telemetry) = state.telemetry.as_mut() {
telemetry.non_cacheable_results += 1;
}
result
}
/// Estimates the memory used by the memo table: one entry plus one control
/// byte per unit of capacity, plus the estimated heap bytes of every key
/// term.
fn estimated_memo_bytes(memo: &HashMap<(ShapeId, Term), bool>) -> usize {
    const CONTROL_BYTE_ESTIMATE: usize = 1;
    let per_bucket = std::mem::size_of::<((ShapeId, Term), bool)>() + CONTROL_BYTE_ESTIMATE;
    let mut total = memo.capacity() * per_bucket;
    for (_, term) in memo.keys() {
        total += estimated_term_heap_bytes(term);
    }
    total
}
/// Estimates the heap bytes of a term from the lengths of its strings.
///
/// For literals this is the lexical value plus either the language tag or,
/// for untagged literals, the datatype IRI; `xsd:string` contributes
/// nothing.
fn estimated_term_heap_bytes(term: &Term) -> usize {
    match term {
        Term::NamedNode(node) => node.as_str().len(),
        Term::BlankNode(node) => node.as_str().len(),
        Term::Literal(literal) => {
            let annotation_bytes = match literal.language() {
                Some(tag) => tag.len(),
                None => {
                    let datatype = literal.datatype();
                    if datatype.as_str() == "http://www.w3.org/2001/XMLSchema#string" {
                        0
                    } else {
                        datatype.as_str().len()
                    }
                }
            };
            literal.value().len() + annotation_bytes
        }
    }
}
/// Asks the executor to prefetch every focus-level SPARQL constraint
/// reachable from `root` for the whole batch of focus nodes.
///
/// Does nothing for fewer than two focus nodes. Prefetch results, including
/// errors, are deliberately ignored.
fn prefetch_sparql_constraints(
    arena: &ShapeArena,
    root: ShapeId,
    foci: &[Term],
    sparql: &SparqlExecutor,
) {
    if foci.len() >= 2 {
        let mut visited = HashSet::new();
        let mut found = Vec::new();
        collect_focus_sparql(arena, root, &mut visited, &mut found);
        found.into_iter().for_each(|constraint| {
            let _ = sparql.prefetch_constraint(constraint, foci);
        });
    }
}
/// Collects, in depth-first pre-order, the SPARQL constraints reachable from
/// `id` through `Not`, `And` and `Or` only.
///
/// Shapes already present in `seen` are skipped; every visited shape is
/// added to it. Other shape kinds are not descended into.
fn collect_focus_sparql<'a>(
    arena: &'a ShapeArena,
    id: ShapeId,
    seen: &mut HashSet<ShapeId>,
    out: &mut Vec<&'a SparqlConstraint>,
) {
    // Explicit stack; children are pushed in reverse so they are popped in
    // their declared order.
    let mut pending = vec![id];
    while let Some(current) = pending.pop() {
        if !seen.insert(current) {
            continue;
        }
        match arena.get(current) {
            Shape::Sparql(constraint) => out.push(constraint),
            Shape::Not(inner) => pending.push(*inner),
            Shape::And(ids) | Shape::Or(ids) => pending.extend(ids.iter().rev().copied()),
            _ => {}
        }
    }
}
/// Produces the reasons why shape `id` does not hold for `node`.
///
/// Returns an empty vector when the shape holds, or when `(id, node)` is
/// already on `stack` (i.e. it is being explained further up the call
/// chain). `path_ctx` is the rendered path under which `node` was reached, if
/// any, and is attached to the produced reasons.
fn explain(
evaluator: &mut ShapeEvaluator<'_>,
node: &Term,
id: ShapeId,
path_ctx: Option<&str>,
stack: &mut HashSet<(ShapeId, Term)>,
) -> Vec<Reason> {
let key = (id, node.clone());
// Guard against infinite recursion on cyclic shapes.
if !stack.insert(key.clone()) {
return Vec::new(); }
// Nothing to explain when the shape holds.
if evaluator.holds(node, id) {
stack.remove(&key);
return Vec::new();
}
// The shape is cloned so the evaluator can be borrowed mutably below.
let reasons = match evaluator.arena.get(id).clone() {
Shape::Top | Shape::Pending => Vec::new(),
Shape::Sparql(constraint) => {
match evaluator.sparql.constraint_violations(&constraint, node) {
// One reason per reported violation. The path is taken from the
// violation, then from `path_ctx`, then from the constraint itself.
Ok(violations) => violations
.into_iter()
.map(|violation| {
let message = sparql_violation_message(&violation, &constraint, node);
Reason {
value: violation.value.unwrap_or_else(|| node.clone()),
path: violation
.path
.map(|path| path.to_string())
.or_else(|| path_ctx.map(str::to_string))
.or_else(|| constraint.path.as_ref().map(path_to_string)),
message,
shape: id,
sub_reasons: Vec::new(),
}
})
.collect(),
// An evaluation error is reported as a reason of its own.
Err(error) => vec![Reason {
value: node.clone(),
path: path_ctx.map(str::to_string),
shape: id,
message: format!("SPARQL constraint evaluation failed: {error}"),
sub_reasons: Vec::new(),
}],
}
}
// Leaf tests share a generic "<shape> not satisfied" message.
Shape::TestConst(_)
| Shape::TestType(_)
| Shape::TestKind(_)
| Shape::Eq(..)
| Shape::Disj(..)
| Shape::Lt(..)
| Shape::Le(..)
| Shape::UniqueLang(_) => leaf(
evaluator.holds(node, id),
node,
id,
path_ctx,
format!("{} not satisfied", shape_to_string(evaluator.arena, id)),
),
Shape::Closed(q) => {
let bad = closed_offenders(evaluator.g, node, &q);
if bad.is_empty() {
Vec::new()
} else {
let preds: Vec<String> = bad.iter().map(|p| p.to_string()).collect();
vec![Reason {
value: node.clone(),
path: path_ctx.map(str::to_string),
shape: id,
message: format!("closed: unexpected predicate(s) {}", preds.join(", ")),
sub_reasons: Vec::new(),
}]
}
}
// A negation fails when the inner shape yields no reasons.
Shape::Not(c) => {
if explain(evaluator, node, c, path_ctx, stack).is_empty() {
vec![Reason {
value: node.clone(),
path: path_ctx.map(str::to_string),
shape: id,
message: "negated shape unexpectedly held".to_string(),
sub_reasons: Vec::new(),
}]
} else {
Vec::new()
}
}
// A conjunction reports the reasons of all of its children.
Shape::And(cs) => cs
.iter()
.flat_map(|c| explain(evaluator, node, *c, path_ctx, stack))
.collect(),
// A disjunction reports a single reason that nests the reasons of every
// alternative, unless some alternative produced no reasons.
Shape::Or(cs) => {
let mut sub_reasons = Vec::new();
let mut satisfied = false;
for c in &cs {
let sub = explain(evaluator, node, *c, path_ctx, stack);
if sub.is_empty() {
satisfied = true;
break;
}
sub_reasons.extend(sub);
}
if satisfied {
Vec::new()
} else {
vec![Reason {
value: node.clone(),
path: path_ctx.map(str::to_string),
shape: id,
message: format!("none of {} alternative(s) satisfied", cs.len()),
sub_reasons,
}]
}
}
Shape::Count {
path,
min,
max,
qualifier,
} => explain_count(evaluator, node, id, &path, min, max, qualifier, stack),
};
stack.remove(&key);
reasons
}
/// Explains a failed `Count` shape for `node`.
///
/// Counts the values along `path` that satisfy `qualifier`. When the count
/// exceeds `max`, a single "at most" reason is emitted, except when `max` is
/// zero and the qualifier is a negation: then each matching value is
/// explained against the negated inner shape. When the count is below `min`,
/// an "at least" reason is emitted.
// The parameter list mirrors the fields of `Shape::Count` plus the explain
// context, hence the lint allowance.
#[allow(clippy::too_many_arguments)]
fn explain_count(
    evaluator: &mut ShapeEvaluator<'_>,
    node: &Term,
    id: ShapeId,
    path: &Path,
    min: Option<u64>,
    max: Option<u64>,
    qualifier: ShapeId,
    stack: &mut HashSet<(ShapeId, Term)>,
) -> Vec<Reason> {
    let path_str = path_to_string(path);

    let mut matched: Vec<Term> = Vec::new();
    for candidate in succ(evaluator.g, node, path) {
        if evaluator.holds(&candidate, qualifier) {
            matched.push(candidate);
        }
    }
    let n = matched.len() as u64;

    let mut reasons = Vec::new();
    match max {
        Some(mx) if n > mx => match evaluator.arena.get(qualifier).clone() {
            Shape::Not(inner) if mx == 0 => {
                for offending in &matched {
                    let nested = explain(evaluator, offending, inner, Some(&path_str), stack);
                    reasons.extend(nested);
                }
            }
            _ => {
                let message = format!("at most {mx} value(s) may match along {path_str}, found {n}");
                reasons.push(Reason {
                    value: node.clone(),
                    path: Some(path_str.clone()),
                    shape: id,
                    message,
                    sub_reasons: Vec::new(),
                });
            }
        },
        _ => {}
    }
    match min {
        Some(mn) if n < mn => {
            let message = format!("at least {mn} value(s) required along {path_str}, found {n}");
            reasons.push(Reason {
                value: node.clone(),
                path: Some(path_str.clone()),
                shape: id,
                message,
                sub_reasons: Vec::new(),
            });
        }
        _ => {}
    }
    reasons
}
/// Chooses the message for a SPARQL violation.
///
/// Precedence: the violation's own message; otherwise the constraint's
/// message templates, each expanded and joined with `"; "`; otherwise a
/// generic message that names the constraint's shape and the violating value
/// when they are available.
fn sparql_violation_message(
    violation: &SparqlViolation,
    constraint: &SparqlConstraint,
    node: &Term,
) -> String {
    if let Some(explicit) = violation.message.as_ref() {
        return term_text(explicit);
    }

    if constraint.messages.is_empty() {
        let mut fallback = match constraint.shape.as_ref() {
            None => "SPARQL constraint not satisfied".to_string(),
            Some(shape) => format!("SPARQL constraint at {shape} not satisfied"),
        };
        if let Some(value) = violation.value.as_ref() {
            fallback.push_str(&format!(" (value: {value})"));
        }
        return fallback;
    }

    let mut rendered: Vec<String> = Vec::new();
    for template in constraint.messages.iter() {
        let text = term_text(template);
        rendered.push(apply_message_template(&text, node, &violation.bindings));
    }
    rendered.join("; ")
}
pub(crate) fn apply_message_template(
template: &str,
focus: &Term,
bindings: &HashMap<String, Term>,
) -> String {
static RE: OnceLock<Regex> = OnceLock::new();
let re = RE
.get_or_init(|| Regex::new(r"\{(\$[A-Za-z_]\w*|\?[A-Za-z_]\w*)\}").expect("static regex"));
re.replace_all(template, |caps: ®ex::Captures| {
let placeholder = &caps[1];
let name = &placeholder[1..]; let term = if name == "this" {
Some(focus)
} else {
bindings.get(name)
};
term.map(|t| match t {
Term::NamedNode(n) => format!("<{}>", n.as_str()),
Term::BlankNode(b) => format!("_:{}", b.as_str()),
Term::Literal(l) => l.value().to_string(),
})
.unwrap_or_else(|| placeholder.to_string())
})
.to_string()
}
/// Returns the lexical value for literals and the `Display` rendering for
/// every other term.
fn term_text(term: &Term) -> String {
    if let Term::Literal(literal) = term {
        literal.value().to_string()
    } else {
        term.to_string()
    }
}
/// Builds the reason list for a leaf check: empty when `ok`, otherwise a
/// single reason for `node` carrying `message`.
fn leaf(
    ok: bool,
    node: &Term,
    id: ShapeId,
    path_ctx: Option<&str>,
    message: String,
) -> Vec<Reason> {
    if ok {
        return Vec::new();
    }
    let reason = Reason {
        value: node.clone(),
        path: path_ctx.map(str::to_string),
        shape: id,
        message,
        sub_reasons: Vec::new(),
    };
    vec![reason]
}
/// Returns `true` when every value along `path` compares less than (or, with
/// `allow_eq`, less than or equal to) every object of predicate `p`.
///
/// Pairs that cannot be compared make the check fail.
fn all_pairs_ordered(
    g: &dyn PathBackend,
    v: &Term,
    path: &Path,
    p: &NamedNode,
    allow_eq: bool,
) -> bool {
    let lhs = succ(g, v, path);
    let rhs = objects(g, v, p);
    lhs.iter().all(|left| {
        rhs.iter().all(|right| match compare_terms(left, right) {
            Some(Ordering::Less) => true,
            Some(Ordering::Equal) => allow_eq,
            _ => false,
        })
    })
}
/// Returns the values reachable from `v` over the single predicate `p`.
fn objects(g: &dyn PathBackend, v: &Term, p: &NamedNode) -> HashSet<Term> {
    let single_step = Path::Pred(p.clone());
    succ(g, v, &single_step)
}
/// Returns the outgoing predicates of `node` that are not in the allowed
/// set `q`.
fn closed_offenders(
    g: &dyn PathBackend,
    node: &Term,
    q: &BTreeSet<NamedNode>,
) -> BTreeSet<NamedNode> {
    let mut offenders = BTreeSet::new();
    for predicate in g.out_predicates(node) {
        if !q.contains(&predicate) {
            offenders.insert(predicate);
        }
    }
    offenders
}
/// Returns `true` when no two language-tagged literals in `values` share a
/// language tag, compared ASCII case-insensitively.
///
/// Terms that are not language-tagged literals are ignored.
fn unique_lang(values: &HashSet<Term>) -> bool {
    let mut seen_tags = HashSet::new();
    values
        .iter()
        .filter_map(|term| match term {
            Term::Literal(literal) => literal.language(),
            _ => None,
        })
        .all(|tag| seen_tags.insert(tag.to_ascii_lowercase()))
}
/// Removes reasons whose rendered value and message duplicate an earlier
/// reason, keeping the first occurrence and the original order.
fn dedup_reasons(reasons: &mut Vec<Reason>) {
    let mut seen = HashSet::new();
    reasons.retain(|reason| {
        let fingerprint = (reason.value.to_string(), reason.message.clone());
        seen.insert(fingerprint)
    });
}
/// Converts a borrowed triple subject into an owned `Term`.
fn subject_term(s: oxrdf::NamedOrBlankNodeRef) -> Term {
    let owned = s.into_owned();
    crate::path::term_of(owned)
}
/// Returns the distinct subjects of all triples with predicate `p`, in
/// first-seen order.
fn subjects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
    let mut seen = HashSet::new();
    let mut ordered = Vec::new();
    for triple in data.triples_for_predicate(p.as_ref()) {
        let term = subject_term(triple.subject);
        if seen.insert(term.clone()) {
            ordered.push(term);
        }
    }
    ordered
}
/// Returns the distinct objects of all triples with predicate `p`, in
/// first-seen order.
fn objects_of(data: &Graph, p: &NamedNode) -> Vec<Term> {
    let mut seen = HashSet::new();
    let mut ordered = Vec::new();
    for triple in data.triples_for_predicate(p.as_ref()) {
        let term = triple.object.into_owned();
        if seen.insert(term.clone()) {
            ordered.push(term);
        }
    }
    ordered
}
/// Returns every term that occurs as a subject or an object in `g`.
fn all_nodes(g: &Graph) -> HashSet<Term> {
    g.iter()
        .flat_map(|triple| [subject_term(triple.subject), triple.object.into_owned()])
        .collect()
}
fn graph_contains_term(g: &Graph, term: &Term) -> bool {
node_of(term).is_some_and(|node| g.triples_for_subject(&node).next().is_some())
|| g.triples_for_object(term).next().is_some()
}
// Unit tests for the memoizing evaluator's cache behaviour.
#[cfg(test)]
mod tests {
use super::*;
use oxrdf::{NamedNode, Triple};
/// Builds the IRI `http://ex/<local>`.
fn iri(local: &str) -> NamedNode {
NamedNode::new(format!("http://ex/{local}")).unwrap()
}
/// Builds the IRI `http://ex/<local>` as a `Term`.
fn term(local: &str) -> Term {
Term::NamedNode(iri(local))
}
/// Two focus nodes point at the same value, so the qualifier check for that
/// value is computed once and served from the memo the second time.
#[test]
fn memoizes_shared_value_checks_across_focus_nodes() {
let p = iri("p");
let shared = term("shared");
let mut graph = Graph::new();
graph.insert(&Triple::new(iri("a"), p.clone(), shared.clone()));
graph.insert(&Triple::new(iri("b"), p.clone(), shared.clone()));
let mut arena = ShapeArena::new();
let qualifier = arena.insert(Shape::TestConst(shared));
let root = arena.insert(Shape::Count {
path: Path::Pred(p),
min: Some(1),
max: None,
qualifier,
});
let sparql = SparqlExecutor::new(&graph).unwrap();
crate::profile::enable();
// The evaluator is scoped so that it is dropped, and its cache sample
// recorded, before the profile is taken.
{
let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
assert!(evaluator.holds(&term("a"), root));
assert!(evaluator.holds(&term("b"), root));
}
let profile = crate::profile::take().unwrap();
let cache = profile.shape_cache();
assert_eq!(cache.evaluators, 1);
assert!(cache.hits >= 1, "shared qualifier should hit the cache");
// Expected entries: (qualifier, shared), (root, a) and (root, b).
assert_eq!(cache.peak_entries, 3);
assert!(cache.estimated_peak_bytes > 0);
}
/// Shapes `a` and `b` reference each other, so evaluating either meets a
/// recursion back edge and produces results that must not be memoized.
#[test]
fn does_not_cache_cycle_dependent_results() {
let mut arena = ShapeArena::new();
let a = arena.reserve();
let b = arena.reserve();
// An empty disjunction never holds.
let bottom = arena.insert(Shape::Or(Vec::new()));
arena.set(a, Shape::And(vec![b, bottom]));
arena.set(b, Shape::And(vec![a]));
let graph = Graph::new();
let sparql = SparqlExecutor::new(&graph).unwrap();
let node = term("x");
crate::profile::enable();
// Scoped so the evaluator is dropped before the profile is taken.
{
let mut evaluator = ShapeEvaluator::new(&graph, &arena, &sparql);
assert!(!evaluator.holds(&node, a));
assert!(!evaluator.holds(&node, b));
}
let profile = crate::profile::take().unwrap();
let cache = profile.shape_cache();
assert!(cache.recursion_back_edges > 0);
assert!(cache.non_cacheable_results > 0);
}
}