Skip to main content

shifty_parse/
lower.rs

1//! Lower a loaded shapes graph into the formalism [`Schema`].
2//!
3//! Every SHACL Core construct collapses into the small IR, applying the sugar
4//! rules from the gap analysis (`class → path`, `minCount/maxCount → Count`,
5//! per-value constraints wrapped in `∀π = ∃≤0 π.¬φ`, `xone → ∧∨¬`, …). Each
6//! shape lowers to a **focus-node predicate** `φ`, so `sh:property`/`sh:node`
7//! compose by conjunction. Unsupported AF constructs emit diagnostics.
8
9use crate::diagnostics::{DiagLevel, Diagnostic};
10use crate::graph::{Loaded, term_to_node};
11use crate::path::parse_path;
12use crate::vocab;
13use oxrdf::{Literal, NamedNode, NamedOrBlankNode, Term};
14use shifty_algebra::{
15    Bound, NodeExpr, NodeKindSet, Path, Rule, RuleHead, Schema, Selector, Severity, Shape,
16    ShapeArena, ShapeId, SparqlConstraint, SparqlConstruct, SparqlQueryKind, SparqlTarget,
17    Statement, ValueType,
18};
19use spargebra::{Query, SparqlParser};
20use std::collections::{BTreeSet, HashMap, HashSet};
21
/// Result of lowering a shapes graph: the produced [`Schema`] together with
/// the diagnostics collected while lowering.
pub struct Lowered {
    /// Schema assembled by [`lower`] from the shape arena, statements, rules,
    /// and shape names.
    pub schema: Schema,
    /// Diagnostics emitted during lowering.
    pub diagnostics: Vec<Diagnostic>,
}
26
27/// Lower a loaded graph into a schema plus diagnostics.
28pub fn lower(g: &Loaded) -> Lowered {
29    let mut l = Lowerer {
30        g,
31        arena: ShapeArena::new(),
32        cache: HashMap::new(),
33        statements: Vec::new(),
34        rules: Vec::new(),
35        diags: Vec::new(),
36    };
37    let shapes = l.discover_shapes();
38    for s in &shapes {
39        l.lower_shape(s);
40    }
41    for s in &shapes {
42        // selectors are shared by the shape's statements and its rules
43        let selectors = l.target_selectors(s);
44        if let Some(shape) = l.cache.get(s).copied() {
45            for sel in &selectors {
46                l.statements.push(Statement {
47                    selector: sel.clone(),
48                    shape,
49                });
50            }
51        }
52        l.parse_rules(s, &selectors);
53    }
54    let names = l
55        .cache
56        .iter()
57        .filter_map(|(node, id)| match node {
58            NamedOrBlankNode::NamedNode(n) => Some((*id, n.as_str().to_string())),
59            NamedOrBlankNode::BlankNode(_) => None,
60        })
61        .collect();
62    let schema = Schema {
63        arena: l.arena,
64        statements: l.statements,
65        rules: l.rules,
66        names,
67    };
68    schema.arena.debug_assert_finalized();
69    Lowered {
70        schema,
71        diagnostics: l.diags,
72    }
73}
74
/// Mutable state threaded through one lowering run.
struct Lowerer<'a> {
    /// The loaded shapes graph being lowered.
    g: &'a Loaded,
    /// Arena that holds every lowered shape.
    arena: ShapeArena,
    /// Shape node → arena id. An entry is inserted before the shape's body is
    /// lowered, so a shape reached again during lowering gets the same id back.
    cache: HashMap<NamedOrBlankNode, ShapeId>,
    /// One entry per (target selector, shape) pair.
    statements: Vec<Statement>,
    /// Rules lowered from `sh:rule`, one per (target selector, rule) pair.
    rules: Vec<Rule>,
    /// Diagnostics accumulated so far.
    diags: Vec<Diagnostic>,
}
83
84impl Lowerer<'_> {
85    fn diag(&mut self, level: DiagLevel, msg: impl Into<String>, subj: &NamedOrBlankNode) {
86        self.diags
87            .push(Diagnostic::new(level, msg, Some(subj.to_string())));
88    }
89
90    /// Subjects that are declared shapes: typed NodeShape/PropertyShape, or
91    /// carrying `sh:path` or a target predicate. Referenced-only shapes are
92    /// pulled in on demand during lowering. Sorted for deterministic output.
93    fn discover_shapes(&self) -> Vec<NamedOrBlankNode> {
94        let mut found: HashSet<NamedOrBlankNode> = HashSet::new();
95        for triple in self.g.graph.iter() {
96            let p = triple.predicate;
97            let is_target = p == vocab::SH_TARGET_NODE
98                || p == vocab::SH_TARGET_CLASS
99                || p == vocab::SH_TARGET_SUBJECTS_OF
100                || p == vocab::SH_TARGET_OBJECTS_OF
101                || p == vocab::SH_TARGET;
102            if p == vocab::SH_PATH || p == vocab::SH_SPARQL || p == vocab::SH_RULE || is_target {
103                found.insert(triple.subject.into_owned());
104            }
105            if p == vocab::RDF_TYPE
106                && let Term::NamedNode(ty) = triple.object.into_owned()
107                && (ty.as_ref() == vocab::SH_NODE_SHAPE || ty.as_ref() == vocab::SH_PROPERTY_SHAPE)
108            {
109                found.insert(triple.subject.into_owned());
110            }
111        }
112        let mut shapes: Vec<NamedOrBlankNode> = found.into_iter().collect();
113        shapes.sort_by_key(|n| n.to_string());
114        shapes
115    }
116
117    fn lower_shape(&mut self, s: &NamedOrBlankNode) -> ShapeId {
118        if let Some(id) = self.cache.get(s) {
119            return *id;
120        }
121        let id = self.arena.reserve();
122        self.cache.insert(s.clone(), id);
123
124        if self.bool_prop(s, vocab::SH_DEACTIVATED) {
125            self.arena.set(id, Shape::Top);
126            return id;
127        }
128
129        let path = self.parse_shape_path(s);
130        let mut conjuncts: Vec<ShapeId> = Vec::new();
131
132        // Value-scoped constraints: each applies to every value node along the
133        // path (or to the focus node directly when there is no path).
134        let value = self.collect_value_constraints(s);
135        if !value.is_empty() {
136            let value_phi = self.arena.and(value);
137            match &path {
138                Some(p) => {
139                    // ∀π.φ  ≡  ∃≤0 π.¬φ
140                    let neg = self.arena.not(value_phi);
141                    let c = self.arena.count(p.clone(), None, Some(0), neg);
142                    conjuncts.push(c);
143                }
144                None => conjuncts.push(value_phi),
145            }
146        }
147
148        self.collect_path_constraints(s, path.as_ref(), &mut conjuncts);
149
150        if self.bool_prop(s, vocab::SH_CLOSED) {
151            let q = self.closed_allowed(s);
152            let c = self.arena.insert(Shape::Closed(q));
153            conjuncts.push(c);
154        }
155
156        for constraint_term in self.g.objects(s, vocab::SH_SPARQL) {
157            let Some(constraint_node) = term_to_node(&constraint_term) else {
158                self.diag(DiagLevel::Error, "sh:sparql must reference a resource", s);
159                continue;
160            };
161            let parsed = if let Some(Term::Literal(query)) =
162                self.g.object(&constraint_node, vocab::SH_SELECT)
163            {
164                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Select)
165                    .map(|query| (SparqlQueryKind::Select, query))
166            } else if let Some(Term::Literal(query)) =
167                self.g.object(&constraint_node, vocab::SH_ASK)
168            {
169                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Ask)
170                    .map(|query| (SparqlQueryKind::Ask, query))
171            } else {
172                self.diag(
173                    DiagLevel::Error,
174                    "sh:sparql constraint requires sh:select or sh:ask",
175                    &constraint_node,
176                );
177                None
178            };
179            if let Some((kind, query)) = parsed {
180                let shape = Some(match s {
181                    NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n.clone()),
182                    NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b.clone()),
183                });
184                // `sh:message` on the SPARQL constraint takes precedence; absent
185                // that, fall back to the owning shape's `sh:message` (SHACL §5.2.1).
186                let mut messages: Vec<Term> = self.g.objects(&constraint_node, vocab::SH_MESSAGE);
187                if messages.is_empty() {
188                    messages = self.g.objects(s, vocab::SH_MESSAGE);
189                }
190                let constraint = SparqlConstraint {
191                    kind,
192                    query,
193                    path: path.clone(),
194                    shape,
195                    messages,
196                };
197                conjuncts.push(self.arena.insert(Shape::Sparql(constraint)));
198            }
199        }
200
201        let body = if conjuncts.is_empty() {
202            self.arena.top()
203        } else if conjuncts.len() == 1 {
204            if conjuncts[0] == id {
205                self.arena.top()
206            } else {
207                conjuncts[0]
208            }
209        } else {
210            self.arena.insert(Shape::And(conjuncts))
211        };
212        self.arena.set(
213            id,
214            Shape::Annotated {
215                severity: self.severity(s),
216                shape: body,
217            },
218        );
219        id
220    }
221
222    fn severity(&self, shape: &NamedOrBlankNode) -> Severity {
223        match self.g.object(shape, vocab::SH_SEVERITY) {
224            Some(Term::NamedNode(value)) => Severity::from_named_node(value),
225            _ => Severity::Violation,
226        }
227    }
228
229    fn collect_value_constraints(&mut self, s: &NamedOrBlankNode) -> Vec<ShapeId> {
230        let mut value: Vec<ShapeId> = Vec::new();
231
232        // sh:class C  ≡  ∃≥1 (rdf:type/rdfs:subClassOf*) . test(C)
233        for c in self.g.objects(s, vocab::SH_CLASS) {
234            let tn = self.arena.insert(Shape::TestConst(c));
235            let cc = self.arena.count(class_path(), Some(1), None, tn);
236            value.push(cc);
237        }
238
239        // sh:datatype
240        for d in self.g.objects(s, vocab::SH_DATATYPE) {
241            if let Term::NamedNode(n) = d {
242                let id = self.arena.insert(Shape::TestType(ValueType::Datatype(n)));
243                value.push(id);
244            }
245        }
246
247        // sh:nodeKind
248        for k in self.g.objects(s, vocab::SH_NODE_KIND) {
249            if let Some(set) = map_node_kind(&k) {
250                let id = self.arena.insert(Shape::TestKind(set));
251                value.push(id);
252            } else {
253                self.diag(DiagLevel::Warning, "unrecognized sh:nodeKind value", s);
254            }
255        }
256
257        // numeric range (combine the four bounds into one facet)
258        let lo = self
259            .lit(s, vocab::SH_MIN_INCLUSIVE)
260            .map(|value| Bound {
261                value,
262                inclusive: true,
263            })
264            .or_else(|| {
265                self.lit(s, vocab::SH_MIN_EXCLUSIVE).map(|value| Bound {
266                    value,
267                    inclusive: false,
268                })
269            });
270        let hi = self
271            .lit(s, vocab::SH_MAX_INCLUSIVE)
272            .map(|value| Bound {
273                value,
274                inclusive: true,
275            })
276            .or_else(|| {
277                self.lit(s, vocab::SH_MAX_EXCLUSIVE).map(|value| Bound {
278                    value,
279                    inclusive: false,
280                })
281            });
282        if lo.is_some() || hi.is_some() {
283            let id = self
284                .arena
285                .insert(Shape::TestType(ValueType::NumericRange { lo, hi }));
286            value.push(id);
287        }
288
289        // length
290        let min_len = self.int(s, vocab::SH_MIN_LENGTH);
291        let max_len = self.int(s, vocab::SH_MAX_LENGTH);
292        if min_len.is_some() || max_len.is_some() {
293            let id = self.arena.insert(Shape::TestType(ValueType::Length {
294                min: min_len,
295                max: max_len,
296            }));
297            value.push(id);
298        }
299
300        // pattern (+ flags)
301        let flags = self
302            .lit(s, vocab::SH_FLAGS)
303            .map(|l| l.value().to_string())
304            .unwrap_or_default();
305        for pat in self.g.objects(s, vocab::SH_PATTERN) {
306            if let Term::Literal(l) = pat {
307                let id = self.arena.insert(Shape::TestType(ValueType::Pattern {
308                    regex: l.value().to_string(),
309                    flags: flags.clone(),
310                }));
311                value.push(id);
312            }
313        }
314
315        // sh:languageIn
316        for li in self.g.objects(s, vocab::SH_LANGUAGE_IN) {
317            let langs: Vec<String> = self
318                .g
319                .read_list(&li)
320                .into_iter()
321                .filter_map(|m| match m {
322                    Term::Literal(l) => Some(l.value().to_string()),
323                    _ => None,
324                })
325                .collect();
326            let id = self.arena.insert(Shape::TestType(ValueType::LangIn(langs)));
327            value.push(id);
328        }
329
330        // sh:in  ≡  ⋁ test(member)
331        for inl in self.g.objects(s, vocab::SH_IN) {
332            let alts: Vec<ShapeId> = self
333                .g
334                .read_list(&inl)
335                .into_iter()
336                .map(|m| self.arena.insert(Shape::TestConst(m)))
337                .collect();
338            let or = self.arena.or(alts);
339            value.push(or);
340        }
341
342        // sh:node — each value node must conform to the referenced shape
343        for n in self.g.objects(s, vocab::SH_NODE) {
344            if let Some(nn) = term_to_node(&n) {
345                let id = self.lower_shape(&nn);
346                value.push(id);
347            }
348        }
349
350        // sh:property — like sh:node, each *value node* must conform to the
351        // referenced property shape (so on a property shape it is scoped under
352        // ∀path, not applied to the focus node directly).
353        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
354            if let Some(pn) = term_to_node(&prop) {
355                let id = self.lower_shape(&pn);
356                value.push(id);
357            }
358        }
359
360        // sh:not
361        for n in self.g.objects(s, vocab::SH_NOT) {
362            if let Some(nn) = term_to_node(&n) {
363                let id = self.lower_shape(&nn);
364                let neg = self.arena.not(id);
365                value.push(neg);
366            }
367        }
368
369        // sh:and / sh:or / sh:xone (each object is an rdf:list of shapes)
370        for l in self.g.objects(s, vocab::SH_AND) {
371            let ids = self.lower_shape_list(&l);
372            let a = self.arena.and(ids);
373            value.push(a);
374        }
375        for l in self.g.objects(s, vocab::SH_OR) {
376            let ids = self.lower_shape_list(&l);
377            let o = self.arena.or(ids);
378            value.push(o);
379        }
380        for l in self.g.objects(s, vocab::SH_XONE) {
381            let ids = self.lower_shape_list(&l);
382            let x = self.arena.xone(ids);
383            value.push(x);
384        }
385
386        value
387    }
388
389    /// Path-level constraints (cardinality, qualified counts, property pairs,
390    /// hasValue, uniqueLang). Most require a path; without one they are ignored
391    /// with a diagnostic, except `sh:hasValue` which applies to the focus node.
392    fn collect_path_constraints(
393        &mut self,
394        s: &NamedOrBlankNode,
395        path: Option<&Path>,
396        conjuncts: &mut Vec<ShapeId>,
397    ) {
398        let need_path = |me: &mut Self, what: &str| {
399            me.diag(DiagLevel::Warning, format!("{what} ignored: no sh:path"), s);
400        };
401
402        let min_count = self.int(s, vocab::SH_MIN_COUNT);
403        let max_count = self.int(s, vocab::SH_MAX_COUNT);
404        if min_count.is_some() || max_count.is_some() {
405            match path {
406                Some(p) => {
407                    let top = self.arena.top();
408                    let c = self.arena.count(p.clone(), min_count, max_count, top);
409                    conjuncts.push(c);
410                }
411                None => need_path(self, "sh:minCount/sh:maxCount"),
412            }
413        }
414
415        // sh:hasValue
416        for v in self.g.objects(s, vocab::SH_HAS_VALUE) {
417            match path {
418                Some(p) => {
419                    let tc = self.arena.insert(Shape::TestConst(v));
420                    let c = self.arena.count(p.clone(), Some(1), None, tc);
421                    conjuncts.push(c);
422                }
423                None => {
424                    let tc = self.arena.insert(Shape::TestConst(v));
425                    conjuncts.push(tc);
426                }
427            }
428        }
429
430        // sh:qualifiedValueShape + qualifiedMin/MaxCount
431        for q in self.g.objects(s, vocab::SH_QUALIFIED_VALUE_SHAPE) {
432            if let Some(qn) = term_to_node(&q) {
433                let qmin = self.int(s, vocab::SH_QUALIFIED_MIN_COUNT);
434                let qmax = self.int(s, vocab::SH_QUALIFIED_MAX_COUNT);
435                match path {
436                    Some(p) => {
437                        let mut qualifiers = vec![self.lower_shape(&qn)];
438                        if self.bool_prop(s, vocab::SH_QUALIFIED_VALUE_SHAPES_DISJOINT) {
439                            for sibling in self.sibling_qualified_shapes(s, &qn) {
440                                let sibling = self.lower_shape(&sibling);
441                                qualifiers.push(self.arena.not(sibling));
442                            }
443                        }
444                        let qualifier = self.arena.and(qualifiers);
445                        let c = self.arena.count(p.clone(), qmin, qmax, qualifier);
446                        conjuncts.push(c);
447                    }
448                    None => need_path(self, "sh:qualifiedValueShape"),
449                }
450            }
451        }
452
453        // property-pair constraints
454        let pairs = [
455            (vocab::SH_EQUALS, "equals"),
456            (vocab::SH_DISJOINT, "disjoint"),
457            (vocab::SH_LESS_THAN, "lessThan"),
458            (vocab::SH_LESS_THAN_OR_EQUALS, "lessThanOrEquals"),
459        ];
460        for (pred, name) in pairs {
461            for other in self.g.objects(s, pred) {
462                let Term::NamedNode(op) = other else { continue };
463                match path {
464                    Some(p) => {
465                        let shape = match name {
466                            "equals" => Shape::Eq(p.clone(), op),
467                            "disjoint" => Shape::Disj(p.clone(), op),
468                            "lessThan" => Shape::Lt(p.clone(), op),
469                            _ => Shape::Le(p.clone(), op),
470                        };
471                        let c = self.arena.insert(shape);
472                        conjuncts.push(c);
473                    }
474                    None if matches!(name, "equals" | "disjoint") => {
475                        let shape = if name == "equals" {
476                            Shape::Eq(Path::Id, op)
477                        } else {
478                            Shape::Disj(Path::Id, op)
479                        };
480                        let c = self.arena.insert(shape);
481                        conjuncts.push(c);
482                    }
483                    None => need_path(self, &format!("sh:{name}")),
484                }
485            }
486        }
487
488        // sh:uniqueLang
489        if self.bool_prop(s, vocab::SH_UNIQUE_LANG) {
490            match path {
491                Some(p) => {
492                    let c = self.arena.insert(Shape::UniqueLang(p.clone()));
493                    conjuncts.push(c);
494                }
495                None => need_path(self, "sh:uniqueLang"),
496            }
497        }
498    }
499
500    /// The target selectors of a shape (used by both its statements and rules).
501    fn target_selectors(&mut self, s: &NamedOrBlankNode) -> Vec<Selector> {
502        let mut sels = Vec::new();
503
504        for c in self.g.objects(s, vocab::SH_TARGET_NODE) {
505            sels.push(Selector::IsConst(c));
506        }
507        for c in self.g.objects(s, vocab::SH_TARGET_CLASS) {
508            sels.push(self.class_selector(c));
509        }
510        for p in self.g.objects(s, vocab::SH_TARGET_SUBJECTS_OF) {
511            if let Term::NamedNode(n) = p {
512                sels.push(Selector::HasOut(n));
513            }
514        }
515        for p in self.g.objects(s, vocab::SH_TARGET_OBJECTS_OF) {
516            if let Term::NamedNode(n) = p {
517                sels.push(Selector::HasIn(n));
518            }
519        }
520
521        // implicit class target: a shape that is also an rdfs:Class / owl:Class
522        if (self.g.is_instance_of(s, vocab::RDFS_CLASS)
523            || self.g.is_instance_of(s, vocab::OWL_CLASS))
524            && let NamedOrBlankNode::NamedNode(n) = s
525        {
526            sels.push(self.class_selector(Term::NamedNode(n.clone())));
527        }
528
529        for target_term in self.g.objects(s, vocab::SH_TARGET) {
530            let Some(target_node) = term_to_node(&target_term) else {
531                self.diag(DiagLevel::Error, "sh:target must reference a resource", s);
532                continue;
533            };
534            match self.g.object(&target_node, vocab::SH_SELECT) {
535                Some(Term::Literal(query)) => {
536                    if let Some(query) =
537                        self.canonical_sparql(&target_node, query.value(), ExpectedQuery::Select)
538                    {
539                        sels.push(Selector::Sparql(SparqlTarget { query }));
540                    }
541                }
542                _ => self.diag(
543                    DiagLevel::Unsupported,
544                    "custom sh:target without sh:select is not yet lowered",
545                    &target_node,
546                ),
547            }
548        }
549
550        sels
551    }
552
553    /// Lower the `sh:rule`s of a shape (SHACL-AF). A rule fires on the shape's
554    /// targets, so we emit one [`Rule`] per selector.
555    fn parse_rules(&mut self, s: &NamedOrBlankNode, selectors: &[Selector]) {
556        for rule_term in self.g.objects(s, vocab::SH_RULE) {
557            let Some(rn) = term_to_node(&rule_term) else {
558                continue;
559            };
560            let Some(head) = self.parse_rule_head(&rn) else {
561                continue;
562            };
563
564            let conditions: Vec<ShapeId> = self
565                .g
566                .objects(&rn, vocab::SH_CONDITION)
567                .iter()
568                .filter_map(term_to_node)
569                .map(|c| self.lower_shape(&c))
570                .collect();
571            let order = self.order(&rn);
572            let deactivated = self.bool_prop(&rn, vocab::SH_DEACTIVATED);
573
574            for sel in selectors {
575                self.rules.push(Rule {
576                    selector: sel.clone(),
577                    conditions: conditions.clone(),
578                    head: head.clone(),
579                    order,
580                    deactivated,
581                });
582            }
583        }
584    }
585
586    /// Qualified value shapes attached through the same parent `sh:property`
587    /// declaration, excluding the current qualified shape itself.
588    fn sibling_qualified_shapes(
589        &self,
590        shape: &NamedOrBlankNode,
591        qualifier: &NamedOrBlankNode,
592    ) -> Vec<NamedOrBlankNode> {
593        let mut siblings = HashSet::new();
594        for triple in self.g.graph.triples_for_predicate(vocab::SH_PROPERTY) {
595            if term_to_node(&triple.object.into_owned()).as_ref() != Some(shape) {
596                continue;
597            }
598            let parent = triple.subject.into_owned();
599            for property in self.g.objects(&parent, vocab::SH_PROPERTY) {
600                let Some(property) = term_to_node(&property) else {
601                    continue;
602                };
603                for sibling in self.g.objects(&property, vocab::SH_QUALIFIED_VALUE_SHAPE) {
604                    if let Some(sibling) = term_to_node(&sibling) {
605                        siblings.insert(sibling);
606                    }
607                }
608            }
609        }
610        siblings.remove(qualifier);
611        let mut siblings: Vec<_> = siblings.into_iter().collect();
612        siblings.sort_by_key(|node| node.to_string());
613        siblings
614    }
615
616    fn parse_rule_head(&mut self, rn: &NamedOrBlankNode) -> Option<RuleHead> {
617        // sh:SPARQLRule — parse and canonicalize the CONSTRUCT while retaining
618        // an opaque algebra leaf for later query rewriting.
619        if let Some(Term::Literal(q)) = self.g.object(rn, vocab::SH_CONSTRUCT) {
620            let query = self.canonical_sparql(rn, q.value(), ExpectedQuery::Construct)?;
621            return Some(RuleHead::Sparql(SparqlConstruct { query }));
622        }
623        // sh:TripleRule — subject/predicate/object node expressions
624        let (subj, pred, obj) = (
625            self.g.object(rn, vocab::SH_SUBJECT),
626            self.g.object(rn, vocab::SH_PREDICATE),
627            self.g.object(rn, vocab::SH_OBJECT),
628        );
629        if subj.is_none() && pred.is_none() && obj.is_none() {
630            self.diag(DiagLevel::Unsupported, "unrecognized sh:rule head", rn);
631            return None;
632        }
633        let (Some(subj), Some(pred), Some(obj)) = (subj, pred, obj) else {
634            self.diag(
635                DiagLevel::Error,
636                "sh:TripleRule missing subject/predicate/object",
637                rn,
638            );
639            return None;
640        };
641        Some(RuleHead::Triple {
642            subject: self.parse_node_expr(subj, rn)?,
643            predicate: self.parse_node_expr(pred, rn)?,
644            object: self.parse_node_expr(obj, rn)?,
645        })
646    }
647
648    /// Parse a node expression (SHACL-AF §5). Handles `sh:this`, constants,
649    /// path expressions, and SPARQL function calls `[ ex:fn (arg …) ]`.
650    fn parse_node_expr(&mut self, term: Term, owner: &NamedOrBlankNode) -> Option<NodeExpr> {
651        match &term {
652            Term::NamedNode(n) if n.as_ref() == vocab::SH_THIS => Some(NodeExpr::This),
653            Term::NamedNode(_) | Term::Literal(_) => Some(NodeExpr::Constant(term)),
654            Term::BlankNode(_) => {
655                let node = term_to_node(&term).expect("blank node");
656                if let Some(path_term) = self.g.object(&node, vocab::SH_PATH) {
657                    match parse_path(self.g, &path_term) {
658                        Ok(path) => Some(NodeExpr::Path(path)),
659                        Err(e) => {
660                            self.diag(
661                                DiagLevel::Error,
662                                format!("invalid node-expression path: {e}"),
663                                owner,
664                            );
665                            None
666                        }
667                    }
668                } else if let Some(expr) = self.try_function_call(&node, owner) {
669                    Some(expr)
670                } else {
671                    self.diag(
672                        DiagLevel::Unsupported,
673                        "complex node expression not yet lowered",
674                        owner,
675                    );
676                    None
677                }
678            }
679        }
680    }
681
682    /// Detect a function-call node expression `[ ex:fn ( arg1 arg2 … ) ]`.
683    ///
684    /// The blank node must have exactly one non-SHACL/RDF/RDFS/OWL predicate;
685    /// its object must be an RDF list of argument node expressions.
686    fn try_function_call(
687        &mut self,
688        node: &NamedOrBlankNode,
689        owner: &NamedOrBlankNode,
690    ) -> Option<NodeExpr> {
691        let func_preds: Vec<(NamedNode, Term)> = self
692            .g
693            .graph
694            .triples_for_subject(node)
695            .map(|t| (t.predicate.into_owned(), t.object.into_owned()))
696            .filter(|(p, _)| {
697                let s = p.as_str();
698                !s.starts_with(vocab::SH)
699                    && !s.starts_with(vocab::RDF)
700                    && !s.starts_with(vocab::RDFS)
701                    && !s.starts_with(vocab::OWL)
702            })
703            .collect();
704
705        if func_preds.len() != 1 {
706            return None;
707        }
708        let (func_iri, list_head) = func_preds.into_iter().next().unwrap();
709        let arg_terms = self.g.read_list(&list_head);
710        let n = arg_terms.len();
711        let args: Vec<NodeExpr> = arg_terms
712            .into_iter()
713            .filter_map(|t| self.parse_node_expr(t, owner))
714            .collect();
715        if args.len() != n {
716            return None;
717        }
718        Some(NodeExpr::Function {
719            iri: func_iri,
720            args,
721        })
722    }
723
724    fn order(&self, s: &NamedOrBlankNode) -> Option<i64> {
725        match self.g.object(s, vocab::SH_ORDER) {
726            Some(Term::Literal(l)) => l.value().parse().ok(),
727            _ => None,
728        }
729    }
730
731    /// `∃≥1 (rdf:type/rdfs:subClassOf*) . test(class)` as a selector.
732    fn class_selector(&mut self, class: Term) -> Selector {
733        let tn = self.arena.insert(Shape::TestConst(class));
734        Selector::HasPath(class_path(), tn)
735    }
736
737    fn lower_shape_list(&mut self, list_head: &Term) -> Vec<ShapeId> {
738        self.g
739            .read_list(list_head)
740            .into_iter()
741            .filter_map(|m| term_to_node(&m))
742            .map(|n| self.lower_shape(&n))
743            .collect()
744    }
745
746    fn parse_shape_path(&mut self, s: &NamedOrBlankNode) -> Option<Path> {
747        let term = self.g.object(s, vocab::SH_PATH)?;
748        match parse_path(self.g, &term) {
749            Ok(p) => Some(p),
750            Err(e) => {
751                self.diag(DiagLevel::Error, format!("invalid sh:path: {e}"), s);
752                None
753            }
754        }
755    }
756
757    fn closed_allowed(&self, s: &NamedOrBlankNode) -> BTreeSet<oxrdf::NamedNode> {
758        let mut q = BTreeSet::new();
759        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
760            if let Some(pn) = term_to_node(&prop)
761                && let Some(Term::NamedNode(n)) = self.g.object(&pn, vocab::SH_PATH)
762            {
763                q.insert(n);
764            }
765        }
766        for ip in self.g.objects(s, vocab::SH_IGNORED_PROPERTIES) {
767            for m in self.g.read_list(&ip) {
768                if let Term::NamedNode(n) = m {
769                    q.insert(n);
770                }
771            }
772        }
773        q
774    }
775
776    fn bool_prop(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> bool {
777        matches!(self.g.object(s, pred), Some(Term::Literal(l)) if l.value() == "true")
778    }
779
780    fn int(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<u64> {
781        match self.g.object(s, pred) {
782            Some(Term::Literal(l)) => l.value().parse().ok(),
783            _ => None,
784        }
785    }
786
787    fn lit(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<Literal> {
788        match self.g.object(s, pred) {
789            Some(Term::Literal(l)) => Some(l),
790            _ => None,
791        }
792    }
793
794    /// Parse a SHACL SPARQL query once, resolving both document prefixes and
795    /// `sh:prefixes` declarations. `Query::to_string` expands prefix names, so
796    /// the IR remains self-contained and can be reparsed or rewritten later.
797    fn canonical_sparql(
798        &mut self,
799        owner: &NamedOrBlankNode,
800        raw: &str,
801        expected: ExpectedQuery,
802    ) -> Option<String> {
803        let (query, canonical) = match canonical_sparql_query(self.g, owner, raw) {
804            Ok(result) => result,
805            Err(message) => {
806                self.diag(DiagLevel::Error, message, owner);
807                return None;
808            }
809        };
810        let actual = match &query {
811            Query::Select { .. } => ExpectedQuery::Select,
812            Query::Ask { .. } => ExpectedQuery::Ask,
813            Query::Construct { .. } => ExpectedQuery::Construct,
814            Query::Describe { .. } => ExpectedQuery::Describe,
815        };
816        if actual != expected {
817            self.diag(
818                DiagLevel::Error,
819                format!("expected SPARQL {expected}, found {actual}"),
820                owner,
821            );
822            return None;
823        }
824        Some(canonical)
825    }
826}
827
828/// Build the canonical, prefix-expanded form of a SHACL SPARQL query string.
829///
830/// Resolves the document base IRI, document-level prefixes, and the
831/// `sh:prefixes` / `sh:declare` chains (following `owl:imports`) declared on
832/// `owner`, parses `raw`, and returns the parsed query together with its
833/// canonical string form. `Query::to_string` expands prefix names, so the
834/// result is self-contained and can be reparsed without external declarations.
835///
836/// Errors are returned as messages so callers can decide how to surface them:
837/// the lowerer routes them to diagnostics; the report validator drops the
838/// offending constraint, matching the lowering path.
839pub fn canonical_sparql_query(
840    g: &Loaded,
841    owner: &NamedOrBlankNode,
842    raw: &str,
843) -> Result<(Query, String), String> {
844    let mut parser = SparqlParser::new();
845    if let Some(base) = &g.base {
846        parser = parser
847            .with_base_iri(base)
848            .map_err(|e| format!("invalid SPARQL base IRI: {e}"))?;
849    }
850    for (prefix, namespace) in &g.prefixes {
851        parser = parser
852            .with_prefix(prefix, namespace)
853            .map_err(|e| format!("invalid SPARQL prefix declaration {prefix}: {e}"))?;
854    }
855    let mut prefix_sources: Vec<NamedOrBlankNode> = g
856        .objects(owner, vocab::SH_PREFIXES)
857        .iter()
858        .filter_map(term_to_node)
859        .collect();
860    let mut seen_sources = HashSet::new();
861    while let Some(source) = prefix_sources.pop() {
862        if !seen_sources.insert(source.clone()) {
863            continue;
864        }
865        prefix_sources.extend(
866            g.objects(&source, vocab::OWL_IMPORTS)
867                .iter()
868                .filter_map(term_to_node),
869        );
870        for declaration_term in g.objects(&source, vocab::SH_DECLARE) {
871            let Some(declaration) = term_to_node(&declaration_term) else {
872                continue;
873            };
874            let (Some(Term::Literal(prefix)), Some(Term::Literal(namespace))) = (
875                g.object(&declaration, vocab::SH_PREFIX),
876                g.object(&declaration, vocab::SH_NAMESPACE),
877            ) else {
878                continue;
879            };
880            parser = parser
881                .with_prefix(prefix.value(), namespace.value())
882                .map_err(|e| format!("invalid SHACL SPARQL prefix declaration: {e}"))?;
883        }
884    }
885    let query = parser
886        .parse_query(raw)
887        .map_err(|e| format!("invalid SPARQL query: {e}"))?;
888    let canonical = query.to_string();
889    Ok((query, canonical))
890}
891
/// The SPARQL query form, used both for the form a construct requires and for
/// the form a parsed query actually has.
///
/// `Debug` is derived so values can be used with `{:?}`, `dbg!`, and
/// `assert_eq!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExpectedQuery {
    /// A `SELECT` query.
    Select,
    /// An `ASK` query.
    Ask,
    /// A `CONSTRUCT` query.
    Construct,
    /// A `DESCRIBE` query.
    Describe,
}
899
900impl std::fmt::Display for ExpectedQuery {
901    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
902        f.write_str(match self {
903            Self::Select => "SELECT",
904            Self::Ask => "ASK",
905            Self::Construct => "CONSTRUCT",
906            Self::Describe => "DESCRIBE",
907        })
908    }
909}
910
911fn class_path() -> Path {
912    Path::seq(vec![
913        Path::Pred(vocab::rdf_type()),
914        Path::star(Path::Pred(vocab::rdfs_subclassof())),
915    ])
916}
917
918fn map_node_kind(term: &Term) -> Option<NodeKindSet> {
919    let Term::NamedNode(n) = term else {
920        return None;
921    };
922    let r = n.as_ref();
923    Some(if r == vocab::SH_IRI {
924        NodeKindSet::IRI
925    } else if r == vocab::SH_BLANK_NODE {
926        NodeKindSet::BLANK_NODE
927    } else if r == vocab::SH_LITERAL {
928        NodeKindSet::LITERAL
929    } else if r == vocab::SH_BLANK_NODE_OR_IRI {
930        NodeKindSet::BLANK_NODE_OR_IRI
931    } else if r == vocab::SH_BLANK_NODE_OR_LITERAL {
932        NodeKindSet::BLANK_NODE_OR_LITERAL
933    } else if r == vocab::SH_IRI_OR_LITERAL {
934        NodeKindSet::IRI_OR_LITERAL
935    } else {
936        return None;
937    })
938}