// shifty_parse/lower.rs

//! Lower a loaded shapes graph into the formalism [`Schema`].
//!
//! Every SHACL Core construct collapses into the small IR by applying the
//! desugaring rules from the gap analysis (`class → path`,
//! `minCount/maxCount → Count`, per-value constraints wrapped in
//! `∀π = ∃≤0 π.¬φ`, `xone → ∧∨¬`, …). Each shape lowers to a **focus-node
//! predicate** `φ`, so `sh:property`/`sh:node` compose by conjunction.
//! Unsupported SHACL-AF constructs emit diagnostics.
8
9use crate::diagnostics::{DiagLevel, Diagnostic};
10use crate::graph::{Loaded, term_to_node};
11use crate::path::parse_path;
12use crate::vocab;
13use oxrdf::{Literal, NamedNode, NamedOrBlankNode, Term};
14use shifty_algebra::{
15    Bound, NodeExpr, NodeKindSet, Path, Rule, RuleHead, Schema, Selector, Shape, ShapeArena,
16    ShapeId, SparqlConstraint, SparqlConstruct, SparqlQueryKind, SparqlTarget, Statement,
17    ValueType,
18};
19use spargebra::{Query, SparqlParser};
20use std::collections::{BTreeSet, HashMap, HashSet};
21
22pub struct Lowered {
23    pub schema: Schema,
24    pub diagnostics: Vec<Diagnostic>,
25}
26
27/// Lower a loaded graph into a schema plus diagnostics.
28pub fn lower(g: &Loaded) -> Lowered {
29    let mut l = Lowerer {
30        g,
31        arena: ShapeArena::new(),
32        cache: HashMap::new(),
33        statements: Vec::new(),
34        rules: Vec::new(),
35        diags: Vec::new(),
36    };
37    let shapes = l.discover_shapes();
38    for s in &shapes {
39        l.lower_shape(s);
40    }
41    for s in &shapes {
42        // selectors are shared by the shape's statements and its rules
43        let selectors = l.target_selectors(s);
44        if let Some(shape) = l.cache.get(s).copied() {
45            for sel in &selectors {
46                l.statements.push(Statement {
47                    selector: sel.clone(),
48                    shape,
49                });
50            }
51        }
52        l.parse_rules(s, &selectors);
53    }
54    let names = l
55        .cache
56        .iter()
57        .filter_map(|(node, id)| match node {
58            NamedOrBlankNode::NamedNode(n) => Some((*id, n.as_str().to_string())),
59            NamedOrBlankNode::BlankNode(_) => None,
60        })
61        .collect();
62    Lowered {
63        schema: Schema {
64            arena: l.arena,
65            statements: l.statements,
66            rules: l.rules,
67            names,
68        },
69        diagnostics: l.diags,
70    }
71}
72
73struct Lowerer<'a> {
74    g: &'a Loaded,
75    arena: ShapeArena,
76    cache: HashMap<NamedOrBlankNode, ShapeId>,
77    statements: Vec<Statement>,
78    rules: Vec<Rule>,
79    diags: Vec<Diagnostic>,
80}
81
82impl Lowerer<'_> {
83    fn diag(&mut self, level: DiagLevel, msg: impl Into<String>, subj: &NamedOrBlankNode) {
84        self.diags
85            .push(Diagnostic::new(level, msg, Some(subj.to_string())));
86    }
87
88    /// Subjects that are declared shapes: typed NodeShape/PropertyShape, or
89    /// carrying `sh:path` or a target predicate. Referenced-only shapes are
90    /// pulled in on demand during lowering. Sorted for deterministic output.
91    fn discover_shapes(&self) -> Vec<NamedOrBlankNode> {
92        let mut found: HashSet<NamedOrBlankNode> = HashSet::new();
93        for triple in self.g.graph.iter() {
94            let p = triple.predicate;
95            let is_target = p == vocab::SH_TARGET_NODE
96                || p == vocab::SH_TARGET_CLASS
97                || p == vocab::SH_TARGET_SUBJECTS_OF
98                || p == vocab::SH_TARGET_OBJECTS_OF
99                || p == vocab::SH_TARGET;
100            if p == vocab::SH_PATH || p == vocab::SH_SPARQL || p == vocab::SH_RULE || is_target {
101                found.insert(triple.subject.into_owned());
102            }
103            if p == vocab::RDF_TYPE
104                && let Term::NamedNode(ty) = triple.object.into_owned()
105                && (ty.as_ref() == vocab::SH_NODE_SHAPE || ty.as_ref() == vocab::SH_PROPERTY_SHAPE)
106            {
107                found.insert(triple.subject.into_owned());
108            }
109        }
110        let mut shapes: Vec<NamedOrBlankNode> = found.into_iter().collect();
111        shapes.sort_by_key(|n| n.to_string());
112        shapes
113    }
114
115    fn lower_shape(&mut self, s: &NamedOrBlankNode) -> ShapeId {
116        if let Some(id) = self.cache.get(s) {
117            return *id;
118        }
119        let id = self.arena.reserve();
120        self.cache.insert(s.clone(), id);
121
122        if self.bool_prop(s, vocab::SH_DEACTIVATED) {
123            self.arena.set(id, Shape::Top);
124            return id;
125        }
126
127        let path = self.parse_shape_path(s);
128        let mut conjuncts: Vec<ShapeId> = Vec::new();
129
130        // Value-scoped constraints: each applies to every value node along the
131        // path (or to the focus node directly when there is no path).
132        let value = self.collect_value_constraints(s);
133        if !value.is_empty() {
134            let value_phi = self.arena.and(value);
135            match &path {
136                Some(p) => {
137                    // ∀π.φ  ≡  ∃≤0 π.¬φ
138                    let neg = self.arena.not(value_phi);
139                    let c = self.arena.count(p.clone(), None, Some(0), neg);
140                    conjuncts.push(c);
141                }
142                None => conjuncts.push(value_phi),
143            }
144        }
145
146        self.collect_path_constraints(s, path.as_ref(), &mut conjuncts);
147
148        if self.bool_prop(s, vocab::SH_CLOSED) {
149            let q = self.closed_allowed(s);
150            let c = self.arena.insert(Shape::Closed(q));
151            conjuncts.push(c);
152        }
153
154        for constraint_term in self.g.objects(s, vocab::SH_SPARQL) {
155            let Some(constraint_node) = term_to_node(&constraint_term) else {
156                self.diag(DiagLevel::Error, "sh:sparql must reference a resource", s);
157                continue;
158            };
159            let parsed = if let Some(Term::Literal(query)) =
160                self.g.object(&constraint_node, vocab::SH_SELECT)
161            {
162                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Select)
163                    .map(|query| (SparqlQueryKind::Select, query))
164            } else if let Some(Term::Literal(query)) =
165                self.g.object(&constraint_node, vocab::SH_ASK)
166            {
167                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Ask)
168                    .map(|query| (SparqlQueryKind::Ask, query))
169            } else {
170                self.diag(
171                    DiagLevel::Error,
172                    "sh:sparql constraint requires sh:select or sh:ask",
173                    &constraint_node,
174                );
175                None
176            };
177            if let Some((kind, query)) = parsed {
178                let shape = Some(match s {
179                    NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n.clone()),
180                    NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b.clone()),
181                });
182                // `sh:message` on the SPARQL constraint takes precedence; absent
183                // that, fall back to the owning shape's `sh:message` (SHACL §5.2.1).
184                let mut messages: Vec<Term> = self.g.objects(&constraint_node, vocab::SH_MESSAGE);
185                if messages.is_empty() {
186                    messages = self.g.objects(s, vocab::SH_MESSAGE);
187                }
188                let constraint = SparqlConstraint {
189                    kind,
190                    query,
191                    path: path.clone(),
192                    shape,
193                    messages,
194                };
195                conjuncts.push(self.arena.insert(Shape::Sparql(constraint)));
196            }
197        }
198
199        let shape = if conjuncts.is_empty() {
200            Shape::Top
201        } else if conjuncts.len() == 1 {
202            if conjuncts[0] == id {
203                Shape::Top
204            } else if matches!(self.arena.get(conjuncts[0]), Shape::Pending) {
205                // back-reference to an ancestor still being built: keep the ref
206                Shape::And(vec![conjuncts[0]])
207            } else {
208                self.arena.get(conjuncts[0]).clone()
209            }
210        } else {
211            Shape::And(conjuncts)
212        };
213        self.arena.set(id, shape);
214        id
215    }
216
217    fn collect_value_constraints(&mut self, s: &NamedOrBlankNode) -> Vec<ShapeId> {
218        let mut value: Vec<ShapeId> = Vec::new();
219
220        // sh:class C  ≡  ∃≥1 (rdf:type/rdfs:subClassOf*) . test(C)
221        for c in self.g.objects(s, vocab::SH_CLASS) {
222            let tn = self.arena.insert(Shape::TestConst(c));
223            let cc = self.arena.count(class_path(), Some(1), None, tn);
224            value.push(cc);
225        }
226
227        // sh:datatype
228        for d in self.g.objects(s, vocab::SH_DATATYPE) {
229            if let Term::NamedNode(n) = d {
230                let id = self.arena.insert(Shape::TestType(ValueType::Datatype(n)));
231                value.push(id);
232            }
233        }
234
235        // sh:nodeKind
236        for k in self.g.objects(s, vocab::SH_NODE_KIND) {
237            if let Some(set) = map_node_kind(&k) {
238                let id = self.arena.insert(Shape::TestKind(set));
239                value.push(id);
240            } else {
241                self.diag(DiagLevel::Warning, "unrecognized sh:nodeKind value", s);
242            }
243        }
244
245        // numeric range (combine the four bounds into one facet)
246        let lo = self
247            .lit(s, vocab::SH_MIN_INCLUSIVE)
248            .map(|value| Bound {
249                value,
250                inclusive: true,
251            })
252            .or_else(|| {
253                self.lit(s, vocab::SH_MIN_EXCLUSIVE).map(|value| Bound {
254                    value,
255                    inclusive: false,
256                })
257            });
258        let hi = self
259            .lit(s, vocab::SH_MAX_INCLUSIVE)
260            .map(|value| Bound {
261                value,
262                inclusive: true,
263            })
264            .or_else(|| {
265                self.lit(s, vocab::SH_MAX_EXCLUSIVE).map(|value| Bound {
266                    value,
267                    inclusive: false,
268                })
269            });
270        if lo.is_some() || hi.is_some() {
271            let id = self
272                .arena
273                .insert(Shape::TestType(ValueType::NumericRange { lo, hi }));
274            value.push(id);
275        }
276
277        // length
278        let min_len = self.int(s, vocab::SH_MIN_LENGTH);
279        let max_len = self.int(s, vocab::SH_MAX_LENGTH);
280        if min_len.is_some() || max_len.is_some() {
281            let id = self.arena.insert(Shape::TestType(ValueType::Length {
282                min: min_len,
283                max: max_len,
284            }));
285            value.push(id);
286        }
287
288        // pattern (+ flags)
289        let flags = self
290            .lit(s, vocab::SH_FLAGS)
291            .map(|l| l.value().to_string())
292            .unwrap_or_default();
293        for pat in self.g.objects(s, vocab::SH_PATTERN) {
294            if let Term::Literal(l) = pat {
295                let id = self.arena.insert(Shape::TestType(ValueType::Pattern {
296                    regex: l.value().to_string(),
297                    flags: flags.clone(),
298                }));
299                value.push(id);
300            }
301        }
302
303        // sh:languageIn
304        for li in self.g.objects(s, vocab::SH_LANGUAGE_IN) {
305            let langs: Vec<String> = self
306                .g
307                .read_list(&li)
308                .into_iter()
309                .filter_map(|m| match m {
310                    Term::Literal(l) => Some(l.value().to_string()),
311                    _ => None,
312                })
313                .collect();
314            let id = self.arena.insert(Shape::TestType(ValueType::LangIn(langs)));
315            value.push(id);
316        }
317
318        // sh:in  ≡  ⋁ test(member)
319        for inl in self.g.objects(s, vocab::SH_IN) {
320            let alts: Vec<ShapeId> = self
321                .g
322                .read_list(&inl)
323                .into_iter()
324                .map(|m| self.arena.insert(Shape::TestConst(m)))
325                .collect();
326            let or = self.arena.or(alts);
327            value.push(or);
328        }
329
330        // sh:node — each value node must conform to the referenced shape
331        for n in self.g.objects(s, vocab::SH_NODE) {
332            if let Some(nn) = term_to_node(&n) {
333                let id = self.lower_shape(&nn);
334                value.push(id);
335            }
336        }
337
338        // sh:property — like sh:node, each *value node* must conform to the
339        // referenced property shape (so on a property shape it is scoped under
340        // ∀path, not applied to the focus node directly).
341        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
342            if let Some(pn) = term_to_node(&prop) {
343                let id = self.lower_shape(&pn);
344                value.push(id);
345            }
346        }
347
348        // sh:not
349        for n in self.g.objects(s, vocab::SH_NOT) {
350            if let Some(nn) = term_to_node(&n) {
351                let id = self.lower_shape(&nn);
352                let neg = self.arena.not(id);
353                value.push(neg);
354            }
355        }
356
357        // sh:and / sh:or / sh:xone (each object is an rdf:list of shapes)
358        for l in self.g.objects(s, vocab::SH_AND) {
359            let ids = self.lower_shape_list(&l);
360            let a = self.arena.and(ids);
361            value.push(a);
362        }
363        for l in self.g.objects(s, vocab::SH_OR) {
364            let ids = self.lower_shape_list(&l);
365            let o = self.arena.or(ids);
366            value.push(o);
367        }
368        for l in self.g.objects(s, vocab::SH_XONE) {
369            let ids = self.lower_shape_list(&l);
370            let x = self.arena.xone(ids);
371            value.push(x);
372        }
373
374        value
375    }
376
377    /// Path-level constraints (cardinality, qualified counts, property pairs,
378    /// hasValue, uniqueLang). Most require a path; without one they are ignored
379    /// with a diagnostic, except `sh:hasValue` which applies to the focus node.
380    fn collect_path_constraints(
381        &mut self,
382        s: &NamedOrBlankNode,
383        path: Option<&Path>,
384        conjuncts: &mut Vec<ShapeId>,
385    ) {
386        let need_path = |me: &mut Self, what: &str| {
387            me.diag(DiagLevel::Warning, format!("{what} ignored: no sh:path"), s);
388        };
389
390        let min_count = self.int(s, vocab::SH_MIN_COUNT);
391        let max_count = self.int(s, vocab::SH_MAX_COUNT);
392        if min_count.is_some() || max_count.is_some() {
393            match path {
394                Some(p) => {
395                    let top = self.arena.top();
396                    let c = self.arena.count(p.clone(), min_count, max_count, top);
397                    conjuncts.push(c);
398                }
399                None => need_path(self, "sh:minCount/sh:maxCount"),
400            }
401        }
402
403        // sh:hasValue
404        for v in self.g.objects(s, vocab::SH_HAS_VALUE) {
405            match path {
406                Some(p) => {
407                    let tc = self.arena.insert(Shape::TestConst(v));
408                    let c = self.arena.count(p.clone(), Some(1), None, tc);
409                    conjuncts.push(c);
410                }
411                None => {
412                    let tc = self.arena.insert(Shape::TestConst(v));
413                    conjuncts.push(tc);
414                }
415            }
416        }
417
418        // sh:qualifiedValueShape + qualifiedMin/MaxCount
419        for q in self.g.objects(s, vocab::SH_QUALIFIED_VALUE_SHAPE) {
420            if let Some(qn) = term_to_node(&q) {
421                let qmin = self.int(s, vocab::SH_QUALIFIED_MIN_COUNT);
422                let qmax = self.int(s, vocab::SH_QUALIFIED_MAX_COUNT);
423                match path {
424                    Some(p) => {
425                        let mut qualifiers = vec![self.lower_shape(&qn)];
426                        if self.bool_prop(s, vocab::SH_QUALIFIED_VALUE_SHAPES_DISJOINT) {
427                            for sibling in self.sibling_qualified_shapes(s, &qn) {
428                                let sibling = self.lower_shape(&sibling);
429                                qualifiers.push(self.arena.not(sibling));
430                            }
431                        }
432                        let qualifier = self.arena.and(qualifiers);
433                        let c = self.arena.count(p.clone(), qmin, qmax, qualifier);
434                        conjuncts.push(c);
435                    }
436                    None => need_path(self, "sh:qualifiedValueShape"),
437                }
438            }
439        }
440
441        // property-pair constraints
442        let pairs = [
443            (vocab::SH_EQUALS, "equals"),
444            (vocab::SH_DISJOINT, "disjoint"),
445            (vocab::SH_LESS_THAN, "lessThan"),
446            (vocab::SH_LESS_THAN_OR_EQUALS, "lessThanOrEquals"),
447        ];
448        for (pred, name) in pairs {
449            for other in self.g.objects(s, pred) {
450                let Term::NamedNode(op) = other else { continue };
451                match path {
452                    Some(p) => {
453                        let shape = match name {
454                            "equals" => Shape::Eq(p.clone(), op),
455                            "disjoint" => Shape::Disj(p.clone(), op),
456                            "lessThan" => Shape::Lt(p.clone(), op),
457                            _ => Shape::Le(p.clone(), op),
458                        };
459                        let c = self.arena.insert(shape);
460                        conjuncts.push(c);
461                    }
462                    None if matches!(name, "equals" | "disjoint") => {
463                        let shape = if name == "equals" {
464                            Shape::Eq(Path::Id, op)
465                        } else {
466                            Shape::Disj(Path::Id, op)
467                        };
468                        let c = self.arena.insert(shape);
469                        conjuncts.push(c);
470                    }
471                    None => need_path(self, &format!("sh:{name}")),
472                }
473            }
474        }
475
476        // sh:uniqueLang
477        if self.bool_prop(s, vocab::SH_UNIQUE_LANG) {
478            match path {
479                Some(p) => {
480                    let c = self.arena.insert(Shape::UniqueLang(p.clone()));
481                    conjuncts.push(c);
482                }
483                None => need_path(self, "sh:uniqueLang"),
484            }
485        }
486    }
487
488    /// The target selectors of a shape (used by both its statements and rules).
489    fn target_selectors(&mut self, s: &NamedOrBlankNode) -> Vec<Selector> {
490        let mut sels = Vec::new();
491
492        for c in self.g.objects(s, vocab::SH_TARGET_NODE) {
493            sels.push(Selector::IsConst(c));
494        }
495        for c in self.g.objects(s, vocab::SH_TARGET_CLASS) {
496            sels.push(self.class_selector(c));
497        }
498        for p in self.g.objects(s, vocab::SH_TARGET_SUBJECTS_OF) {
499            if let Term::NamedNode(n) = p {
500                sels.push(Selector::HasOut(n));
501            }
502        }
503        for p in self.g.objects(s, vocab::SH_TARGET_OBJECTS_OF) {
504            if let Term::NamedNode(n) = p {
505                sels.push(Selector::HasIn(n));
506            }
507        }
508
509        // implicit class target: a shape that is also an rdfs:Class / owl:Class
510        if (self.g.is_instance_of(s, vocab::RDFS_CLASS)
511            || self.g.is_instance_of(s, vocab::OWL_CLASS))
512            && let NamedOrBlankNode::NamedNode(n) = s
513        {
514            sels.push(self.class_selector(Term::NamedNode(n.clone())));
515        }
516
517        for target_term in self.g.objects(s, vocab::SH_TARGET) {
518            let Some(target_node) = term_to_node(&target_term) else {
519                self.diag(DiagLevel::Error, "sh:target must reference a resource", s);
520                continue;
521            };
522            match self.g.object(&target_node, vocab::SH_SELECT) {
523                Some(Term::Literal(query)) => {
524                    if let Some(query) =
525                        self.canonical_sparql(&target_node, query.value(), ExpectedQuery::Select)
526                    {
527                        sels.push(Selector::Sparql(SparqlTarget { query }));
528                    }
529                }
530                _ => self.diag(
531                    DiagLevel::Unsupported,
532                    "custom sh:target without sh:select is not yet lowered",
533                    &target_node,
534                ),
535            }
536        }
537
538        sels
539    }
540
541    /// Lower the `sh:rule`s of a shape (SHACL-AF). A rule fires on the shape's
542    /// targets, so we emit one [`Rule`] per selector.
543    fn parse_rules(&mut self, s: &NamedOrBlankNode, selectors: &[Selector]) {
544        for rule_term in self.g.objects(s, vocab::SH_RULE) {
545            let Some(rn) = term_to_node(&rule_term) else {
546                continue;
547            };
548            let Some(head) = self.parse_rule_head(&rn) else {
549                continue;
550            };
551
552            let conditions: Vec<ShapeId> = self
553                .g
554                .objects(&rn, vocab::SH_CONDITION)
555                .iter()
556                .filter_map(term_to_node)
557                .map(|c| self.lower_shape(&c))
558                .collect();
559            let order = self.order(&rn);
560            let deactivated = self.bool_prop(&rn, vocab::SH_DEACTIVATED);
561
562            for sel in selectors {
563                self.rules.push(Rule {
564                    selector: sel.clone(),
565                    conditions: conditions.clone(),
566                    head: head.clone(),
567                    order,
568                    deactivated,
569                });
570            }
571        }
572    }
573
574    /// Qualified value shapes attached through the same parent `sh:property`
575    /// declaration, excluding the current qualified shape itself.
576    fn sibling_qualified_shapes(
577        &self,
578        shape: &NamedOrBlankNode,
579        qualifier: &NamedOrBlankNode,
580    ) -> Vec<NamedOrBlankNode> {
581        let mut siblings = HashSet::new();
582        for triple in self.g.graph.triples_for_predicate(vocab::SH_PROPERTY) {
583            if term_to_node(&triple.object.into_owned()).as_ref() != Some(shape) {
584                continue;
585            }
586            let parent = triple.subject.into_owned();
587            for property in self.g.objects(&parent, vocab::SH_PROPERTY) {
588                let Some(property) = term_to_node(&property) else {
589                    continue;
590                };
591                for sibling in self.g.objects(&property, vocab::SH_QUALIFIED_VALUE_SHAPE) {
592                    if let Some(sibling) = term_to_node(&sibling) {
593                        siblings.insert(sibling);
594                    }
595                }
596            }
597        }
598        siblings.remove(qualifier);
599        let mut siblings: Vec<_> = siblings.into_iter().collect();
600        siblings.sort_by_key(|node| node.to_string());
601        siblings
602    }
603
604    fn parse_rule_head(&mut self, rn: &NamedOrBlankNode) -> Option<RuleHead> {
605        // sh:SPARQLRule — parse and canonicalize the CONSTRUCT while retaining
606        // an opaque algebra leaf for later query rewriting.
607        if let Some(Term::Literal(q)) = self.g.object(rn, vocab::SH_CONSTRUCT) {
608            let query = self.canonical_sparql(rn, q.value(), ExpectedQuery::Construct)?;
609            return Some(RuleHead::Sparql(SparqlConstruct { query }));
610        }
611        // sh:TripleRule — subject/predicate/object node expressions
612        let (subj, pred, obj) = (
613            self.g.object(rn, vocab::SH_SUBJECT),
614            self.g.object(rn, vocab::SH_PREDICATE),
615            self.g.object(rn, vocab::SH_OBJECT),
616        );
617        if subj.is_none() && pred.is_none() && obj.is_none() {
618            self.diag(DiagLevel::Unsupported, "unrecognized sh:rule head", rn);
619            return None;
620        }
621        let (Some(subj), Some(pred), Some(obj)) = (subj, pred, obj) else {
622            self.diag(
623                DiagLevel::Error,
624                "sh:TripleRule missing subject/predicate/object",
625                rn,
626            );
627            return None;
628        };
629        Some(RuleHead::Triple {
630            subject: self.parse_node_expr(subj, rn)?,
631            predicate: self.parse_node_expr(pred, rn)?,
632            object: self.parse_node_expr(obj, rn)?,
633        })
634    }
635
636    /// Parse a node expression (SHACL-AF §5). Handles `sh:this`, constants,
637    /// path expressions, and SPARQL function calls `[ ex:fn (arg …) ]`.
638    fn parse_node_expr(&mut self, term: Term, owner: &NamedOrBlankNode) -> Option<NodeExpr> {
639        match &term {
640            Term::NamedNode(n) if n.as_ref() == vocab::SH_THIS => Some(NodeExpr::This),
641            Term::NamedNode(_) | Term::Literal(_) => Some(NodeExpr::Constant(term)),
642            Term::BlankNode(_) => {
643                let node = term_to_node(&term).expect("blank node");
644                if let Some(path_term) = self.g.object(&node, vocab::SH_PATH) {
645                    match parse_path(self.g, &path_term) {
646                        Ok(path) => Some(NodeExpr::Path(path)),
647                        Err(e) => {
648                            self.diag(
649                                DiagLevel::Error,
650                                format!("invalid node-expression path: {e}"),
651                                owner,
652                            );
653                            None
654                        }
655                    }
656                } else if let Some(expr) = self.try_function_call(&node, owner) {
657                    Some(expr)
658                } else {
659                    self.diag(
660                        DiagLevel::Unsupported,
661                        "complex node expression not yet lowered",
662                        owner,
663                    );
664                    None
665                }
666            }
667        }
668    }
669
670    /// Detect a function-call node expression `[ ex:fn ( arg1 arg2 … ) ]`.
671    ///
672    /// The blank node must have exactly one non-SHACL/RDF/RDFS/OWL predicate;
673    /// its object must be an RDF list of argument node expressions.
674    fn try_function_call(
675        &mut self,
676        node: &NamedOrBlankNode,
677        owner: &NamedOrBlankNode,
678    ) -> Option<NodeExpr> {
679        let func_preds: Vec<(NamedNode, Term)> = self
680            .g
681            .graph
682            .triples_for_subject(node)
683            .map(|t| (t.predicate.into_owned(), t.object.into_owned()))
684            .filter(|(p, _)| {
685                let s = p.as_str();
686                !s.starts_with(vocab::SH)
687                    && !s.starts_with(vocab::RDF)
688                    && !s.starts_with(vocab::RDFS)
689                    && !s.starts_with(vocab::OWL)
690            })
691            .collect();
692
693        if func_preds.len() != 1 {
694            return None;
695        }
696        let (func_iri, list_head) = func_preds.into_iter().next().unwrap();
697        let arg_terms = self.g.read_list(&list_head);
698        let n = arg_terms.len();
699        let args: Vec<NodeExpr> = arg_terms
700            .into_iter()
701            .filter_map(|t| self.parse_node_expr(t, owner))
702            .collect();
703        if args.len() != n {
704            return None;
705        }
706        Some(NodeExpr::Function {
707            iri: func_iri,
708            args,
709        })
710    }
711
712    fn order(&self, s: &NamedOrBlankNode) -> Option<i64> {
713        match self.g.object(s, vocab::SH_ORDER) {
714            Some(Term::Literal(l)) => l.value().parse().ok(),
715            _ => None,
716        }
717    }
718
719    /// `∃≥1 (rdf:type/rdfs:subClassOf*) . test(class)` as a selector.
720    fn class_selector(&mut self, class: Term) -> Selector {
721        let tn = self.arena.insert(Shape::TestConst(class));
722        Selector::HasPath(class_path(), tn)
723    }
724
725    fn lower_shape_list(&mut self, list_head: &Term) -> Vec<ShapeId> {
726        self.g
727            .read_list(list_head)
728            .into_iter()
729            .filter_map(|m| term_to_node(&m))
730            .map(|n| self.lower_shape(&n))
731            .collect()
732    }
733
734    fn parse_shape_path(&mut self, s: &NamedOrBlankNode) -> Option<Path> {
735        let term = self.g.object(s, vocab::SH_PATH)?;
736        match parse_path(self.g, &term) {
737            Ok(p) => Some(p),
738            Err(e) => {
739                self.diag(DiagLevel::Error, format!("invalid sh:path: {e}"), s);
740                None
741            }
742        }
743    }
744
745    fn closed_allowed(&self, s: &NamedOrBlankNode) -> BTreeSet<oxrdf::NamedNode> {
746        let mut q = BTreeSet::new();
747        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
748            if let Some(pn) = term_to_node(&prop)
749                && let Some(Term::NamedNode(n)) = self.g.object(&pn, vocab::SH_PATH)
750            {
751                q.insert(n);
752            }
753        }
754        for ip in self.g.objects(s, vocab::SH_IGNORED_PROPERTIES) {
755            for m in self.g.read_list(&ip) {
756                if let Term::NamedNode(n) = m {
757                    q.insert(n);
758                }
759            }
760        }
761        q
762    }
763
764    fn bool_prop(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> bool {
765        matches!(self.g.object(s, pred), Some(Term::Literal(l)) if l.value() == "true")
766    }
767
768    fn int(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<u64> {
769        match self.g.object(s, pred) {
770            Some(Term::Literal(l)) => l.value().parse().ok(),
771            _ => None,
772        }
773    }
774
775    fn lit(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<Literal> {
776        match self.g.object(s, pred) {
777            Some(Term::Literal(l)) => Some(l),
778            _ => None,
779        }
780    }
781
782    /// Parse a SHACL SPARQL query once, resolving both document prefixes and
783    /// `sh:prefixes` declarations. `Query::to_string` expands prefix names, so
784    /// the IR remains self-contained and can be reparsed or rewritten later.
785    fn canonical_sparql(
786        &mut self,
787        owner: &NamedOrBlankNode,
788        raw: &str,
789        expected: ExpectedQuery,
790    ) -> Option<String> {
791        let (query, canonical) = match canonical_sparql_query(self.g, owner, raw) {
792            Ok(result) => result,
793            Err(message) => {
794                self.diag(DiagLevel::Error, message, owner);
795                return None;
796            }
797        };
798        let actual = match &query {
799            Query::Select { .. } => ExpectedQuery::Select,
800            Query::Ask { .. } => ExpectedQuery::Ask,
801            Query::Construct { .. } => ExpectedQuery::Construct,
802            Query::Describe { .. } => ExpectedQuery::Describe,
803        };
804        if actual != expected {
805            self.diag(
806                DiagLevel::Error,
807                format!("expected SPARQL {expected}, found {actual}"),
808                owner,
809            );
810            return None;
811        }
812        Some(canonical)
813    }
814}
815
816/// Build the canonical, prefix-expanded form of a SHACL SPARQL query string.
817///
818/// Resolves the document base IRI, document-level prefixes, and the
819/// `sh:prefixes` / `sh:declare` chains (following `owl:imports`) declared on
820/// `owner`, parses `raw`, and returns the parsed query together with its
821/// canonical string form. `Query::to_string` expands prefix names, so the
822/// result is self-contained and can be reparsed without external declarations.
823///
824/// Errors are returned as messages so callers can decide how to surface them:
825/// the lowerer routes them to diagnostics; the report validator drops the
826/// offending constraint, matching the lowering path.
827pub fn canonical_sparql_query(
828    g: &Loaded,
829    owner: &NamedOrBlankNode,
830    raw: &str,
831) -> Result<(Query, String), String> {
832    let mut parser = SparqlParser::new();
833    if let Some(base) = &g.base {
834        parser = parser
835            .with_base_iri(base)
836            .map_err(|e| format!("invalid SPARQL base IRI: {e}"))?;
837    }
838    for (prefix, namespace) in &g.prefixes {
839        parser = parser
840            .with_prefix(prefix, namespace)
841            .map_err(|e| format!("invalid SPARQL prefix declaration {prefix}: {e}"))?;
842    }
843    let mut prefix_sources: Vec<NamedOrBlankNode> = g
844        .objects(owner, vocab::SH_PREFIXES)
845        .iter()
846        .filter_map(term_to_node)
847        .collect();
848    let mut seen_sources = HashSet::new();
849    while let Some(source) = prefix_sources.pop() {
850        if !seen_sources.insert(source.clone()) {
851            continue;
852        }
853        prefix_sources.extend(
854            g.objects(&source, vocab::OWL_IMPORTS)
855                .iter()
856                .filter_map(term_to_node),
857        );
858        for declaration_term in g.objects(&source, vocab::SH_DECLARE) {
859            let Some(declaration) = term_to_node(&declaration_term) else {
860                continue;
861            };
862            let (Some(Term::Literal(prefix)), Some(Term::Literal(namespace))) = (
863                g.object(&declaration, vocab::SH_PREFIX),
864                g.object(&declaration, vocab::SH_NAMESPACE),
865            ) else {
866                continue;
867            };
868            parser = parser
869                .with_prefix(prefix.value(), namespace.value())
870                .map_err(|e| format!("invalid SHACL SPARQL prefix declaration: {e}"))?;
871        }
872    }
873    let query = parser
874        .parse_query(raw)
875        .map_err(|e| format!("invalid SPARQL query: {e}"))?;
876    let canonical = query.to_string();
877    Ok((query, canonical))
878}
879
/// One of the four SPARQL query forms.
///
/// Used both for the form a caller requires and for the form that was
/// actually parsed, so the two can be compared and reported.
#[derive(Clone, Copy, PartialEq, Eq)]
enum ExpectedQuery {
    /// A `SELECT` query.
    Select,
    /// An `ASK` query.
    Ask,
    /// A `CONSTRUCT` query.
    Construct,
    /// A `DESCRIBE` query.
    Describe,
}

/// Renders each variant as its upper-case SPARQL keyword.
impl std::fmt::Display for ExpectedQuery {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match *self {
            ExpectedQuery::Select => "SELECT",
            ExpectedQuery::Ask => "ASK",
            ExpectedQuery::Construct => "CONSTRUCT",
            ExpectedQuery::Describe => "DESCRIBE",
        };
        f.write_str(keyword)
    }
}
898
899fn class_path() -> Path {
900    Path::seq(vec![
901        Path::Pred(vocab::rdf_type()),
902        Path::star(Path::Pred(vocab::rdfs_subclassof())),
903    ])
904}
905
906fn map_node_kind(term: &Term) -> Option<NodeKindSet> {
907    let Term::NamedNode(n) = term else {
908        return None;
909    };
910    let r = n.as_ref();
911    Some(if r == vocab::SH_IRI {
912        NodeKindSet::IRI
913    } else if r == vocab::SH_BLANK_NODE {
914        NodeKindSet::BLANK_NODE
915    } else if r == vocab::SH_LITERAL {
916        NodeKindSet::LITERAL
917    } else if r == vocab::SH_BLANK_NODE_OR_IRI {
918        NodeKindSet::BLANK_NODE_OR_IRI
919    } else if r == vocab::SH_BLANK_NODE_OR_LITERAL {
920        NodeKindSet::BLANK_NODE_OR_LITERAL
921    } else if r == vocab::SH_IRI_OR_LITERAL {
922        NodeKindSet::IRI_OR_LITERAL
923    } else {
924        return None;
925    })
926}