Skip to main content

shifty_parse/
lower.rs

1//! Lower a loaded shapes graph into the formalism [`Schema`].
2//!
3//! Every SHACL Core construct collapses into the small IR, applying the sugar
4//! rules from the gap analysis (`class → path`, `minCount/maxCount → Count`,
5//! per-value constraints wrapped in `∀π = ∃≤0 π.¬φ`, `xone → ∧∨¬`, …). Each
6//! shape lowers to a **focus-node predicate** `φ`, so `sh:property`/`sh:node`
7//! compose by conjunction. Unsupported AF constructs emit diagnostics.
8
9use crate::diagnostics::{DiagLevel, Diagnostic};
10use crate::graph::{Loaded, term_to_node};
11use crate::path::parse_path;
12use crate::vocab;
13use oxrdf::{Literal, NamedNode, NamedOrBlankNode, Term};
14use shifty_algebra::{
15    Bound, NodeExpr, NodeKindSet, Path, Rule, RuleHead, Schema, Selector, Severity, Shape,
16    ShapeArena, ShapeId, SparqlConstraint, SparqlConstruct, SparqlQueryKind, SparqlTarget,
17    Statement, ValueType,
18};
19use spargebra::{Query, SparqlParser};
20use std::collections::{BTreeSet, HashMap, HashSet};
21
/// Result of [`lower`]: the assembled [`Schema`] together with every
/// diagnostic recorded while lowering.
pub struct Lowered {
    /// Schema built from the shapes graph (arena, statements, rules, names).
    pub schema: Schema,
    /// Diagnostics emitted during lowering, in emission order.
    pub diagnostics: Vec<Diagnostic>,
}
26
27/// Lower a loaded graph into a schema plus diagnostics.
28pub fn lower(g: &Loaded) -> Lowered {
29    let mut l = Lowerer {
30        g,
31        arena: ShapeArena::new(),
32        cache: HashMap::new(),
33        statements: Vec::new(),
34        rules: Vec::new(),
35        diags: Vec::new(),
36    };
37    let shapes = l.discover_shapes();
38    for s in &shapes {
39        l.lower_shape(s);
40    }
41    for s in &shapes {
42        // selectors are shared by the shape's statements and its rules
43        let selectors = l.target_selectors(s);
44        if let Some(shape) = l.cache.get(s).copied() {
45            for sel in &selectors {
46                l.statements.push(Statement {
47                    selector: sel.clone(),
48                    shape,
49                });
50            }
51        }
52        l.parse_rules(s, &selectors);
53    }
54    let names = l
55        .cache
56        .iter()
57        .filter_map(|(node, id)| match node {
58            NamedOrBlankNode::NamedNode(n) => Some((*id, n.as_str().to_string())),
59            NamedOrBlankNode::BlankNode(_) => None,
60        })
61        .collect();
62    Lowered {
63        schema: Schema {
64            arena: l.arena,
65            statements: l.statements,
66            rules: l.rules,
67            names,
68        },
69        diagnostics: l.diags,
70    }
71}
72
/// Mutable working state of one lowering run (see [`lower`]).
struct Lowerer<'a> {
    /// The loaded shapes graph being lowered.
    g: &'a Loaded,
    /// Arena receiving every lowered shape.
    arena: ShapeArena,
    /// Shape node → arena id. Filled in *before* a shape body is lowered, so
    /// repeated and recursive references resolve to the same id.
    cache: HashMap<NamedOrBlankNode, ShapeId>,
    /// Selector/shape statements collected so far.
    statements: Vec<Statement>,
    /// Rules collected so far.
    rules: Vec<Rule>,
    /// Diagnostics collected so far.
    diags: Vec<Diagnostic>,
}
81
82impl Lowerer<'_> {
83    fn diag(&mut self, level: DiagLevel, msg: impl Into<String>, subj: &NamedOrBlankNode) {
84        self.diags
85            .push(Diagnostic::new(level, msg, Some(subj.to_string())));
86    }
87
88    /// Subjects that are declared shapes: typed NodeShape/PropertyShape, or
89    /// carrying `sh:path` or a target predicate. Referenced-only shapes are
90    /// pulled in on demand during lowering. Sorted for deterministic output.
91    fn discover_shapes(&self) -> Vec<NamedOrBlankNode> {
92        let mut found: HashSet<NamedOrBlankNode> = HashSet::new();
93        for triple in self.g.graph.iter() {
94            let p = triple.predicate;
95            let is_target = p == vocab::SH_TARGET_NODE
96                || p == vocab::SH_TARGET_CLASS
97                || p == vocab::SH_TARGET_SUBJECTS_OF
98                || p == vocab::SH_TARGET_OBJECTS_OF
99                || p == vocab::SH_TARGET;
100            if p == vocab::SH_PATH || p == vocab::SH_SPARQL || p == vocab::SH_RULE || is_target {
101                found.insert(triple.subject.into_owned());
102            }
103            if p == vocab::RDF_TYPE
104                && let Term::NamedNode(ty) = triple.object.into_owned()
105                && (ty.as_ref() == vocab::SH_NODE_SHAPE || ty.as_ref() == vocab::SH_PROPERTY_SHAPE)
106            {
107                found.insert(triple.subject.into_owned());
108            }
109        }
110        let mut shapes: Vec<NamedOrBlankNode> = found.into_iter().collect();
111        shapes.sort_by_key(|n| n.to_string());
112        shapes
113    }
114
    /// Lower the shape rooted at `s` into the arena and return its id.
    ///
    /// Results are memoised in `self.cache`. The id is reserved and cached
    /// *before* the body is lowered, so a shape that (transitively) references
    /// itself gets the same id back instead of recursing forever.
    ///
    /// The body is the conjunction of: value-scoped constraints (wrapped in
    /// `∀path` when the shape has a path), path-level constraints, the
    /// closed-world check, and any `sh:sparql` constraints. It is stored
    /// wrapped in `Shape::Annotated` carrying the shape's severity. A shape
    /// with `sh:deactivated true` lowers to `Shape::Top` with no annotation.
    fn lower_shape(&mut self, s: &NamedOrBlankNode) -> ShapeId {
        if let Some(id) = self.cache.get(s) {
            return *id;
        }
        // Reserve and publish the id first so recursive references hit the cache.
        let id = self.arena.reserve();
        self.cache.insert(s.clone(), id);

        if self.bool_prop(s, vocab::SH_DEACTIVATED) {
            self.arena.set(id, Shape::Top);
            return id;
        }

        let path = self.parse_shape_path(s);
        let mut conjuncts: Vec<ShapeId> = Vec::new();

        // Value-scoped constraints: each applies to every value node along the
        // path (or to the focus node directly when there is no path).
        let value = self.collect_value_constraints(s);
        if !value.is_empty() {
            let value_phi = self.arena.and(value);
            match &path {
                Some(p) => {
                    // ∀π.φ  ≡  ∃≤0 π.¬φ
                    let neg = self.arena.not(value_phi);
                    let c = self.arena.count(p.clone(), None, Some(0), neg);
                    conjuncts.push(c);
                }
                None => conjuncts.push(value_phi),
            }
        }

        self.collect_path_constraints(s, path.as_ref(), &mut conjuncts);

        if self.bool_prop(s, vocab::SH_CLOSED) {
            let q = self.closed_allowed(s);
            let c = self.arena.insert(Shape::Closed(q));
            conjuncts.push(c);
        }

        // sh:sparql constraints: each must point at a resource carrying either
        // `sh:select` or `sh:ask` (checked in that order).
        for constraint_term in self.g.objects(s, vocab::SH_SPARQL) {
            let Some(constraint_node) = term_to_node(&constraint_term) else {
                self.diag(DiagLevel::Error, "sh:sparql must reference a resource", s);
                continue;
            };
            // `parsed` is `None` when the query is missing or failed to
            // canonicalize; a diagnostic has been recorded in either case.
            let parsed = if let Some(Term::Literal(query)) =
                self.g.object(&constraint_node, vocab::SH_SELECT)
            {
                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Select)
                    .map(|query| (SparqlQueryKind::Select, query))
            } else if let Some(Term::Literal(query)) =
                self.g.object(&constraint_node, vocab::SH_ASK)
            {
                self.canonical_sparql(&constraint_node, query.value(), ExpectedQuery::Ask)
                    .map(|query| (SparqlQueryKind::Ask, query))
            } else {
                self.diag(
                    DiagLevel::Error,
                    "sh:sparql constraint requires sh:select or sh:ask",
                    &constraint_node,
                );
                None
            };
            if let Some((kind, query)) = parsed {
                // The owning shape, as a term, travels with the constraint.
                let shape = Some(match s {
                    NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n.clone()),
                    NamedOrBlankNode::BlankNode(b) => Term::BlankNode(b.clone()),
                });
                // `sh:message` on the SPARQL constraint takes precedence; absent
                // that, fall back to the owning shape's `sh:message` (SHACL §5.2.1).
                let mut messages: Vec<Term> = self.g.objects(&constraint_node, vocab::SH_MESSAGE);
                if messages.is_empty() {
                    messages = self.g.objects(s, vocab::SH_MESSAGE);
                }
                let constraint = SparqlConstraint {
                    kind,
                    query,
                    path: path.clone(),
                    shape,
                    messages,
                };
                conjuncts.push(self.arena.insert(Shape::Sparql(constraint)));
            }
        }

        // Collapse the conjunct list: none → ⊤, one → itself, many → And.
        let body = if conjuncts.is_empty() {
            self.arena.top()
        } else if conjuncts.len() == 1 {
            // A lone conjunct equal to `id` would make the shape its own body;
            // substitute ⊤ in that case.
            if conjuncts[0] == id {
                self.arena.top()
            } else {
                conjuncts[0]
            }
        } else {
            self.arena.insert(Shape::And(conjuncts))
        };
        // Fill the reserved slot with the body annotated by the shape's severity.
        self.arena.set(
            id,
            Shape::Annotated {
                severity: self.severity(s),
                shape: body,
            },
        );
        id
    }
219
220    fn severity(&self, shape: &NamedOrBlankNode) -> Severity {
221        match self.g.object(shape, vocab::SH_SEVERITY) {
222            Some(Term::NamedNode(value)) => Severity::from_named_node(value),
223            _ => Severity::Violation,
224        }
225    }
226
227    fn collect_value_constraints(&mut self, s: &NamedOrBlankNode) -> Vec<ShapeId> {
228        let mut value: Vec<ShapeId> = Vec::new();
229
230        // sh:class C  ≡  ∃≥1 (rdf:type/rdfs:subClassOf*) . test(C)
231        for c in self.g.objects(s, vocab::SH_CLASS) {
232            let tn = self.arena.insert(Shape::TestConst(c));
233            let cc = self.arena.count(class_path(), Some(1), None, tn);
234            value.push(cc);
235        }
236
237        // sh:datatype
238        for d in self.g.objects(s, vocab::SH_DATATYPE) {
239            if let Term::NamedNode(n) = d {
240                let id = self.arena.insert(Shape::TestType(ValueType::Datatype(n)));
241                value.push(id);
242            }
243        }
244
245        // sh:nodeKind
246        for k in self.g.objects(s, vocab::SH_NODE_KIND) {
247            if let Some(set) = map_node_kind(&k) {
248                let id = self.arena.insert(Shape::TestKind(set));
249                value.push(id);
250            } else {
251                self.diag(DiagLevel::Warning, "unrecognized sh:nodeKind value", s);
252            }
253        }
254
255        // numeric range (combine the four bounds into one facet)
256        let lo = self
257            .lit(s, vocab::SH_MIN_INCLUSIVE)
258            .map(|value| Bound {
259                value,
260                inclusive: true,
261            })
262            .or_else(|| {
263                self.lit(s, vocab::SH_MIN_EXCLUSIVE).map(|value| Bound {
264                    value,
265                    inclusive: false,
266                })
267            });
268        let hi = self
269            .lit(s, vocab::SH_MAX_INCLUSIVE)
270            .map(|value| Bound {
271                value,
272                inclusive: true,
273            })
274            .or_else(|| {
275                self.lit(s, vocab::SH_MAX_EXCLUSIVE).map(|value| Bound {
276                    value,
277                    inclusive: false,
278                })
279            });
280        if lo.is_some() || hi.is_some() {
281            let id = self
282                .arena
283                .insert(Shape::TestType(ValueType::NumericRange { lo, hi }));
284            value.push(id);
285        }
286
287        // length
288        let min_len = self.int(s, vocab::SH_MIN_LENGTH);
289        let max_len = self.int(s, vocab::SH_MAX_LENGTH);
290        if min_len.is_some() || max_len.is_some() {
291            let id = self.arena.insert(Shape::TestType(ValueType::Length {
292                min: min_len,
293                max: max_len,
294            }));
295            value.push(id);
296        }
297
298        // pattern (+ flags)
299        let flags = self
300            .lit(s, vocab::SH_FLAGS)
301            .map(|l| l.value().to_string())
302            .unwrap_or_default();
303        for pat in self.g.objects(s, vocab::SH_PATTERN) {
304            if let Term::Literal(l) = pat {
305                let id = self.arena.insert(Shape::TestType(ValueType::Pattern {
306                    regex: l.value().to_string(),
307                    flags: flags.clone(),
308                }));
309                value.push(id);
310            }
311        }
312
313        // sh:languageIn
314        for li in self.g.objects(s, vocab::SH_LANGUAGE_IN) {
315            let langs: Vec<String> = self
316                .g
317                .read_list(&li)
318                .into_iter()
319                .filter_map(|m| match m {
320                    Term::Literal(l) => Some(l.value().to_string()),
321                    _ => None,
322                })
323                .collect();
324            let id = self.arena.insert(Shape::TestType(ValueType::LangIn(langs)));
325            value.push(id);
326        }
327
328        // sh:in  ≡  ⋁ test(member)
329        for inl in self.g.objects(s, vocab::SH_IN) {
330            let alts: Vec<ShapeId> = self
331                .g
332                .read_list(&inl)
333                .into_iter()
334                .map(|m| self.arena.insert(Shape::TestConst(m)))
335                .collect();
336            let or = self.arena.or(alts);
337            value.push(or);
338        }
339
340        // sh:node — each value node must conform to the referenced shape
341        for n in self.g.objects(s, vocab::SH_NODE) {
342            if let Some(nn) = term_to_node(&n) {
343                let id = self.lower_shape(&nn);
344                value.push(id);
345            }
346        }
347
348        // sh:property — like sh:node, each *value node* must conform to the
349        // referenced property shape (so on a property shape it is scoped under
350        // ∀path, not applied to the focus node directly).
351        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
352            if let Some(pn) = term_to_node(&prop) {
353                let id = self.lower_shape(&pn);
354                value.push(id);
355            }
356        }
357
358        // sh:not
359        for n in self.g.objects(s, vocab::SH_NOT) {
360            if let Some(nn) = term_to_node(&n) {
361                let id = self.lower_shape(&nn);
362                let neg = self.arena.not(id);
363                value.push(neg);
364            }
365        }
366
367        // sh:and / sh:or / sh:xone (each object is an rdf:list of shapes)
368        for l in self.g.objects(s, vocab::SH_AND) {
369            let ids = self.lower_shape_list(&l);
370            let a = self.arena.and(ids);
371            value.push(a);
372        }
373        for l in self.g.objects(s, vocab::SH_OR) {
374            let ids = self.lower_shape_list(&l);
375            let o = self.arena.or(ids);
376            value.push(o);
377        }
378        for l in self.g.objects(s, vocab::SH_XONE) {
379            let ids = self.lower_shape_list(&l);
380            let x = self.arena.xone(ids);
381            value.push(x);
382        }
383
384        value
385    }
386
387    /// Path-level constraints (cardinality, qualified counts, property pairs,
388    /// hasValue, uniqueLang). Most require a path; without one they are ignored
389    /// with a diagnostic, except `sh:hasValue` which applies to the focus node.
390    fn collect_path_constraints(
391        &mut self,
392        s: &NamedOrBlankNode,
393        path: Option<&Path>,
394        conjuncts: &mut Vec<ShapeId>,
395    ) {
396        let need_path = |me: &mut Self, what: &str| {
397            me.diag(DiagLevel::Warning, format!("{what} ignored: no sh:path"), s);
398        };
399
400        let min_count = self.int(s, vocab::SH_MIN_COUNT);
401        let max_count = self.int(s, vocab::SH_MAX_COUNT);
402        if min_count.is_some() || max_count.is_some() {
403            match path {
404                Some(p) => {
405                    let top = self.arena.top();
406                    let c = self.arena.count(p.clone(), min_count, max_count, top);
407                    conjuncts.push(c);
408                }
409                None => need_path(self, "sh:minCount/sh:maxCount"),
410            }
411        }
412
413        // sh:hasValue
414        for v in self.g.objects(s, vocab::SH_HAS_VALUE) {
415            match path {
416                Some(p) => {
417                    let tc = self.arena.insert(Shape::TestConst(v));
418                    let c = self.arena.count(p.clone(), Some(1), None, tc);
419                    conjuncts.push(c);
420                }
421                None => {
422                    let tc = self.arena.insert(Shape::TestConst(v));
423                    conjuncts.push(tc);
424                }
425            }
426        }
427
428        // sh:qualifiedValueShape + qualifiedMin/MaxCount
429        for q in self.g.objects(s, vocab::SH_QUALIFIED_VALUE_SHAPE) {
430            if let Some(qn) = term_to_node(&q) {
431                let qmin = self.int(s, vocab::SH_QUALIFIED_MIN_COUNT);
432                let qmax = self.int(s, vocab::SH_QUALIFIED_MAX_COUNT);
433                match path {
434                    Some(p) => {
435                        let mut qualifiers = vec![self.lower_shape(&qn)];
436                        if self.bool_prop(s, vocab::SH_QUALIFIED_VALUE_SHAPES_DISJOINT) {
437                            for sibling in self.sibling_qualified_shapes(s, &qn) {
438                                let sibling = self.lower_shape(&sibling);
439                                qualifiers.push(self.arena.not(sibling));
440                            }
441                        }
442                        let qualifier = self.arena.and(qualifiers);
443                        let c = self.arena.count(p.clone(), qmin, qmax, qualifier);
444                        conjuncts.push(c);
445                    }
446                    None => need_path(self, "sh:qualifiedValueShape"),
447                }
448            }
449        }
450
451        // property-pair constraints
452        let pairs = [
453            (vocab::SH_EQUALS, "equals"),
454            (vocab::SH_DISJOINT, "disjoint"),
455            (vocab::SH_LESS_THAN, "lessThan"),
456            (vocab::SH_LESS_THAN_OR_EQUALS, "lessThanOrEquals"),
457        ];
458        for (pred, name) in pairs {
459            for other in self.g.objects(s, pred) {
460                let Term::NamedNode(op) = other else { continue };
461                match path {
462                    Some(p) => {
463                        let shape = match name {
464                            "equals" => Shape::Eq(p.clone(), op),
465                            "disjoint" => Shape::Disj(p.clone(), op),
466                            "lessThan" => Shape::Lt(p.clone(), op),
467                            _ => Shape::Le(p.clone(), op),
468                        };
469                        let c = self.arena.insert(shape);
470                        conjuncts.push(c);
471                    }
472                    None if matches!(name, "equals" | "disjoint") => {
473                        let shape = if name == "equals" {
474                            Shape::Eq(Path::Id, op)
475                        } else {
476                            Shape::Disj(Path::Id, op)
477                        };
478                        let c = self.arena.insert(shape);
479                        conjuncts.push(c);
480                    }
481                    None => need_path(self, &format!("sh:{name}")),
482                }
483            }
484        }
485
486        // sh:uniqueLang
487        if self.bool_prop(s, vocab::SH_UNIQUE_LANG) {
488            match path {
489                Some(p) => {
490                    let c = self.arena.insert(Shape::UniqueLang(p.clone()));
491                    conjuncts.push(c);
492                }
493                None => need_path(self, "sh:uniqueLang"),
494            }
495        }
496    }
497
498    /// The target selectors of a shape (used by both its statements and rules).
499    fn target_selectors(&mut self, s: &NamedOrBlankNode) -> Vec<Selector> {
500        let mut sels = Vec::new();
501
502        for c in self.g.objects(s, vocab::SH_TARGET_NODE) {
503            sels.push(Selector::IsConst(c));
504        }
505        for c in self.g.objects(s, vocab::SH_TARGET_CLASS) {
506            sels.push(self.class_selector(c));
507        }
508        for p in self.g.objects(s, vocab::SH_TARGET_SUBJECTS_OF) {
509            if let Term::NamedNode(n) = p {
510                sels.push(Selector::HasOut(n));
511            }
512        }
513        for p in self.g.objects(s, vocab::SH_TARGET_OBJECTS_OF) {
514            if let Term::NamedNode(n) = p {
515                sels.push(Selector::HasIn(n));
516            }
517        }
518
519        // implicit class target: a shape that is also an rdfs:Class / owl:Class
520        if (self.g.is_instance_of(s, vocab::RDFS_CLASS)
521            || self.g.is_instance_of(s, vocab::OWL_CLASS))
522            && let NamedOrBlankNode::NamedNode(n) = s
523        {
524            sels.push(self.class_selector(Term::NamedNode(n.clone())));
525        }
526
527        for target_term in self.g.objects(s, vocab::SH_TARGET) {
528            let Some(target_node) = term_to_node(&target_term) else {
529                self.diag(DiagLevel::Error, "sh:target must reference a resource", s);
530                continue;
531            };
532            match self.g.object(&target_node, vocab::SH_SELECT) {
533                Some(Term::Literal(query)) => {
534                    if let Some(query) =
535                        self.canonical_sparql(&target_node, query.value(), ExpectedQuery::Select)
536                    {
537                        sels.push(Selector::Sparql(SparqlTarget { query }));
538                    }
539                }
540                _ => self.diag(
541                    DiagLevel::Unsupported,
542                    "custom sh:target without sh:select is not yet lowered",
543                    &target_node,
544                ),
545            }
546        }
547
548        sels
549    }
550
551    /// Lower the `sh:rule`s of a shape (SHACL-AF). A rule fires on the shape's
552    /// targets, so we emit one [`Rule`] per selector.
553    fn parse_rules(&mut self, s: &NamedOrBlankNode, selectors: &[Selector]) {
554        for rule_term in self.g.objects(s, vocab::SH_RULE) {
555            let Some(rn) = term_to_node(&rule_term) else {
556                continue;
557            };
558            let Some(head) = self.parse_rule_head(&rn) else {
559                continue;
560            };
561
562            let conditions: Vec<ShapeId> = self
563                .g
564                .objects(&rn, vocab::SH_CONDITION)
565                .iter()
566                .filter_map(term_to_node)
567                .map(|c| self.lower_shape(&c))
568                .collect();
569            let order = self.order(&rn);
570            let deactivated = self.bool_prop(&rn, vocab::SH_DEACTIVATED);
571
572            for sel in selectors {
573                self.rules.push(Rule {
574                    selector: sel.clone(),
575                    conditions: conditions.clone(),
576                    head: head.clone(),
577                    order,
578                    deactivated,
579                });
580            }
581        }
582    }
583
584    /// Qualified value shapes attached through the same parent `sh:property`
585    /// declaration, excluding the current qualified shape itself.
586    fn sibling_qualified_shapes(
587        &self,
588        shape: &NamedOrBlankNode,
589        qualifier: &NamedOrBlankNode,
590    ) -> Vec<NamedOrBlankNode> {
591        let mut siblings = HashSet::new();
592        for triple in self.g.graph.triples_for_predicate(vocab::SH_PROPERTY) {
593            if term_to_node(&triple.object.into_owned()).as_ref() != Some(shape) {
594                continue;
595            }
596            let parent = triple.subject.into_owned();
597            for property in self.g.objects(&parent, vocab::SH_PROPERTY) {
598                let Some(property) = term_to_node(&property) else {
599                    continue;
600                };
601                for sibling in self.g.objects(&property, vocab::SH_QUALIFIED_VALUE_SHAPE) {
602                    if let Some(sibling) = term_to_node(&sibling) {
603                        siblings.insert(sibling);
604                    }
605                }
606            }
607        }
608        siblings.remove(qualifier);
609        let mut siblings: Vec<_> = siblings.into_iter().collect();
610        siblings.sort_by_key(|node| node.to_string());
611        siblings
612    }
613
614    fn parse_rule_head(&mut self, rn: &NamedOrBlankNode) -> Option<RuleHead> {
615        // sh:SPARQLRule — parse and canonicalize the CONSTRUCT while retaining
616        // an opaque algebra leaf for later query rewriting.
617        if let Some(Term::Literal(q)) = self.g.object(rn, vocab::SH_CONSTRUCT) {
618            let query = self.canonical_sparql(rn, q.value(), ExpectedQuery::Construct)?;
619            return Some(RuleHead::Sparql(SparqlConstruct { query }));
620        }
621        // sh:TripleRule — subject/predicate/object node expressions
622        let (subj, pred, obj) = (
623            self.g.object(rn, vocab::SH_SUBJECT),
624            self.g.object(rn, vocab::SH_PREDICATE),
625            self.g.object(rn, vocab::SH_OBJECT),
626        );
627        if subj.is_none() && pred.is_none() && obj.is_none() {
628            self.diag(DiagLevel::Unsupported, "unrecognized sh:rule head", rn);
629            return None;
630        }
631        let (Some(subj), Some(pred), Some(obj)) = (subj, pred, obj) else {
632            self.diag(
633                DiagLevel::Error,
634                "sh:TripleRule missing subject/predicate/object",
635                rn,
636            );
637            return None;
638        };
639        Some(RuleHead::Triple {
640            subject: self.parse_node_expr(subj, rn)?,
641            predicate: self.parse_node_expr(pred, rn)?,
642            object: self.parse_node_expr(obj, rn)?,
643        })
644    }
645
646    /// Parse a node expression (SHACL-AF §5). Handles `sh:this`, constants,
647    /// path expressions, and SPARQL function calls `[ ex:fn (arg …) ]`.
648    fn parse_node_expr(&mut self, term: Term, owner: &NamedOrBlankNode) -> Option<NodeExpr> {
649        match &term {
650            Term::NamedNode(n) if n.as_ref() == vocab::SH_THIS => Some(NodeExpr::This),
651            Term::NamedNode(_) | Term::Literal(_) => Some(NodeExpr::Constant(term)),
652            Term::BlankNode(_) => {
653                let node = term_to_node(&term).expect("blank node");
654                if let Some(path_term) = self.g.object(&node, vocab::SH_PATH) {
655                    match parse_path(self.g, &path_term) {
656                        Ok(path) => Some(NodeExpr::Path(path)),
657                        Err(e) => {
658                            self.diag(
659                                DiagLevel::Error,
660                                format!("invalid node-expression path: {e}"),
661                                owner,
662                            );
663                            None
664                        }
665                    }
666                } else if let Some(expr) = self.try_function_call(&node, owner) {
667                    Some(expr)
668                } else {
669                    self.diag(
670                        DiagLevel::Unsupported,
671                        "complex node expression not yet lowered",
672                        owner,
673                    );
674                    None
675                }
676            }
677        }
678    }
679
680    /// Detect a function-call node expression `[ ex:fn ( arg1 arg2 … ) ]`.
681    ///
682    /// The blank node must have exactly one non-SHACL/RDF/RDFS/OWL predicate;
683    /// its object must be an RDF list of argument node expressions.
684    fn try_function_call(
685        &mut self,
686        node: &NamedOrBlankNode,
687        owner: &NamedOrBlankNode,
688    ) -> Option<NodeExpr> {
689        let func_preds: Vec<(NamedNode, Term)> = self
690            .g
691            .graph
692            .triples_for_subject(node)
693            .map(|t| (t.predicate.into_owned(), t.object.into_owned()))
694            .filter(|(p, _)| {
695                let s = p.as_str();
696                !s.starts_with(vocab::SH)
697                    && !s.starts_with(vocab::RDF)
698                    && !s.starts_with(vocab::RDFS)
699                    && !s.starts_with(vocab::OWL)
700            })
701            .collect();
702
703        if func_preds.len() != 1 {
704            return None;
705        }
706        let (func_iri, list_head) = func_preds.into_iter().next().unwrap();
707        let arg_terms = self.g.read_list(&list_head);
708        let n = arg_terms.len();
709        let args: Vec<NodeExpr> = arg_terms
710            .into_iter()
711            .filter_map(|t| self.parse_node_expr(t, owner))
712            .collect();
713        if args.len() != n {
714            return None;
715        }
716        Some(NodeExpr::Function {
717            iri: func_iri,
718            args,
719        })
720    }
721
722    fn order(&self, s: &NamedOrBlankNode) -> Option<i64> {
723        match self.g.object(s, vocab::SH_ORDER) {
724            Some(Term::Literal(l)) => l.value().parse().ok(),
725            _ => None,
726        }
727    }
728
729    /// `∃≥1 (rdf:type/rdfs:subClassOf*) . test(class)` as a selector.
730    fn class_selector(&mut self, class: Term) -> Selector {
731        let tn = self.arena.insert(Shape::TestConst(class));
732        Selector::HasPath(class_path(), tn)
733    }
734
735    fn lower_shape_list(&mut self, list_head: &Term) -> Vec<ShapeId> {
736        self.g
737            .read_list(list_head)
738            .into_iter()
739            .filter_map(|m| term_to_node(&m))
740            .map(|n| self.lower_shape(&n))
741            .collect()
742    }
743
744    fn parse_shape_path(&mut self, s: &NamedOrBlankNode) -> Option<Path> {
745        let term = self.g.object(s, vocab::SH_PATH)?;
746        match parse_path(self.g, &term) {
747            Ok(p) => Some(p),
748            Err(e) => {
749                self.diag(DiagLevel::Error, format!("invalid sh:path: {e}"), s);
750                None
751            }
752        }
753    }
754
755    fn closed_allowed(&self, s: &NamedOrBlankNode) -> BTreeSet<oxrdf::NamedNode> {
756        let mut q = BTreeSet::new();
757        for prop in self.g.objects(s, vocab::SH_PROPERTY) {
758            if let Some(pn) = term_to_node(&prop)
759                && let Some(Term::NamedNode(n)) = self.g.object(&pn, vocab::SH_PATH)
760            {
761                q.insert(n);
762            }
763        }
764        for ip in self.g.objects(s, vocab::SH_IGNORED_PROPERTIES) {
765            for m in self.g.read_list(&ip) {
766                if let Term::NamedNode(n) = m {
767                    q.insert(n);
768                }
769            }
770        }
771        q
772    }
773
774    fn bool_prop(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> bool {
775        matches!(self.g.object(s, pred), Some(Term::Literal(l)) if l.value() == "true")
776    }
777
778    fn int(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<u64> {
779        match self.g.object(s, pred) {
780            Some(Term::Literal(l)) => l.value().parse().ok(),
781            _ => None,
782        }
783    }
784
785    fn lit(&self, s: &NamedOrBlankNode, pred: oxrdf::NamedNodeRef) -> Option<Literal> {
786        match self.g.object(s, pred) {
787            Some(Term::Literal(l)) => Some(l),
788            _ => None,
789        }
790    }
791
792    /// Parse a SHACL SPARQL query once, resolving both document prefixes and
793    /// `sh:prefixes` declarations. `Query::to_string` expands prefix names, so
794    /// the IR remains self-contained and can be reparsed or rewritten later.
795    fn canonical_sparql(
796        &mut self,
797        owner: &NamedOrBlankNode,
798        raw: &str,
799        expected: ExpectedQuery,
800    ) -> Option<String> {
801        let (query, canonical) = match canonical_sparql_query(self.g, owner, raw) {
802            Ok(result) => result,
803            Err(message) => {
804                self.diag(DiagLevel::Error, message, owner);
805                return None;
806            }
807        };
808        let actual = match &query {
809            Query::Select { .. } => ExpectedQuery::Select,
810            Query::Ask { .. } => ExpectedQuery::Ask,
811            Query::Construct { .. } => ExpectedQuery::Construct,
812            Query::Describe { .. } => ExpectedQuery::Describe,
813        };
814        if actual != expected {
815            self.diag(
816                DiagLevel::Error,
817                format!("expected SPARQL {expected}, found {actual}"),
818                owner,
819            );
820            return None;
821        }
822        Some(canonical)
823    }
824}
825
826/// Build the canonical, prefix-expanded form of a SHACL SPARQL query string.
827///
828/// Resolves the document base IRI, document-level prefixes, and the
829/// `sh:prefixes` / `sh:declare` chains (following `owl:imports`) declared on
830/// `owner`, parses `raw`, and returns the parsed query together with its
831/// canonical string form. `Query::to_string` expands prefix names, so the
832/// result is self-contained and can be reparsed without external declarations.
833///
834/// Errors are returned as messages so callers can decide how to surface them:
835/// the lowerer routes them to diagnostics; the report validator drops the
836/// offending constraint, matching the lowering path.
837pub fn canonical_sparql_query(
838    g: &Loaded,
839    owner: &NamedOrBlankNode,
840    raw: &str,
841) -> Result<(Query, String), String> {
842    let mut parser = SparqlParser::new();
843    if let Some(base) = &g.base {
844        parser = parser
845            .with_base_iri(base)
846            .map_err(|e| format!("invalid SPARQL base IRI: {e}"))?;
847    }
848    for (prefix, namespace) in &g.prefixes {
849        parser = parser
850            .with_prefix(prefix, namespace)
851            .map_err(|e| format!("invalid SPARQL prefix declaration {prefix}: {e}"))?;
852    }
853    let mut prefix_sources: Vec<NamedOrBlankNode> = g
854        .objects(owner, vocab::SH_PREFIXES)
855        .iter()
856        .filter_map(term_to_node)
857        .collect();
858    let mut seen_sources = HashSet::new();
859    while let Some(source) = prefix_sources.pop() {
860        if !seen_sources.insert(source.clone()) {
861            continue;
862        }
863        prefix_sources.extend(
864            g.objects(&source, vocab::OWL_IMPORTS)
865                .iter()
866                .filter_map(term_to_node),
867        );
868        for declaration_term in g.objects(&source, vocab::SH_DECLARE) {
869            let Some(declaration) = term_to_node(&declaration_term) else {
870                continue;
871            };
872            let (Some(Term::Literal(prefix)), Some(Term::Literal(namespace))) = (
873                g.object(&declaration, vocab::SH_PREFIX),
874                g.object(&declaration, vocab::SH_NAMESPACE),
875            ) else {
876                continue;
877            };
878            parser = parser
879                .with_prefix(prefix.value(), namespace.value())
880                .map_err(|e| format!("invalid SHACL SPARQL prefix declaration: {e}"))?;
881        }
882    }
883    let query = parser
884        .parse_query(raw)
885        .map_err(|e| format!("invalid SPARQL query: {e}"))?;
886    let canonical = query.to_string();
887    Ok((query, canonical))
888}
889
/// The SPARQL query form a SHACL construct requires, also used to classify
/// the form of a parsed query so the two can be compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExpectedQuery {
    /// A `SELECT` query.
    Select,
    /// An `ASK` query.
    Ask,
    /// A `CONSTRUCT` query.
    Construct,
    /// A `DESCRIBE` query.
    Describe,
}
897
898impl std::fmt::Display for ExpectedQuery {
899    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
900        f.write_str(match self {
901            Self::Select => "SELECT",
902            Self::Ask => "ASK",
903            Self::Construct => "CONSTRUCT",
904            Self::Describe => "DESCRIBE",
905        })
906    }
907}
908
909fn class_path() -> Path {
910    Path::seq(vec![
911        Path::Pred(vocab::rdf_type()),
912        Path::star(Path::Pred(vocab::rdfs_subclassof())),
913    ])
914}
915
916fn map_node_kind(term: &Term) -> Option<NodeKindSet> {
917    let Term::NamedNode(n) = term else {
918        return None;
919    };
920    let r = n.as_ref();
921    Some(if r == vocab::SH_IRI {
922        NodeKindSet::IRI
923    } else if r == vocab::SH_BLANK_NODE {
924        NodeKindSet::BLANK_NODE
925    } else if r == vocab::SH_LITERAL {
926        NodeKindSet::LITERAL
927    } else if r == vocab::SH_BLANK_NODE_OR_IRI {
928        NodeKindSet::BLANK_NODE_OR_IRI
929    } else if r == vocab::SH_BLANK_NODE_OR_LITERAL {
930        NodeKindSet::BLANK_NODE_OR_LITERAL
931    } else if r == vocab::SH_IRI_OR_LITERAL {
932        NodeKindSet::IRI_OR_LITERAL
933    } else {
934        return None;
935    })
936}