Skip to main content

xsd_schema/xpath/
bind.rs

1//! XPath AST binding phase.
2//!
3//! This module provides the `bind_node()` function which performs static analysis
4//! on a parsed XPath AST. During binding:
5//!
6//! - Function calls are resolved to `FunctionId` via the global registry
7//! - Variable references are resolved to slot indices via `NameBinder`
8//! - Namespace prefixes are resolved to namespace URIs
9//! - Name tests are resolved to interned QNames
10//! - Type expressions are resolved to interned atomic type QNames
11//!
12//! Binding must complete successfully before evaluation can proceed.
13
14use crate::namespace::qname::QualifiedName;
15use crate::namespace::table::XS_NAMESPACE;
16use crate::types::{NameTest as ResolvedNameTest, XmlTypeCode};
17use crate::xpath::arena::{AstArena, AstNodeId};
18use crate::xpath::ast::{
19    AstNode, FunctionCallNode, ItemTypeNode, NameTest, NodeTest, OccurrenceIndicator, QName,
20    SequenceTypeNode, TypeExprKind, TypeExprNode,
21};
22use crate::xpath::context::{NameBinder, XPathContext};
23use crate::xpath::error::XPathError;
24use crate::xpath::XPathMode;
25
26/// Bind an AST node and all its children.
27///
28/// This function performs static analysis on the AST:
29/// - Resolves function calls to `FunctionId`
30/// - Resolves variable references to slot indices
31/// - Validates namespace prefixes
32///
33/// # Arguments
34/// * `arena` - The AST arena containing all nodes
35/// * `id` - The ID of the node to bind
36/// * `ctx` - The static context for namespace resolution
37/// * `binder` - The name binder for variable slot allocation
38///
39/// # Returns
40/// * `Ok(())` if binding succeeds
41/// * `Err(XPathError)` with appropriate error code if binding fails
42///
43/// # Errors
44/// * `XPST0081` - Undefined namespace prefix
45/// * `XPST0017` - Function not found
46/// * `XPST0008` - Undefined variable
47pub fn bind_node(
48    arena: &mut AstArena,
49    id: AstNodeId,
50    ctx: &XPathContext<'_>,
51    binder: &mut NameBinder,
52) -> Result<(), XPathError> {
53    // Clone the node to avoid borrow conflicts
54    let node = arena.get(id).clone();
55
56    match node {
57        AstNode::Expr(expr) => {
58            // Bind all items in the expression sequence
59            for item_id in &expr.items {
60                bind_node(arena, *item_id, ctx, binder)?;
61            }
62        }
63
64        AstNode::Value(_) => {
65            // Literal values need no binding
66        }
67
68        AstNode::ContextItem(_) => {
69            // Context item needs no binding
70        }
71
72        AstNode::VarRef(var_ref) => {
73            // Resolve the variable to a slot
74            let name = resolve_var_qname(&var_ref.prefix, &var_ref.local_name, ctx)?;
75            let var = binder.resolve_with_names(&name, ctx.names)?;
76
77            // Update the node with the resolved slot
78            if let AstNode::VarRef(ref mut node) = arena.get_mut(id) {
79                node.slot = Some(var.slot);
80            }
81        }
82
83        AstNode::If(if_node) => {
84            // Bind test, then, and else branches
85            bind_node(arena, if_node.test, ctx, binder)?;
86            bind_node(arena, if_node.then_branch, ctx, binder)?;
87            bind_node(arena, if_node.else_branch, ctx, binder)?;
88        }
89
90        AstNode::For(for_node) => {
91            // For expressions introduce variables into scope
92            // Each binding's in_expr is evaluated in the outer scope,
93            // then the variable is pushed for the next binding and return_expr
94            for binding_idx in 0..for_node.bindings.len() {
95                let binding = &for_node.bindings[binding_idx];
96                // Bind the in_expr in current scope
97                bind_node(arena, binding.in_expr, ctx, binder)?;
98
99                // Push the variable into scope
100                let name = resolve_var_qname(&binding.prefix, &binding.local_name, ctx)?;
101                let var = binder.push_var(name);
102
103                // Update the binding with the resolved slot (by index, not name)
104                if let AstNode::For(ref mut node) = arena.get_mut(id) {
105                    node.bindings[binding_idx].slot = Some(var.slot);
106                }
107            }
108
109            // Bind the return expression with all variables in scope
110            bind_node(arena, for_node.return_expr, ctx, binder)?;
111
112            // Pop all the variables (in reverse order)
113            for _ in &for_node.bindings {
114                binder.pop_var();
115            }
116        }
117
118        AstNode::Quantified(quant_node) => {
119            // Similar to for expressions
120            for binding_idx in 0..quant_node.bindings.len() {
121                let binding = &quant_node.bindings[binding_idx];
122                bind_node(arena, binding.in_expr, ctx, binder)?;
123
124                let name = resolve_var_qname(&binding.prefix, &binding.local_name, ctx)?;
125                let var = binder.push_var(name);
126
127                if let AstNode::Quantified(ref mut node) = arena.get_mut(id) {
128                    node.bindings[binding_idx].slot = Some(var.slot);
129                }
130            }
131
132            bind_node(arena, quant_node.satisfies, ctx, binder)?;
133
134            for _ in &quant_node.bindings {
135                binder.pop_var();
136            }
137        }
138
139        AstNode::FunctionCall(func_call) => {
140            // First bind all argument expressions
141            for arg_id in &func_call.args {
142                bind_node(arena, *arg_id, ctx, binder)?;
143            }
144
145            // Resolve the function namespace
146            let namespace = if func_call.prefix.is_empty() {
147                // Empty prefix -> use default function namespace
148                ctx.default_function_namespace().to_string()
149            } else {
150                // Resolve the prefix to a namespace URI
151                ctx.resolve_prefix(&func_call.prefix)
152                    .ok_or_else(|| XPathError::undefined_prefix(&func_call.prefix))?
153                    .to_string()
154            };
155
156            // Check if this is an XPath 2.0 constructor function (e.g. xs:integer(...))
157            if let Some(type_expr) = try_bind_constructor_function(&func_call, &namespace, ctx)? {
158                *arena.get_mut(id) = AstNode::TypeExpr(type_expr);
159                return Ok(());
160            }
161
162            // Look up the function via the catalog (supports custom functions)
163            let arity = func_call.args.len();
164            let handle = ctx
165                .function_catalog()
166                .lookup(&namespace, &func_call.local_name, arity)
167                .ok_or_else(|| {
168                    XPathError::function_not_found(&func_call.local_name, arity, &namespace)
169                })?;
170
171            // Store the resolved function handle
172            if let AstNode::FunctionCall(ref mut node) = arena.get_mut(id) {
173                node.function_handle = Some(handle);
174            }
175        }
176
177        AstNode::PathExpr(path_expr) => {
178            // Bind all steps in the path
179            for step_id in &path_expr.steps {
180                bind_node(arena, *step_id, ctx, binder)?;
181            }
182        }
183
184        AstNode::FilterExpr(filter_expr) => {
185            // Bind the base expression and all predicates
186            bind_node(arena, filter_expr.base, ctx, binder)?;
187            for pred_id in &filter_expr.predicates {
188                bind_node(arena, *pred_id, ctx, binder)?;
189            }
190        }
191
192        AstNode::Range(range_node) => {
193            bind_node(arena, range_node.start, ctx, binder)?;
194            bind_node(arena, range_node.end, ctx, binder)?;
195        }
196
197        AstNode::UnaryOp(unary_op) => {
198            bind_node(arena, unary_op.operand, ctx, binder)?;
199        }
200
201        AstNode::BinaryOp(binary_op) => {
202            bind_node(arena, binary_op.left, ctx, binder)?;
203            bind_node(arena, binary_op.right, ctx, binder)?;
204        }
205
206        AstNode::PathStep(path_step) => {
207            // Bind predicates in the step
208            for pred_id in &path_step.predicates {
209                bind_node(arena, *pred_id, ctx, binder)?;
210            }
211
212            // Resolve the name test if present.
213            //
214            // The default-element-namespace only applies to element name
215            // tests. For attribute and namespace axes the expanded QName
216            // of an unprefixed local name is always (no-namespace, local).
217            // §2.5.2 of the XPath 2.0 spec — see also W3C cta0044, where
218            // a schema with `xpathDefaultNamespace="abc"` declaring
219            // unqualified attributes would otherwise see `@switch`
220            // resolved into the default namespace and miss the actual
221            // unprefixed attribute on the instance.
222            let axis = path_step.axis;
223            let is_attribute_axis = matches!(
224                axis,
225                super::ast::Axis::Attribute | super::ast::Axis::Namespace
226            );
227            let resolved = resolve_node_test_with_axis(&path_step.test, ctx, is_attribute_axis)?;
228            if let AstNode::PathStep(ref mut node) = arena.get_mut(id) {
229                node.resolved_test = resolved;
230            }
231        }
232
233        AstNode::TypeExpr(type_expr) => {
234            bind_node(arena, type_expr.operand, ctx, binder)?;
235
236            // Resolve atomic type QName if present
237            if let Some(ItemTypeNode::Atomic(ref qname)) = type_expr.target_type.item_type {
238                let resolved = resolve_atomic_type_qname(qname, ctx)?;
239                if let AstNode::TypeExpr(ref mut node) = arena.get_mut(id) {
240                    node.resolved_atomic_type = Some(resolved);
241                }
242            }
243        }
244    }
245
246    Ok(())
247}
248
249/// Resolve a variable QName from prefix and local name.
250///
251/// This function interns the local name and prefix into the NameTable using `add()`,
252/// which always succeeds (returning existing NameId or creating new one).
253/// Only namespace prefix resolution can fail if the prefix is not bound.
254fn resolve_var_qname(
255    prefix: &str,
256    local_name: &str,
257    ctx: &XPathContext<'_>,
258) -> Result<QualifiedName, XPathError> {
259    // Intern the local name (always succeeds)
260    let local_id = ctx.names.add(local_name);
261
262    if prefix.is_empty() {
263        Ok(QualifiedName::local(local_id))
264    } else {
265        // Intern the prefix
266        let prefix_id = ctx.names.add(prefix);
267
268        // Resolve prefix to namespace - THIS can still fail legitimately
269        let ns_id = ctx
270            .resolve_prefix_id(prefix_id)
271            .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
272
273        Ok(QualifiedName::new(Some(ns_id), local_id, Some(prefix_id)))
274    }
275}
276
277/// Resolve a NodeTest to a ResolvedNameTest with axis-aware namespace
278/// handling. When `is_attribute_axis` is true (attribute or namespace
279/// axis), an unprefixed local name resolves to the no-namespace QName
280/// instead of using the default-element-namespace.
281fn resolve_node_test_with_axis(
282    test: &NodeTest,
283    ctx: &XPathContext<'_>,
284    is_attribute_axis: bool,
285) -> Result<Option<ResolvedNameTest>, XPathError> {
286    match test {
287        NodeTest::Name(name_test) => {
288            let resolved = resolve_name_test_with_axis(name_test, ctx, is_attribute_axis)?;
289            Ok(Some(resolved))
290        }
291        NodeTest::Kind(_) => {
292            // Kind tests (node(), text(), element(), etc.) don't need name resolution
293            // The QNames inside element()/attribute() tests could be resolved,
294            // but that's handled separately during evaluation
295            Ok(None)
296        }
297    }
298}
299
300/// Resolve an AST-level NameTest to a type-system NameTest with interned names.
301///
302/// Handles all wildcard patterns:
303/// - `*` -> Wildcard
304/// - `prefix:*` -> LocalWildcard (namespace URI)
305/// - `*:local` -> NamespaceWildcard (local name)
306/// - `prefix:local` or `local` -> QName
307fn resolve_name_test_with_axis(
308    name_test: &NameTest,
309    ctx: &XPathContext<'_>,
310    is_attribute_axis: bool,
311) -> Result<ResolvedNameTest, XPathError> {
312    match (&name_test.prefix, &name_test.local_name) {
313        // * - wildcard matches any name
314        (None, None) => Ok(ResolvedNameTest::Wildcard),
315
316        // *:local - any namespace with specific local name
317        (None, Some(local)) => {
318            let local_id = ctx.names.add(local);
319            Ok(ResolvedNameTest::NamespaceWildcard(local_id))
320        }
321
322        // prefix:* - any local name in namespace
323        (Some(prefix), None) => {
324            if prefix.is_empty() {
325                // Empty prefix with wildcard local. For element axes
326                // the default-element-namespace applies; for attribute
327                // and namespace axes an unprefixed name is always in
328                // no namespace per XPath 2.0 §2.5.2.
329                if !is_attribute_axis {
330                    if let Some(ns_id) = ctx.default_element_ns {
331                        return Ok(ResolvedNameTest::LocalWildcard(ns_id));
332                    }
333                }
334                let empty_ns = ctx.names.add("");
335                Ok(ResolvedNameTest::LocalWildcard(empty_ns))
336            } else {
337                let prefix_id = ctx.names.add(prefix);
338                let ns_id = ctx
339                    .resolve_prefix_id(prefix_id)
340                    .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
341                Ok(ResolvedNameTest::LocalWildcard(ns_id))
342            }
343        }
344
345        // prefix:local - specific QName
346        (Some(prefix), Some(local)) => {
347            let local_id = ctx.names.add(local);
348            if prefix.is_empty() {
349                // Unprefixed local name. Element axes pick up the
350                // default-element-namespace; attribute and namespace
351                // axes always resolve to no namespace per XPath 2.0
352                // §2.5.2 (W3C cta0044 regression).
353                let ns_id = if is_attribute_axis {
354                    None
355                } else {
356                    ctx.default_element_ns
357                };
358                Ok(ResolvedNameTest::QName(QualifiedName::new(
359                    ns_id, local_id, None,
360                )))
361            } else {
362                let prefix_id = ctx.names.add(prefix);
363                let ns_id = ctx
364                    .resolve_prefix_id(prefix_id)
365                    .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
366                Ok(ResolvedNameTest::QName(QualifiedName::new(
367                    Some(ns_id),
368                    local_id,
369                    Some(prefix_id),
370                )))
371            }
372        }
373    }
374}
375
376/// Resolve an atomic type QName (e.g., xs:integer) to interned form.
377///
378/// Atomic types use the XML Schema namespace by default when unprefixed.
379fn resolve_atomic_type_qname(
380    qname: &QName,
381    ctx: &XPathContext<'_>,
382) -> Result<QualifiedName, XPathError> {
383    let local_id = ctx.names.add(&qname.local);
384
385    if qname.prefix.is_empty() {
386        // Unprefixed atomic types: in XPath 2.0, unprefixed type names in
387        // cast/instance-of use the default element namespace, not xs:
388        // But for compatibility, many implementations treat them as xs: types
389        // Use default element namespace if set, otherwise no namespace
390        let ns_id = ctx.default_element_ns;
391        Ok(QualifiedName::new(ns_id, local_id, None))
392    } else {
393        let prefix_id = ctx.names.add(&qname.prefix);
394        let ns_id = ctx
395            .resolve_prefix_id(prefix_id)
396            .ok_or_else(|| XPathError::undefined_prefix(&qname.prefix))?;
397        Ok(QualifiedName::new(Some(ns_id), local_id, Some(prefix_id)))
398    }
399}
400
401/// Try to bind a function call as an XPath 2.0 constructor function.
402///
403/// Constructor functions allow XML Schema type names (e.g. `xs:integer`, `xs:date`)
404/// to be used as single-argument function calls for type casting. For example,
405/// `xs:unsignedShort(42)` is equivalent to `42 cast as xs:unsignedShort`.
406///
407/// Returns `Ok(Some(type_expr))` if this is a valid constructor function,
408/// `Ok(None)` if it's not a constructor (fall through to normal lookup),
409/// or `Err` for invalid usage (e.g. `xs:NOTATION(...)` is XPST0051).
410fn try_bind_constructor_function(
411    func_call: &FunctionCallNode,
412    namespace: &str,
413    ctx: &XPathContext<'_>,
414) -> Result<Option<TypeExprNode>, XPathError> {
415    // Constructor functions are an XPath 2.0 feature
416    if ctx.mode() != XPathMode::XPath20 {
417        return Ok(None);
418    }
419
420    // Must be in the XML Schema namespace with exactly 1 argument
421    if namespace != XS_NAMESPACE || func_call.args.len() != 1 {
422        return Ok(None);
423    }
424
425    // Check if the local name matches a known XSD type
426    let type_code = match XmlTypeCode::from_local_name(&func_call.local_name) {
427        Some(tc) => tc,
428        None => return Ok(None),
429    };
430
431    // NOTATION is not allowed as a constructor target (XPST0051)
432    if type_code == XmlTypeCode::Notation {
433        return Err(XPathError::unknown_type(&func_call.local_name));
434    }
435
436    // List types and abstract types are not constructor functions —
437    // fall through to normal function lookup (which will produce XPST0017)
438    if type_code.is_list() || matches!(type_code, XmlTypeCode::AnyType | XmlTypeCode::AnySimpleType)
439    {
440        return Ok(None);
441    }
442
443    // Build a CastAs type expression
444    // Per XPath 2.0 spec B.1, constructor functions are xs:TYPE($arg as xs:anyAtomicType?) as xs:TYPE?
445    // so they use ZeroOrOne occurrence (equivalent to "cast as xs:TYPE?")
446    let qname = QName {
447        prefix: func_call.prefix.clone(),
448        local: func_call.local_name.clone(),
449    };
450    let target_type = SequenceTypeNode::single(
451        ItemTypeNode::Atomic(qname.clone()),
452        OccurrenceIndicator::ZeroOrOne,
453        func_call.span,
454    );
455    let mut type_expr = TypeExprNode::new(
456        TypeExprKind::CastAs,
457        func_call.args[0],
458        target_type,
459        func_call.span,
460    );
461
462    // Eagerly resolve the atomic type QName
463    let resolved = resolve_atomic_type_qname(&qname, ctx)?;
464    type_expr.resolved_atomic_type = Some(resolved);
465
466    Ok(Some(type_expr))
467}
468
469#[cfg(test)]
470#[path = "bind_tests.rs"]
471mod bind_tests;