xsd_schema/xpath/bind.rs
1//! XPath AST binding phase.
2//!
3//! This module provides the `bind_node()` function which performs static analysis
4//! on a parsed XPath AST. During binding:
5//!
6//! - Function calls are resolved to `FunctionId` via the global registry
7//! - Variable references are resolved to slot indices via `NameBinder`
8//! - Namespace prefixes are resolved to namespace URIs
9//! - Name tests are resolved to interned QNames
10//! - Type expressions are resolved to interned atomic type QNames
11//!
12//! Binding must complete successfully before evaluation can proceed.
13
14use crate::namespace::qname::QualifiedName;
15use crate::namespace::table::XS_NAMESPACE;
16use crate::types::{NameTest as ResolvedNameTest, XmlTypeCode};
17use crate::xpath::arena::{AstArena, AstNodeId};
18use crate::xpath::ast::{
19 AstNode, FunctionCallNode, ItemTypeNode, NameTest, NodeTest, OccurrenceIndicator, QName,
20 SequenceTypeNode, TypeExprKind, TypeExprNode,
21};
22use crate::xpath::context::{NameBinder, XPathContext};
23use crate::xpath::error::XPathError;
24use crate::xpath::XPathMode;
25
26/// Bind an AST node and all its children.
27///
28/// This function performs static analysis on the AST:
29/// - Resolves function calls to `FunctionId`
30/// - Resolves variable references to slot indices
31/// - Validates namespace prefixes
32///
33/// # Arguments
34/// * `arena` - The AST arena containing all nodes
35/// * `id` - The ID of the node to bind
36/// * `ctx` - The static context for namespace resolution
37/// * `binder` - The name binder for variable slot allocation
38///
39/// # Returns
40/// * `Ok(())` if binding succeeds
41/// * `Err(XPathError)` with appropriate error code if binding fails
42///
43/// # Errors
44/// * `XPST0081` - Undefined namespace prefix
45/// * `XPST0017` - Function not found
46/// * `XPST0008` - Undefined variable
47pub fn bind_node(
48 arena: &mut AstArena,
49 id: AstNodeId,
50 ctx: &XPathContext<'_>,
51 binder: &mut NameBinder,
52) -> Result<(), XPathError> {
53 // Clone the node to avoid borrow conflicts
54 let node = arena.get(id).clone();
55
56 match node {
57 AstNode::Expr(expr) => {
58 // Bind all items in the expression sequence
59 for item_id in &expr.items {
60 bind_node(arena, *item_id, ctx, binder)?;
61 }
62 }
63
64 AstNode::Value(_) => {
65 // Literal values need no binding
66 }
67
68 AstNode::ContextItem(_) => {
69 // Context item needs no binding
70 }
71
72 AstNode::VarRef(var_ref) => {
73 // Resolve the variable to a slot
74 let name = resolve_var_qname(&var_ref.prefix, &var_ref.local_name, ctx)?;
75 let var = binder.resolve_with_names(&name, ctx.names)?;
76
77 // Update the node with the resolved slot
78 if let AstNode::VarRef(ref mut node) = arena.get_mut(id) {
79 node.slot = Some(var.slot);
80 }
81 }
82
83 AstNode::If(if_node) => {
84 // Bind test, then, and else branches
85 bind_node(arena, if_node.test, ctx, binder)?;
86 bind_node(arena, if_node.then_branch, ctx, binder)?;
87 bind_node(arena, if_node.else_branch, ctx, binder)?;
88 }
89
90 AstNode::For(for_node) => {
91 // For expressions introduce variables into scope
92 // Each binding's in_expr is evaluated in the outer scope,
93 // then the variable is pushed for the next binding and return_expr
94 for binding_idx in 0..for_node.bindings.len() {
95 let binding = &for_node.bindings[binding_idx];
96 // Bind the in_expr in current scope
97 bind_node(arena, binding.in_expr, ctx, binder)?;
98
99 // Push the variable into scope
100 let name = resolve_var_qname(&binding.prefix, &binding.local_name, ctx)?;
101 let var = binder.push_var(name);
102
103 // Update the binding with the resolved slot (by index, not name)
104 if let AstNode::For(ref mut node) = arena.get_mut(id) {
105 node.bindings[binding_idx].slot = Some(var.slot);
106 }
107 }
108
109 // Bind the return expression with all variables in scope
110 bind_node(arena, for_node.return_expr, ctx, binder)?;
111
112 // Pop all the variables (in reverse order)
113 for _ in &for_node.bindings {
114 binder.pop_var();
115 }
116 }
117
118 AstNode::Quantified(quant_node) => {
119 // Similar to for expressions
120 for binding_idx in 0..quant_node.bindings.len() {
121 let binding = &quant_node.bindings[binding_idx];
122 bind_node(arena, binding.in_expr, ctx, binder)?;
123
124 let name = resolve_var_qname(&binding.prefix, &binding.local_name, ctx)?;
125 let var = binder.push_var(name);
126
127 if let AstNode::Quantified(ref mut node) = arena.get_mut(id) {
128 node.bindings[binding_idx].slot = Some(var.slot);
129 }
130 }
131
132 bind_node(arena, quant_node.satisfies, ctx, binder)?;
133
134 for _ in &quant_node.bindings {
135 binder.pop_var();
136 }
137 }
138
139 AstNode::FunctionCall(func_call) => {
140 // First bind all argument expressions
141 for arg_id in &func_call.args {
142 bind_node(arena, *arg_id, ctx, binder)?;
143 }
144
145 // Resolve the function namespace
146 let namespace = if func_call.prefix.is_empty() {
147 // Empty prefix -> use default function namespace
148 ctx.default_function_namespace().to_string()
149 } else {
150 // Resolve the prefix to a namespace URI
151 ctx.resolve_prefix(&func_call.prefix)
152 .ok_or_else(|| XPathError::undefined_prefix(&func_call.prefix))?
153 .to_string()
154 };
155
156 // Check if this is an XPath 2.0 constructor function (e.g. xs:integer(...))
157 if let Some(type_expr) = try_bind_constructor_function(&func_call, &namespace, ctx)? {
158 *arena.get_mut(id) = AstNode::TypeExpr(type_expr);
159 return Ok(());
160 }
161
162 // Look up the function via the catalog (supports custom functions)
163 let arity = func_call.args.len();
164 let handle = ctx
165 .function_catalog()
166 .lookup(&namespace, &func_call.local_name, arity)
167 .ok_or_else(|| {
168 XPathError::function_not_found(&func_call.local_name, arity, &namespace)
169 })?;
170
171 // Store the resolved function handle
172 if let AstNode::FunctionCall(ref mut node) = arena.get_mut(id) {
173 node.function_handle = Some(handle);
174 }
175 }
176
177 AstNode::PathExpr(path_expr) => {
178 // Bind all steps in the path
179 for step_id in &path_expr.steps {
180 bind_node(arena, *step_id, ctx, binder)?;
181 }
182 }
183
184 AstNode::FilterExpr(filter_expr) => {
185 // Bind the base expression and all predicates
186 bind_node(arena, filter_expr.base, ctx, binder)?;
187 for pred_id in &filter_expr.predicates {
188 bind_node(arena, *pred_id, ctx, binder)?;
189 }
190 }
191
192 AstNode::Range(range_node) => {
193 bind_node(arena, range_node.start, ctx, binder)?;
194 bind_node(arena, range_node.end, ctx, binder)?;
195 }
196
197 AstNode::UnaryOp(unary_op) => {
198 bind_node(arena, unary_op.operand, ctx, binder)?;
199 }
200
201 AstNode::BinaryOp(binary_op) => {
202 bind_node(arena, binary_op.left, ctx, binder)?;
203 bind_node(arena, binary_op.right, ctx, binder)?;
204 }
205
206 AstNode::PathStep(path_step) => {
207 // Bind predicates in the step
208 for pred_id in &path_step.predicates {
209 bind_node(arena, *pred_id, ctx, binder)?;
210 }
211
212 // Resolve the name test if present.
213 //
214 // The default-element-namespace only applies to element name
215 // tests. For attribute and namespace axes the expanded QName
216 // of an unprefixed local name is always (no-namespace, local).
217 // §2.5.2 of the XPath 2.0 spec — see also W3C cta0044, where
218 // a schema with `xpathDefaultNamespace="abc"` declaring
219 // unqualified attributes would otherwise see `@switch`
220 // resolved into the default namespace and miss the actual
221 // unprefixed attribute on the instance.
222 let axis = path_step.axis;
223 let is_attribute_axis = matches!(
224 axis,
225 super::ast::Axis::Attribute | super::ast::Axis::Namespace
226 );
227 let resolved = resolve_node_test_with_axis(&path_step.test, ctx, is_attribute_axis)?;
228 if let AstNode::PathStep(ref mut node) = arena.get_mut(id) {
229 node.resolved_test = resolved;
230 }
231 }
232
233 AstNode::TypeExpr(type_expr) => {
234 bind_node(arena, type_expr.operand, ctx, binder)?;
235
236 // Resolve atomic type QName if present
237 if let Some(ItemTypeNode::Atomic(ref qname)) = type_expr.target_type.item_type {
238 let resolved = resolve_atomic_type_qname(qname, ctx)?;
239 if let AstNode::TypeExpr(ref mut node) = arena.get_mut(id) {
240 node.resolved_atomic_type = Some(resolved);
241 }
242 }
243 }
244 }
245
246 Ok(())
247}
248
249/// Resolve a variable QName from prefix and local name.
250///
251/// This function interns the local name and prefix into the NameTable using `add()`,
252/// which always succeeds (returning existing NameId or creating new one).
253/// Only namespace prefix resolution can fail if the prefix is not bound.
254fn resolve_var_qname(
255 prefix: &str,
256 local_name: &str,
257 ctx: &XPathContext<'_>,
258) -> Result<QualifiedName, XPathError> {
259 // Intern the local name (always succeeds)
260 let local_id = ctx.names.add(local_name);
261
262 if prefix.is_empty() {
263 Ok(QualifiedName::local(local_id))
264 } else {
265 // Intern the prefix
266 let prefix_id = ctx.names.add(prefix);
267
268 // Resolve prefix to namespace - THIS can still fail legitimately
269 let ns_id = ctx
270 .resolve_prefix_id(prefix_id)
271 .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
272
273 Ok(QualifiedName::new(Some(ns_id), local_id, Some(prefix_id)))
274 }
275}
276
277/// Resolve a NodeTest to a ResolvedNameTest with axis-aware namespace
278/// handling. When `is_attribute_axis` is true (attribute or namespace
279/// axis), an unprefixed local name resolves to the no-namespace QName
280/// instead of using the default-element-namespace.
281fn resolve_node_test_with_axis(
282 test: &NodeTest,
283 ctx: &XPathContext<'_>,
284 is_attribute_axis: bool,
285) -> Result<Option<ResolvedNameTest>, XPathError> {
286 match test {
287 NodeTest::Name(name_test) => {
288 let resolved = resolve_name_test_with_axis(name_test, ctx, is_attribute_axis)?;
289 Ok(Some(resolved))
290 }
291 NodeTest::Kind(_) => {
292 // Kind tests (node(), text(), element(), etc.) don't need name resolution
293 // The QNames inside element()/attribute() tests could be resolved,
294 // but that's handled separately during evaluation
295 Ok(None)
296 }
297 }
298}
299
300/// Resolve an AST-level NameTest to a type-system NameTest with interned names.
301///
302/// Handles all wildcard patterns:
303/// - `*` -> Wildcard
304/// - `prefix:*` -> LocalWildcard (namespace URI)
305/// - `*:local` -> NamespaceWildcard (local name)
306/// - `prefix:local` or `local` -> QName
307fn resolve_name_test_with_axis(
308 name_test: &NameTest,
309 ctx: &XPathContext<'_>,
310 is_attribute_axis: bool,
311) -> Result<ResolvedNameTest, XPathError> {
312 match (&name_test.prefix, &name_test.local_name) {
313 // * - wildcard matches any name
314 (None, None) => Ok(ResolvedNameTest::Wildcard),
315
316 // *:local - any namespace with specific local name
317 (None, Some(local)) => {
318 let local_id = ctx.names.add(local);
319 Ok(ResolvedNameTest::NamespaceWildcard(local_id))
320 }
321
322 // prefix:* - any local name in namespace
323 (Some(prefix), None) => {
324 if prefix.is_empty() {
325 // Empty prefix with wildcard local. For element axes
326 // the default-element-namespace applies; for attribute
327 // and namespace axes an unprefixed name is always in
328 // no namespace per XPath 2.0 §2.5.2.
329 if !is_attribute_axis {
330 if let Some(ns_id) = ctx.default_element_ns {
331 return Ok(ResolvedNameTest::LocalWildcard(ns_id));
332 }
333 }
334 let empty_ns = ctx.names.add("");
335 Ok(ResolvedNameTest::LocalWildcard(empty_ns))
336 } else {
337 let prefix_id = ctx.names.add(prefix);
338 let ns_id = ctx
339 .resolve_prefix_id(prefix_id)
340 .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
341 Ok(ResolvedNameTest::LocalWildcard(ns_id))
342 }
343 }
344
345 // prefix:local - specific QName
346 (Some(prefix), Some(local)) => {
347 let local_id = ctx.names.add(local);
348 if prefix.is_empty() {
349 // Unprefixed local name. Element axes pick up the
350 // default-element-namespace; attribute and namespace
351 // axes always resolve to no namespace per XPath 2.0
352 // §2.5.2 (W3C cta0044 regression).
353 let ns_id = if is_attribute_axis {
354 None
355 } else {
356 ctx.default_element_ns
357 };
358 Ok(ResolvedNameTest::QName(QualifiedName::new(
359 ns_id, local_id, None,
360 )))
361 } else {
362 let prefix_id = ctx.names.add(prefix);
363 let ns_id = ctx
364 .resolve_prefix_id(prefix_id)
365 .ok_or_else(|| XPathError::undefined_prefix(prefix))?;
366 Ok(ResolvedNameTest::QName(QualifiedName::new(
367 Some(ns_id),
368 local_id,
369 Some(prefix_id),
370 )))
371 }
372 }
373 }
374}
375
376/// Resolve an atomic type QName (e.g., xs:integer) to interned form.
377///
378/// Atomic types use the XML Schema namespace by default when unprefixed.
379fn resolve_atomic_type_qname(
380 qname: &QName,
381 ctx: &XPathContext<'_>,
382) -> Result<QualifiedName, XPathError> {
383 let local_id = ctx.names.add(&qname.local);
384
385 if qname.prefix.is_empty() {
386 // Unprefixed atomic types: in XPath 2.0, unprefixed type names in
387 // cast/instance-of use the default element namespace, not xs:
388 // But for compatibility, many implementations treat them as xs: types
389 // Use default element namespace if set, otherwise no namespace
390 let ns_id = ctx.default_element_ns;
391 Ok(QualifiedName::new(ns_id, local_id, None))
392 } else {
393 let prefix_id = ctx.names.add(&qname.prefix);
394 let ns_id = ctx
395 .resolve_prefix_id(prefix_id)
396 .ok_or_else(|| XPathError::undefined_prefix(&qname.prefix))?;
397 Ok(QualifiedName::new(Some(ns_id), local_id, Some(prefix_id)))
398 }
399}
400
401/// Try to bind a function call as an XPath 2.0 constructor function.
402///
403/// Constructor functions allow XML Schema type names (e.g. `xs:integer`, `xs:date`)
404/// to be used as single-argument function calls for type casting. For example,
405/// `xs:unsignedShort(42)` is equivalent to `42 cast as xs:unsignedShort`.
406///
407/// Returns `Ok(Some(type_expr))` if this is a valid constructor function,
408/// `Ok(None)` if it's not a constructor (fall through to normal lookup),
409/// or `Err` for invalid usage (e.g. `xs:NOTATION(...)` is XPST0051).
410fn try_bind_constructor_function(
411 func_call: &FunctionCallNode,
412 namespace: &str,
413 ctx: &XPathContext<'_>,
414) -> Result<Option<TypeExprNode>, XPathError> {
415 // Constructor functions are an XPath 2.0 feature
416 if ctx.mode() != XPathMode::XPath20 {
417 return Ok(None);
418 }
419
420 // Must be in the XML Schema namespace with exactly 1 argument
421 if namespace != XS_NAMESPACE || func_call.args.len() != 1 {
422 return Ok(None);
423 }
424
425 // Check if the local name matches a known XSD type
426 let type_code = match XmlTypeCode::from_local_name(&func_call.local_name) {
427 Some(tc) => tc,
428 None => return Ok(None),
429 };
430
431 // NOTATION is not allowed as a constructor target (XPST0051)
432 if type_code == XmlTypeCode::Notation {
433 return Err(XPathError::unknown_type(&func_call.local_name));
434 }
435
436 // List types and abstract types are not constructor functions —
437 // fall through to normal function lookup (which will produce XPST0017)
438 if type_code.is_list() || matches!(type_code, XmlTypeCode::AnyType | XmlTypeCode::AnySimpleType)
439 {
440 return Ok(None);
441 }
442
443 // Build a CastAs type expression
444 // Per XPath 2.0 spec B.1, constructor functions are xs:TYPE($arg as xs:anyAtomicType?) as xs:TYPE?
445 // so they use ZeroOrOne occurrence (equivalent to "cast as xs:TYPE?")
446 let qname = QName {
447 prefix: func_call.prefix.clone(),
448 local: func_call.local_name.clone(),
449 };
450 let target_type = SequenceTypeNode::single(
451 ItemTypeNode::Atomic(qname.clone()),
452 OccurrenceIndicator::ZeroOrOne,
453 func_call.span,
454 );
455 let mut type_expr = TypeExprNode::new(
456 TypeExprKind::CastAs,
457 func_call.args[0],
458 target_type,
459 func_call.span,
460 );
461
462 // Eagerly resolve the atomic type QName
463 let resolved = resolve_atomic_type_qname(&qname, ctx)?;
464 type_expr.resolved_atomic_type = Some(resolved);
465
466 Ok(Some(type_expr))
467}
468
469#[cfg(test)]
470#[path = "bind_tests.rs"]
471mod bind_tests;