Skip to main content

xsd_schema/xpath/
parser.rs

1//! XPath 2.0 Parser module.
2//!
3//! This module provides the public API for parsing XPath 2.0 expressions.
4//! It uses the LALRPOP-generated parser with a custom stateful lexer.
5
6use crate::xpath::arena::{AstArena, AstNodeId, SourceSpan};
7use crate::xpath::ast::AstNode;
8use crate::xpath::error::XPathError;
9use crate::xpath::lexer::{Lexer, LexerError, Token};
10use crate::xpath::{XPathMode, XPathParseOptions};
11use std::fmt;
12
13// The LALRPOP-generated parser.
14// This uses the lalrpop_mod! macro to include the generated code.
15// The grammar is defined in src/xpath/parser.lalrpop
16lalrpop_util::lalrpop_mod!(
17    #[allow(clippy::all)]
18    #[allow(unused)]
19    #[allow(dead_code)]
20    pub xpath_grammar,
21    "/xpath/parser.rs"
22);
23
24/// Error type for XPath parsing.
25#[derive(Debug, Clone)]
26pub enum ParseError {
27    /// Lexer error (tokenization failed).
28    Lexer(LexerError),
29    /// Parser error (grammar mismatch).
30    Parser {
31        message: String,
32        location: Option<usize>,
33    },
34    /// Unexpected end of input.
35    UnexpectedEof,
36}
37
38impl fmt::Display for ParseError {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        match self {
41            ParseError::Lexer(e) => write!(f, "Lexer error: {}", e),
42            ParseError::Parser { message, location } => {
43                if let Some(loc) = location {
44                    write!(f, "Parse error at position {}: {}", loc, message)
45                } else {
46                    write!(f, "Parse error: {}", message)
47                }
48            }
49            ParseError::UnexpectedEof => write!(f, "Unexpected end of input"),
50        }
51    }
52}
53
54impl std::error::Error for ParseError {}
55
56impl From<LexerError> for ParseError {
57    fn from(e: LexerError) -> Self {
58        ParseError::Lexer(e)
59    }
60}
61
62/// Map a LALRPOP parse error to our `ParseError` type.
63fn map_lalrpop_error(e: lalrpop_util::ParseError<usize, Token, LexerError>) -> ParseError {
64    match e {
65        lalrpop_util::ParseError::InvalidToken { location } => ParseError::Parser {
66            message: "Invalid token".to_string(),
67            location: Some(location),
68        },
69        lalrpop_util::ParseError::UnrecognizedEof { location, expected } => ParseError::Parser {
70            message: format!("Unexpected end of input, expected one of: {:?}", expected),
71            location: Some(location),
72        },
73        lalrpop_util::ParseError::UnrecognizedToken { token, expected } => ParseError::Parser {
74            message: format!(
75                "Unexpected token {:?}, expected one of: {:?}",
76                token.1, expected
77            ),
78            location: Some(token.0),
79        },
80        lalrpop_util::ParseError::ExtraToken { token } => ParseError::Parser {
81            message: format!("Extra token: {:?}", token.1),
82            location: Some(token.0),
83        },
84        lalrpop_util::ParseError::User { error } => ParseError::Lexer(error),
85    }
86}
87
88/// Result of parsing an XPath expression.
89#[derive(Debug)]
90pub struct ParsedXPath {
91    /// The arena containing all AST nodes.
92    pub arena: AstArena,
93    /// The root node ID of the parsed expression.
94    pub root: AstNodeId,
95    /// Source span of the entire expression.
96    pub span: SourceSpan,
97}
98
99impl ParsedXPath {
100    /// Get a reference to the root AST node.
101    pub fn root_node(&self) -> &AstNode {
102        self.arena.get(self.root)
103    }
104
105    /// Get a reference to any node by ID.
106    pub fn get_node(&self, id: AstNodeId) -> &AstNode {
107        self.arena.get(id)
108    }
109
110    /// Get the number of nodes in the AST.
111    pub fn node_count(&self) -> usize {
112        self.arena.len()
113    }
114}
115
116/// Parse an XPath 2.0 expression string.
117///
118/// Returns a `ParsedXPath` containing the AST arena and root node ID.
119///
120/// # Example
121///
122/// ```
123/// use xsd_schema::xpath::parser::parse;
124///
125/// let result = parse("/a/b/c").unwrap();
126/// println!("Parsed {} nodes", result.node_count());
127/// ```
128pub fn parse(input: &str) -> Result<ParsedXPath, ParseError> {
129    let mut arena = AstArena::new();
130    let lexer = Lexer::new(input);
131
132    let root = xpath_grammar::ExprParser::new()
133        .parse(&mut arena, lexer)
134        .map_err(map_lalrpop_error)?;
135
136    Ok(ParsedXPath {
137        arena,
138        root,
139        span: SourceSpan::new(0, input.len()),
140    })
141}
142
143/// Parse an XPath expression and return just the root node ID.
144///
145/// This is a convenience function when you only need the arena and root.
146pub fn parse_expr(input: &str, arena: &mut AstArena) -> Result<AstNodeId, ParseError> {
147    let lexer = Lexer::new(input);
148
149    xpath_grammar::ExprParser::new()
150        .parse(arena, lexer)
151        .map_err(map_lalrpop_error)
152}
153
154/// Parse an XPath expression with a specific mode (XPath 1.0 or 2.0).
155pub fn parse_with_mode(input: &str, mode: XPathMode) -> Result<ParsedXPath, ParseError> {
156    let mut arena = AstArena::new();
157    let lexer = Lexer::new_with_mode(input, mode);
158
159    let root = xpath_grammar::ExprParser::new()
160        .parse(&mut arena, lexer)
161        .map_err(map_lalrpop_error)?;
162
163    Ok(ParsedXPath {
164        arena,
165        root,
166        span: SourceSpan::new(0, input.len()),
167    })
168}
169
170/// Parse an XPath expression with a specific mode and return just the root node ID.
171pub fn parse_expr_with_mode(
172    input: &str,
173    mode: XPathMode,
174    arena: &mut AstArena,
175) -> Result<AstNodeId, ParseError> {
176    let lexer = Lexer::new_with_mode(input, mode);
177
178    xpath_grammar::ExprParser::new()
179        .parse(arena, lexer)
180        .map_err(map_lalrpop_error)
181}
182
183/// Parse an XPath expression with structured options, returning `XPathError` on failure.
184///
185/// This is the primary entry point for the parser API. It selects the lexer mode
186/// based on `opts.mode` and returns `XPathError` (not `ParseError`), making it
187/// suitable for use alongside bind and eval phases that also return `XPathError`.
188///
189/// # Example
190///
191/// ```
192/// use xsd_schema::xpath::parser::parse_with_options;
193/// use xsd_schema::xpath::{XPathParseOptions, XPathMode};
194///
195/// let opts = XPathParseOptions { mode: XPathMode::XPath10 };
196/// let result = parse_with_options("/a/b", &opts).unwrap();
197/// println!("Parsed {} nodes", result.node_count());
198/// ```
199pub fn parse_with_options(
200    input: &str,
201    opts: &XPathParseOptions,
202) -> Result<ParsedXPath, XPathError> {
203    let parsed = parse_with_mode(input, opts.mode)?; // ParseError → XPathError via From
204                                                     // XPath 1.0 validation: the lexer blocks all major 2.0-only constructs by suppressing
205                                                     // their tokens. The 3 remaining edge cases (comma sequences, empty parens, double literals)
206                                                     // are caught at eval time in eval_node(). No separate AST validator is needed.
207    Ok(parsed)
208}
209
210/// Parse an XPath expression in XPath 1.0 mode, returning `XPathError` on failure.
211///
212/// Convenience wrapper around [`parse_with_options`] with `XPathMode::XPath10`.
213///
214/// # Example
215///
216/// ```
217/// use xsd_schema::xpath::parser::parse_xpath10;
218///
219/// let result = parse_xpath10("/a/b").unwrap();
220/// println!("Parsed {} nodes", result.node_count());
221/// ```
222pub fn parse_xpath10(input: &str) -> Result<ParsedXPath, XPathError> {
223    parse_with_options(
224        input,
225        &XPathParseOptions {
226            mode: XPathMode::XPath10,
227        },
228    )
229}
230
231/// Parse an XPath expression in XPath 2.0 mode, returning `XPathError` on failure.
232///
233/// Convenience wrapper around [`parse_with_options`] with `XPathMode::XPath20`.
234///
235/// # Example
236///
237/// ```
238/// use xsd_schema::xpath::parser::parse_xpath20;
239///
240/// let result = parse_xpath20("for $x in 1 to 10 return $x").unwrap();
241/// println!("Parsed {} nodes", result.node_count());
242/// ```
243pub fn parse_xpath20(input: &str) -> Result<ParsedXPath, XPathError> {
244    parse_with_options(
245        input,
246        &XPathParseOptions {
247            mode: XPathMode::XPath20,
248        },
249    )
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::xpath::ast::*;

    // ---------------------------------------------------------------------
    // Shared helpers.
    //
    // The precedence tests all walk the same few AST shapes. The helpers are
    // `#[track_caller]` so a failure is reported at the calling test's line
    // rather than inside the helper.
    // ---------------------------------------------------------------------

    /// Unwrap a parse result, failing the calling test with the error on `Err`.
    #[track_caller]
    fn expect_parsed(result: Result<ParsedXPath, ParseError>) -> ParsedXPath {
        match result {
            Ok(parsed) => parsed,
            Err(e) => panic!("Parse failed: {:?}", e),
        }
    }

    /// Assert the root is an `Expr` holding exactly one item and return that item.
    #[track_caller]
    fn sole_item(parsed: &ParsedXPath) -> AstNodeId {
        let root = parsed.root_node();
        match root {
            AstNode::Expr(expr) => {
                assert_eq!(expr.items.len(), 1);
                expr.items[0]
            }
            _ => panic!("Expected Expr root, got {:?}", root),
        }
    }

    /// Assert node `id` is a `UnaryOp` of the given `kind` and return its operand.
    #[track_caller]
    fn expect_unary(parsed: &ParsedXPath, id: AstNodeId, kind: UnaryOpKind) -> AstNodeId {
        let node = parsed.get_node(id);
        match node {
            AstNode::UnaryOp(unary) => {
                assert_eq!(unary.kind, kind);
                unary.operand
            }
            _ => panic!("Expected UnaryOp, got {:?}", node),
        }
    }

    /// Assert node `id` is a `BinaryOp` of kind `Union` and return its left operand.
    #[track_caller]
    fn expect_union(parsed: &ParsedXPath, id: AstNodeId) -> AstNodeId {
        let node = parsed.get_node(id);
        match node {
            AstNode::BinaryOp(binop) => {
                assert_eq!(binop.kind, BinaryOpKind::Union);
                binop.left
            }
            _ => panic!("Expected BinaryOp(Union), got {:?}", node),
        }
    }

    /// Return `true` if `node_id` is a `UnaryOp(Negate)`, or if one is reachable
    /// from it by descending through `BinaryOp` operands.
    fn has_unary_negate(parsed: &ParsedXPath, node_id: AstNodeId) -> bool {
        match parsed.get_node(node_id) {
            AstNode::UnaryOp(u) => u.kind == UnaryOpKind::Negate,
            AstNode::BinaryOp(b) => {
                has_unary_negate(parsed, b.left) || has_unary_negate(parsed, b.right)
            }
            _ => false,
        }
    }

    // ---------------------------------------------------------------------
    // Smoke tests: representative expressions parse successfully.
    // ---------------------------------------------------------------------

    #[test]
    fn test_parse_arithmetic() {
        expect_parsed(parse("1 + 2"));
    }

    #[test]
    fn test_parse_path() {
        expect_parsed(parse("/a/b/c"));
    }

    #[test]
    fn test_parse_variable() {
        expect_parsed(parse("$x"));
    }

    #[test]
    fn test_parse_function() {
        expect_parsed(parse("fn:count(*)"));
    }

    #[test]
    fn test_lexer_errors() {
        // An unterminated string literal must surface as a parse failure.
        let result = parse("'unclosed string");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_xpath10_basic_path() {
        expect_parsed(parse_with_mode("/a/b", XPathMode::XPath10));
    }

    #[test]
    fn test_parse_xpath10_keyword_as_element_name() {
        // "union" is a valid element name in XPath 1.0
        expect_parsed(parse_with_mode("//union", XPathMode::XPath10));
    }

    // ---------------------------------------------------------------------
    // Unary-operator vs. union precedence, which differs between the modes.
    // ---------------------------------------------------------------------

    #[test]
    fn test_parse_xpath10_unary_minus() {
        // -a|b should parse as -(a|b) in XPath 1.0
        let parsed = expect_parsed(parse_with_mode("-a|b", XPathMode::XPath10));
        let operand = expect_unary(&parsed, sole_item(&parsed), UnaryOpKind::Negate);
        expect_union(&parsed, operand);
    }

    #[test]
    fn test_parse_xpath20_unary_minus() {
        // -a|b should parse as (-a)|b in XPath 2.0
        let parsed = expect_parsed(parse("-a|b"));
        let left = expect_union(&parsed, sole_item(&parsed));
        expect_unary(&parsed, left, UnaryOpKind::Negate);
    }

    #[test]
    fn test_parse_xpath10_unary_plus() {
        // +a|b should parse as +(a|b) in XPath 1.0
        let parsed = expect_parsed(parse_with_mode("+a|b", XPathMode::XPath10));
        let operand = expect_unary(&parsed, sole_item(&parsed), UnaryOpKind::Identity);
        expect_union(&parsed, operand);
    }

    #[test]
    fn test_parse_xpath20_unary_plus() {
        // +a|b should parse as (+a)|b in XPath 2.0
        let parsed = expect_parsed(parse("+a|b"));
        let left = expect_union(&parsed, sole_item(&parsed));
        expect_unary(&parsed, left, UnaryOpKind::Identity);
    }

    #[test]
    fn test_parse_xpath10_double_unary() {
        // --a|b should parse as -(-(a|b)) in XPath 1.0
        let parsed = expect_parsed(parse_with_mode("--a|b", XPathMode::XPath10));
        let outer_operand = expect_unary(&parsed, sole_item(&parsed), UnaryOpKind::Negate);
        let inner_operand = expect_unary(&parsed, outer_operand, UnaryOpKind::Negate);
        expect_union(&parsed, inner_operand);
    }

    #[test]
    fn test_parse_xpath20_double_unary() {
        // --a|b should parse as (--a)|b in XPath 2.0
        let parsed = expect_parsed(parse("--a|b"));
        let left = expect_union(&parsed, sole_item(&parsed));
        // Left: UnaryOp(Negate) wrapping UnaryOp(Negate)
        let outer_operand = expect_unary(&parsed, left, UnaryOpKind::Negate);
        expect_unary(&parsed, outer_operand, UnaryOpKind::Negate);
    }

    #[test]
    fn test_parse_xpath10_unary_multi_union() {
        // -a|b|c should parse as -(a|b|c) in XPath 1.0
        // The union chain may be nested as (a|b)|c or a|(b|c)
        let parsed = expect_parsed(parse_with_mode("-a|b|c", XPathMode::XPath10));
        let operand = expect_unary(&parsed, sole_item(&parsed), UnaryOpKind::Negate);
        expect_union(&parsed, operand);
    }

    #[test]
    fn test_parse_xpath20_unary_multi_union() {
        // -a|b|c should parse as (-a)|b|c in XPath 2.0
        let parsed = expect_parsed(parse("-a|b|c"));
        let left = expect_union(&parsed, sole_item(&parsed));
        // The structure could be ((- a) | b) | c or (- a) | (b | c), so search
        // the left side rather than requiring the negate at a fixed depth.
        assert!(
            has_unary_negate(&parsed, left),
            "Expected UnaryOp(Negate) somewhere on the left side of Union"
        );
    }

    // ---------------------------------------------------------------------
    // `XPathError`-returning entry points.
    // ---------------------------------------------------------------------

    #[test]
    fn test_parse_xpath10_convenience() {
        let result = parse_xpath10("/a/b");
        assert!(result.is_ok(), "parse_xpath10 failed: {:?}", result.err());
    }

    #[test]
    fn test_parse_xpath20_convenience() {
        let result = parse_xpath20("for $x in 1 to 10 return $x");
        assert!(result.is_ok(), "parse_xpath20 failed: {:?}", result.err());
    }

    #[test]
    fn test_parse_with_options_xpath10() {
        let opts = XPathParseOptions {
            mode: XPathMode::XPath10,
        };
        let result = parse_with_options("//union", &opts);
        assert!(
            result.is_ok(),
            "parse_with_options failed: {:?}",
            result.err()
        );
    }

    #[test]
    fn test_parse_with_options_returns_xpath_error() {
        // Verify that parse_with_options returns XPathError, not ParseError
        let result = parse_with_options("'unclosed string", &XPathParseOptions::default());
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.error_code(), Some("XPST0003"));
    }

    #[test]
    fn test_parse_error_to_xpath_error_conversion() {
        // Test From<ParseError> for XPathError
        let parse_err = ParseError::Parser {
            message: "test error".to_string(),
            location: Some(5),
        };
        let xpath_err: crate::xpath::error::XPathError = parse_err.into();
        assert_eq!(xpath_err.error_code(), Some("XPST0003"));
        assert!(xpath_err.to_string().contains("test error"));
    }
}