Parser

Struct Parser 

Source
pub struct Parser<K: TokenKind, R: RuleId> { /* private fields */ }
Expand description

Main parser entry point.

The Parser is responsible for evaluating grammar rules and building concrete syntax trees (CST) from token streams. It supports both grammar-based parsing and Pratt parsing for operator precedence.

§Performance Characteristics

  • Time Complexity: O(n) for most grammars, where n is the number of tokens
  • Space Complexity: O(n) for the CST arena
  • Memoization: Enabled by default for recursive grammars

§Example

use sipha_core::{Parser, helpers, ParserState, cst::{NodeArena, RawNodeId}};

let mut parser = Parser::create();
parser.register_rule(
    Rule::Expr,
    helpers::seq(vec![
        helpers::token(Token::Ident),
        helpers::token(Token::Plus),
        helpers::token(Token::Ident),
    ]),
);

Implementations§

Source§

impl<K: TokenKind, R: RuleId> Parser<K, R>

Source

pub fn new<'fn_lifetime>() -> ParserBuilder<'fn_lifetime, K, R, (), (), (), (), ()>

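Creates a builder for constructing a Parser. Every field listed below is optional and falls back to its default when not set.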

§Optional Fields
§grammar_rules
  • Type: HashMap<R, GrammarRule<K, R>>
  • Default: HashMap::new()

Grammar rules registered with the parser.

§max_recovery_depth
  • Type: usize
  • Default: 10

Maximum depth for error recovery attempts.

§sync_tokens
  • Type: HashSet<K>
  • Default: HashSet::new()

Synchronization tokens for error recovery.

Source§

impl<K: TokenKind, R: RuleId> Parser<K, R>

Source

pub fn create() -> Self

Create a new parser with default settings.

For builder pattern usage, use Parser::new(), which returns a builder.

Examples found in repository?
examples/basic_parser.rs (line 34)
32fn main() {
33    // Create a parser
34    let mut parser = Parser::create();
35
36    // Register a simple rule: Expr -> Ident Plus Ident
37    parser.register_rule(
38        Rule::Expr,
39        helpers::seq(vec![
40            helpers::token(Token::Ident),
41            helpers::token(Token::Plus),
42            helpers::token(Token::Ident),
43        ]),
44    );
45
46    // Create token stream: "a + b"
47    let tokens = vec![
48        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50        TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51    ];
52
53    // Parse the tokens
54    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55    let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57    match parser.parse_rule(Rule::Expr, &mut state) {
58        Ok(_) => {
59            println!("✓ Successfully parsed!");
60            println!("CST contains {} nodes", arena.len());
61        }
62        Err(e) => {
63            println!("✗ Parse error: {:?}", e);
64        }
65    }
66}
More examples
Hide additional examples
examples/error_recovery.rs (line 33)
32fn main() {
33    let mut parser = Parser::create();
34
35    // Define a simple statement rule: Ident Plus Ident Semicolon
36    parser.register_rule(
37        Rule::Statement,
38        helpers::seq(vec![
39            helpers::token(Token::Ident),
40            helpers::token(Token::Plus),
41            helpers::token(Token::Ident),
42            helpers::token(Token::Semicolon),
43        ]),
44    );
45
46    // Configure error recovery: use semicolon as sync token
47    parser.set_sync_tokens(vec![Token::Semicolon]);
48
49    // Create token stream with an error: "a + invalid ; b + c ;"
50    // The parser should recover after the first semicolon
51    let tokens = vec![
52        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54        TokenStruct::create(
55            Token::Invalid,
56            Span::new(4..10),
57            "invalid",
58            Vec::new(),
59            Vec::new(),
60        ),
61        TokenStruct::create(
62            Token::Semicolon,
63            Span::new(11..12),
64            ";",
65            Vec::new(),
66            Vec::new(),
67        ),
68        TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69        TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70        TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71        TokenStruct::create(
72            Token::Semicolon,
73            Span::new(19..20),
74            ";",
75            Vec::new(),
76            Vec::new(),
77        ),
78    ];
79
80    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81    let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83    println!("Parsing with error recovery enabled...");
84    println!("Input: a + invalid ; b + c ;");
85    println!();
86
87    // Try to parse - should recover after first error
88    match parser.parse_rule(Rule::Statement, &mut state) {
89        Ok(_) => {
90            println!("✓ Parsed successfully (with recovery)!");
91        }
92        Err(e) => {
93            println!("✗ Parse error: {:?}", e);
94            println!("Note: Error recovery attempted to sync at semicolon");
95        }
96    }
97}
Source

pub fn set_max_recovery_depth(&mut self, depth: usize)

Set the maximum depth for error recovery attempts.

Source

pub fn register_rule(&mut self, rule_id: R, rule: GrammarRule<K, R>)

Register a grammar rule.

Validates the rule before registration (e.g., ensures that a range's min <= max). Panics if the rule is invalid, so that a malformed grammar fails immediately instead of silently.

Examples found in repository?
examples/basic_parser.rs (lines 37-44)
32fn main() {
33    // Create a parser
34    let mut parser = Parser::create();
35
36    // Register a simple rule: Expr -> Ident Plus Ident
37    parser.register_rule(
38        Rule::Expr,
39        helpers::seq(vec![
40            helpers::token(Token::Ident),
41            helpers::token(Token::Plus),
42            helpers::token(Token::Ident),
43        ]),
44    );
45
46    // Create token stream: "a + b"
47    let tokens = vec![
48        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50        TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51    ];
52
53    // Parse the tokens
54    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55    let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57    match parser.parse_rule(Rule::Expr, &mut state) {
58        Ok(_) => {
59            println!("✓ Successfully parsed!");
60            println!("CST contains {} nodes", arena.len());
61        }
62        Err(e) => {
63            println!("✗ Parse error: {:?}", e);
64        }
65    }
66}
More examples
Hide additional examples
examples/error_recovery.rs (lines 36-44)
32fn main() {
33    let mut parser = Parser::create();
34
35    // Define a simple statement rule: Ident Plus Ident Semicolon
36    parser.register_rule(
37        Rule::Statement,
38        helpers::seq(vec![
39            helpers::token(Token::Ident),
40            helpers::token(Token::Plus),
41            helpers::token(Token::Ident),
42            helpers::token(Token::Semicolon),
43        ]),
44    );
45
46    // Configure error recovery: use semicolon as sync token
47    parser.set_sync_tokens(vec![Token::Semicolon]);
48
49    // Create token stream with an error: "a + invalid ; b + c ;"
50    // The parser should recover after the first semicolon
51    let tokens = vec![
52        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54        TokenStruct::create(
55            Token::Invalid,
56            Span::new(4..10),
57            "invalid",
58            Vec::new(),
59            Vec::new(),
60        ),
61        TokenStruct::create(
62            Token::Semicolon,
63            Span::new(11..12),
64            ";",
65            Vec::new(),
66            Vec::new(),
67        ),
68        TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69        TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70        TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71        TokenStruct::create(
72            Token::Semicolon,
73            Span::new(19..20),
74            ";",
75            Vec::new(),
76            Vec::new(),
77        ),
78    ];
79
80    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81    let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83    println!("Parsing with error recovery enabled...");
84    println!("Input: a + invalid ; b + c ;");
85    println!();
86
87    // Try to parse - should recover after first error
88    match parser.parse_rule(Rule::Statement, &mut state) {
89        Ok(_) => {
90            println!("✓ Parsed successfully (with recovery)!");
91        }
92        Err(e) => {
93            println!("✗ Parse error: {:?}", e);
94            println!("Note: Error recovery attempted to sync at semicolon");
95        }
96    }
97}
Source

pub fn set_sync_tokens(&mut self, tokens: Vec<K>)

Configure synchronization tokens for error recovery.

Examples found in repository?
examples/error_recovery.rs (line 47)
32fn main() {
33    let mut parser = Parser::create();
34
35    // Define a simple statement rule: Ident Plus Ident Semicolon
36    parser.register_rule(
37        Rule::Statement,
38        helpers::seq(vec![
39            helpers::token(Token::Ident),
40            helpers::token(Token::Plus),
41            helpers::token(Token::Ident),
42            helpers::token(Token::Semicolon),
43        ]),
44    );
45
46    // Configure error recovery: use semicolon as sync token
47    parser.set_sync_tokens(vec![Token::Semicolon]);
48
49    // Create token stream with an error: "a + invalid ; b + c ;"
50    // The parser should recover after the first semicolon
51    let tokens = vec![
52        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54        TokenStruct::create(
55            Token::Invalid,
56            Span::new(4..10),
57            "invalid",
58            Vec::new(),
59            Vec::new(),
60        ),
61        TokenStruct::create(
62            Token::Semicolon,
63            Span::new(11..12),
64            ";",
65            Vec::new(),
66            Vec::new(),
67        ),
68        TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69        TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70        TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71        TokenStruct::create(
72            Token::Semicolon,
73            Span::new(19..20),
74            ";",
75            Vec::new(),
76            Vec::new(),
77        ),
78    ];
79
80    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81    let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83    println!("Parsing with error recovery enabled...");
84    println!("Input: a + invalid ; b + c ;");
85    println!();
86
87    // Try to parse - should recover after first error
88    match parser.parse_rule(Rule::Statement, &mut state) {
89        Ok(_) => {
90            println!("✓ Parsed successfully (with recovery)!");
91        }
92        Err(e) => {
93            println!("✗ Parse error: {:?}", e);
94            println!("Note: Error recovery attempted to sync at semicolon");
95        }
96    }
97}
Source

pub fn parse_rule<'tokens, 'arena, N, Ctx>( &mut self, rule_id: R, state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>, ) -> Result<N, ParseError<K, R>>
where N: NodeId, Ctx: GrammarContext,

Parse a top-level rule by identifier.

Examples found in repository?
examples/basic_parser.rs (line 57)
32fn main() {
33    // Create a parser
34    let mut parser = Parser::create();
35
36    // Register a simple rule: Expr -> Ident Plus Ident
37    parser.register_rule(
38        Rule::Expr,
39        helpers::seq(vec![
40            helpers::token(Token::Ident),
41            helpers::token(Token::Plus),
42            helpers::token(Token::Ident),
43        ]),
44    );
45
46    // Create token stream: "a + b"
47    let tokens = vec![
48        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50        TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51    ];
52
53    // Parse the tokens
54    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55    let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57    match parser.parse_rule(Rule::Expr, &mut state) {
58        Ok(_) => {
59            println!("✓ Successfully parsed!");
60            println!("CST contains {} nodes", arena.len());
61        }
62        Err(e) => {
63            println!("✗ Parse error: {:?}", e);
64        }
65    }
66}
More examples
Hide additional examples
examples/error_recovery.rs (line 88)
32fn main() {
33    let mut parser = Parser::create();
34
35    // Define a simple statement rule: Ident Plus Ident Semicolon
36    parser.register_rule(
37        Rule::Statement,
38        helpers::seq(vec![
39            helpers::token(Token::Ident),
40            helpers::token(Token::Plus),
41            helpers::token(Token::Ident),
42            helpers::token(Token::Semicolon),
43        ]),
44    );
45
46    // Configure error recovery: use semicolon as sync token
47    parser.set_sync_tokens(vec![Token::Semicolon]);
48
49    // Create token stream with an error: "a + invalid ; b + c ;"
50    // The parser should recover after the first semicolon
51    let tokens = vec![
52        TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53        TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54        TokenStruct::create(
55            Token::Invalid,
56            Span::new(4..10),
57            "invalid",
58            Vec::new(),
59            Vec::new(),
60        ),
61        TokenStruct::create(
62            Token::Semicolon,
63            Span::new(11..12),
64            ";",
65            Vec::new(),
66            Vec::new(),
67        ),
68        TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69        TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70        TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71        TokenStruct::create(
72            Token::Semicolon,
73            Span::new(19..20),
74            ";",
75            Vec::new(),
76            Vec::new(),
77        ),
78    ];
79
80    let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81    let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83    println!("Parsing with error recovery enabled...");
84    println!("Input: a + invalid ; b + c ;");
85    println!();
86
87    // Try to parse - should recover after first error
88    match parser.parse_rule(Rule::Statement, &mut state) {
89        Ok(_) => {
90            println!("✓ Parsed successfully (with recovery)!");
91        }
92        Err(e) => {
93            println!("✗ Parse error: {:?}", e);
94            println!("Note: Error recovery attempted to sync at semicolon");
95        }
96    }
97}

Trait Implementations§

Source§

impl<K: TokenKind, R: RuleId> Default for Parser<K, R>

Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl<K: TokenKind, R: RuleId> GrammarRuleParser<K, R> for Parser<K, R>

Source§

fn parse_grammar_rule<'tokens, 'arena, N, Ctx>( &mut self, rule_id: R, rule: &GrammarRule<K, R>, state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>, ) -> Result<N, ParseError<K, R>>
where N: NodeId, Ctx: GrammarContext,

Parse the provided rule and return the resulting CST node.

Auto Trait Implementations§

§

impl<K, R> Freeze for Parser<K, R>

§

impl<K, R> RefUnwindSafe for Parser<K, R>

§

impl<K, R> Send for Parser<K, R>

§

impl<K, R> Sync for Parser<K, R>

§

impl<K, R> Unpin for Parser<K, R>
where R: Unpin, K: Unpin,

§

impl<K, R> UnwindSafe for Parser<K, R>
where R: UnwindSafe, K: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.