pub struct Parser<K: TokenKind, R: RuleId> { /* private fields */ }
Expand description
Main parser entry point.
The Parser is responsible for evaluating grammar rules and building
concrete syntax trees (CST) from token streams. It supports both
grammar-based parsing and Pratt parsing for operator precedence.
§Performance Characteristics
- Time Complexity: O(n) for most grammars, where n is the number of tokens
- Space Complexity: O(n) for the CST arena
- Memoization: Enabled by default for recursive grammars
§Example
use sipha_core::{Parser, helpers, ParserState, cst::{NodeArena, RawNodeId}};
let mut parser = Parser::create();
parser.register_rule(
Rule::Expr,
helpers::seq(vec![
helpers::token(Token::Ident),
helpers::token(Token::Plus),
helpers::token(Token::Ident),
]),
);
Implementations§
Source§impl<K: TokenKind, R: RuleId> Parser<K, R>
impl<K: TokenKind, R: RuleId> Parser<K, R>
Source
pub fn new<'fn_lifetime>() -> ParserBuilder<'fn_lifetime, K, R, (), (), (), (), ()>
pub fn new<'fn_lifetime>() -> ParserBuilder<'fn_lifetime, K, R, (), (), (), (), ()>
Creates a builder for configuring a parser. For a parser with default settings, use Parser::create().
§Optional Fields
§grammar_rules
- Type: HashMap<R, GrammarRule<K, R>>
- Default: HashMap::new()
Grammar rules registered with the parser.
§max_recovery_depth
- Type: usize
- Default: 10
Maximum depth for error recovery attempts.
§sync_tokens
- Type: HashSet<K>
- Default: HashSet::new()
Synchronization tokens for error recovery.
Source§impl<K: TokenKind, R: RuleId> Parser<K, R>
impl<K: TokenKind, R: RuleId> Parser<K, R>
Source
pub fn create() -> Self
pub fn create() -> Self
Create a new parser with default settings.
For builder pattern usage, use Parser::new(), which returns a builder.
Examples found in repository?
32fn main() {
33 // Create a parser
34 let mut parser = Parser::create();
35
36 // Register a simple rule: Expr -> Ident Plus Ident
37 parser.register_rule(
38 Rule::Expr,
39 helpers::seq(vec![
40 helpers::token(Token::Ident),
41 helpers::token(Token::Plus),
42 helpers::token(Token::Ident),
43 ]),
44 );
45
46 // Create token stream: "a + b"
47 let tokens = vec![
48 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50 TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51 ];
52
53 // Parse the tokens
54 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55 let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57 match parser.parse_rule(Rule::Expr, &mut state) {
58 Ok(_) => {
59 println!("✓ Successfully parsed!");
60 println!("CST contains {} nodes", arena.len());
61 }
62 Err(e) => {
63 println!("✗ Parse error: {:?}", e);
64 }
65 }
66}
More examples
32fn main() {
33 let mut parser = Parser::create();
34
35 // Define a simple statement rule: Ident Plus Ident Semicolon
36 parser.register_rule(
37 Rule::Statement,
38 helpers::seq(vec![
39 helpers::token(Token::Ident),
40 helpers::token(Token::Plus),
41 helpers::token(Token::Ident),
42 helpers::token(Token::Semicolon),
43 ]),
44 );
45
46 // Configure error recovery: use semicolon as sync token
47 parser.set_sync_tokens(vec![Token::Semicolon]);
48
49 // Create token stream with an error: "a + invalid ; b + c ;"
50 // The parser should recover after the first semicolon
51 let tokens = vec![
52 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54 TokenStruct::create(
55 Token::Invalid,
56 Span::new(4..10),
57 "invalid",
58 Vec::new(),
59 Vec::new(),
60 ),
61 TokenStruct::create(
62 Token::Semicolon,
63 Span::new(11..12),
64 ";",
65 Vec::new(),
66 Vec::new(),
67 ),
68 TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69 TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70 TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71 TokenStruct::create(
72 Token::Semicolon,
73 Span::new(19..20),
74 ";",
75 Vec::new(),
76 Vec::new(),
77 ),
78 ];
79
80 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81 let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83 println!("Parsing with error recovery enabled...");
84 println!("Input: a + invalid ; b + c ;");
85 println!();
86
87 // Try to parse - should recover after first error
88 match parser.parse_rule(Rule::Statement, &mut state) {
89 Ok(_) => {
90 println!("✓ Parsed successfully (with recovery)!");
91 }
92 Err(e) => {
93 println!("✗ Parse error: {:?}", e);
94 println!("Note: Error recovery attempted to sync at semicolon");
95 }
96 }
97}
Source
pub fn set_max_recovery_depth(&mut self, depth: usize)
pub fn set_max_recovery_depth(&mut self, depth: usize)
Set the maximum depth for error recovery attempts.
Source
pub fn register_rule(&mut self, rule_id: R, rule: GrammarRule<K, R>)
pub fn register_rule(&mut self, rule_id: R, rule: GrammarRule<K, R>)
Register a grammar rule.
Validates the rule before registration (e.g., ensures range min <= max). Invalid rules will cause a panic to prevent silent failures.
Examples found in repository?
32fn main() {
33 // Create a parser
34 let mut parser = Parser::create();
35
36 // Register a simple rule: Expr -> Ident Plus Ident
37 parser.register_rule(
38 Rule::Expr,
39 helpers::seq(vec![
40 helpers::token(Token::Ident),
41 helpers::token(Token::Plus),
42 helpers::token(Token::Ident),
43 ]),
44 );
45
46 // Create token stream: "a + b"
47 let tokens = vec![
48 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50 TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51 ];
52
53 // Parse the tokens
54 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55 let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57 match parser.parse_rule(Rule::Expr, &mut state) {
58 Ok(_) => {
59 println!("✓ Successfully parsed!");
60 println!("CST contains {} nodes", arena.len());
61 }
62 Err(e) => {
63 println!("✗ Parse error: {:?}", e);
64 }
65 }
66}
More examples
32fn main() {
33 let mut parser = Parser::create();
34
35 // Define a simple statement rule: Ident Plus Ident Semicolon
36 parser.register_rule(
37 Rule::Statement,
38 helpers::seq(vec![
39 helpers::token(Token::Ident),
40 helpers::token(Token::Plus),
41 helpers::token(Token::Ident),
42 helpers::token(Token::Semicolon),
43 ]),
44 );
45
46 // Configure error recovery: use semicolon as sync token
47 parser.set_sync_tokens(vec![Token::Semicolon]);
48
49 // Create token stream with an error: "a + invalid ; b + c ;"
50 // The parser should recover after the first semicolon
51 let tokens = vec![
52 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54 TokenStruct::create(
55 Token::Invalid,
56 Span::new(4..10),
57 "invalid",
58 Vec::new(),
59 Vec::new(),
60 ),
61 TokenStruct::create(
62 Token::Semicolon,
63 Span::new(11..12),
64 ";",
65 Vec::new(),
66 Vec::new(),
67 ),
68 TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69 TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70 TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71 TokenStruct::create(
72 Token::Semicolon,
73 Span::new(19..20),
74 ";",
75 Vec::new(),
76 Vec::new(),
77 ),
78 ];
79
80 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81 let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83 println!("Parsing with error recovery enabled...");
84 println!("Input: a + invalid ; b + c ;");
85 println!();
86
87 // Try to parse - should recover after first error
88 match parser.parse_rule(Rule::Statement, &mut state) {
89 Ok(_) => {
90 println!("✓ Parsed successfully (with recovery)!");
91 }
92 Err(e) => {
93 println!("✗ Parse error: {:?}", e);
94 println!("Note: Error recovery attempted to sync at semicolon");
95 }
96 }
97}
Source
pub fn set_sync_tokens(&mut self, tokens: Vec<K>)
pub fn set_sync_tokens(&mut self, tokens: Vec<K>)
Configure synchronization tokens for error recovery.
Examples found in repository?
32fn main() {
33 let mut parser = Parser::create();
34
35 // Define a simple statement rule: Ident Plus Ident Semicolon
36 parser.register_rule(
37 Rule::Statement,
38 helpers::seq(vec![
39 helpers::token(Token::Ident),
40 helpers::token(Token::Plus),
41 helpers::token(Token::Ident),
42 helpers::token(Token::Semicolon),
43 ]),
44 );
45
46 // Configure error recovery: use semicolon as sync token
47 parser.set_sync_tokens(vec![Token::Semicolon]);
48
49 // Create token stream with an error: "a + invalid ; b + c ;"
50 // The parser should recover after the first semicolon
51 let tokens = vec![
52 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54 TokenStruct::create(
55 Token::Invalid,
56 Span::new(4..10),
57 "invalid",
58 Vec::new(),
59 Vec::new(),
60 ),
61 TokenStruct::create(
62 Token::Semicolon,
63 Span::new(11..12),
64 ";",
65 Vec::new(),
66 Vec::new(),
67 ),
68 TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69 TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70 TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71 TokenStruct::create(
72 Token::Semicolon,
73 Span::new(19..20),
74 ";",
75 Vec::new(),
76 Vec::new(),
77 ),
78 ];
79
80 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81 let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83 println!("Parsing with error recovery enabled...");
84 println!("Input: a + invalid ; b + c ;");
85 println!();
86
87 // Try to parse - should recover after first error
88 match parser.parse_rule(Rule::Statement, &mut state) {
89 Ok(_) => {
90 println!("✓ Parsed successfully (with recovery)!");
91 }
92 Err(e) => {
93 println!("✗ Parse error: {:?}", e);
94 println!("Note: Error recovery attempted to sync at semicolon");
95 }
96 }
97}
Source
pub fn parse_rule<'tokens, 'arena, N, Ctx>(
&mut self,
rule_id: R,
state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
) -> Result<N, ParseError<K, R>>
where
N: NodeId,
Ctx: GrammarContext,
pub fn parse_rule<'tokens, 'arena, N, Ctx>(
&mut self,
rule_id: R,
state: &mut ParserState<'tokens, 'arena, K, R, N, Ctx>,
) -> Result<N, ParseError<K, R>>
where
N: NodeId,
Ctx: GrammarContext,
Parse a top-level rule by identifier.
Examples found in repository?
32fn main() {
33 // Create a parser
34 let mut parser = Parser::create();
35
36 // Register a simple rule: Expr -> Ident Plus Ident
37 parser.register_rule(
38 Rule::Expr,
39 helpers::seq(vec![
40 helpers::token(Token::Ident),
41 helpers::token(Token::Plus),
42 helpers::token(Token::Ident),
43 ]),
44 );
45
46 // Create token stream: "a + b"
47 let tokens = vec![
48 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
49 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
50 TokenStruct::create(Token::Ident, Span::new(4..5), "b", Vec::new(), Vec::new()),
51 ];
52
53 // Parse the tokens
54 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
55 let mut state = ParserState::new(&tokens, &mut arena, false, ());
56
57 match parser.parse_rule(Rule::Expr, &mut state) {
58 Ok(_) => {
59 println!("✓ Successfully parsed!");
60 println!("CST contains {} nodes", arena.len());
61 }
62 Err(e) => {
63 println!("✗ Parse error: {:?}", e);
64 }
65 }
66}
More examples
32fn main() {
33 let mut parser = Parser::create();
34
35 // Define a simple statement rule: Ident Plus Ident Semicolon
36 parser.register_rule(
37 Rule::Statement,
38 helpers::seq(vec![
39 helpers::token(Token::Ident),
40 helpers::token(Token::Plus),
41 helpers::token(Token::Ident),
42 helpers::token(Token::Semicolon),
43 ]),
44 );
45
46 // Configure error recovery: use semicolon as sync token
47 parser.set_sync_tokens(vec![Token::Semicolon]);
48
49 // Create token stream with an error: "a + invalid ; b + c ;"
50 // The parser should recover after the first semicolon
51 let tokens = vec![
52 TokenStruct::create(Token::Ident, Span::new(0..1), "a", Vec::new(), Vec::new()),
53 TokenStruct::create(Token::Plus, Span::new(2..3), "+", Vec::new(), Vec::new()),
54 TokenStruct::create(
55 Token::Invalid,
56 Span::new(4..10),
57 "invalid",
58 Vec::new(),
59 Vec::new(),
60 ),
61 TokenStruct::create(
62 Token::Semicolon,
63 Span::new(11..12),
64 ";",
65 Vec::new(),
66 Vec::new(),
67 ),
68 TokenStruct::create(Token::Ident, Span::new(13..14), "b", Vec::new(), Vec::new()),
69 TokenStruct::create(Token::Plus, Span::new(15..16), "+", Vec::new(), Vec::new()),
70 TokenStruct::create(Token::Ident, Span::new(17..18), "c", Vec::new(), Vec::new()),
71 TokenStruct::create(
72 Token::Semicolon,
73 Span::new(19..20),
74 ";",
75 Vec::new(),
76 Vec::new(),
77 ),
78 ];
79
80 let mut arena: NodeArena<Token, Rule, RawNodeId> = NodeArena::new();
81 let mut state = ParserState::new(&tokens, &mut arena, false, ());
82
83 println!("Parsing with error recovery enabled...");
84 println!("Input: a + invalid ; b + c ;");
85 println!();
86
87 // Try to parse - should recover after first error
88 match parser.parse_rule(Rule::Statement, &mut state) {
89 Ok(_) => {
90 println!("✓ Parsed successfully (with recovery)!");
91 }
92 Err(e) => {
93 println!("✗ Parse error: {:?}", e);
94 println!("Note: Error recovery attempted to sync at semicolon");
95 }
96 }
97}