ebnf_parser/
parser.rs

1use std::{collections::HashMap, mem};
2
3use crate::{
4    ast::*,
5    error::SyntaxError,
6    span::Span,
7    token::{Token, TokenKind},
8    Lexer,
9};
10
11pub type CommentMap<'src> = HashMap<usize, Vec<Comment<'src>>>;
12
13#[derive(Debug, Clone)]
14pub struct ParseResult<'src> {
15    pub comments: CommentMap<'src>,
16    pub tokens: Vec<Token<'src>>,
17    pub syntax: Syntax<'src>,
18}
19
20pub struct Parser<'src> {
21    lexer: Lexer<'src>,
22    prev_tok: Option<Token<'src>>,
23    curr_tok: Option<Token<'src>>,
24    prev_span: Span,
25    curr_span: Span,
26    tokens: Vec<Token<'src>>,
27    comments: CommentMap<'src>,
28}
29
30impl<'src> Parser<'src> {
31    pub fn new(lexer: Lexer<'src>) -> Self {
32        Self {
33            lexer,
34            prev_tok: None,
35            curr_tok: None,
36            prev_span: Span::new(0, 1),
37            curr_span: Span::new(0, 1),
38            tokens: vec![],
39            comments: HashMap::new(),
40        }
41    }
42
43    pub fn parse(mut self) -> Result<ParseResult<'src>, SyntaxError> {
44        self.next()?;
45        let syntax = self.syntax()?;
46        self.next()?; // add prev_tok to tokens list
47
48        if let Some(curr_tok) = &self.curr_tok {
49            return Err(SyntaxError::new(curr_tok.span, "Expected EOF".into()));
50        }
51        Ok(ParseResult {
52            comments: self.comments,
53            tokens: self.tokens,
54            syntax,
55        })
56    }
57
58    fn next(&mut self) -> Result<(), SyntaxError> {
59        if let Some(prev_tok) = self.prev_tok.take() {
60            self.tokens.push(prev_tok);
61        }
62
63        self.prev_tok = self.curr_tok.take();
64        self.curr_tok = self.lexer.next_token()?;
65
66        let mut comments = vec![];
67        while let Some(Token {
68            kind: TokenKind::Comment(_),
69            ..
70        }) = self.curr_tok
71        {
72            let comment = self.curr_tok.take().expect("`curr_tok` is a comment token");
73            comments.push(Comment::try_from(comment).expect("`comment` is a comment token"));
74            self.curr_tok = self.lexer.next_token()?;
75        }
76        if !comments.is_empty() {
77            self.comments.insert(
78                self.tokens.len() + self.prev_tok.is_some() as usize,
79                comments,
80            );
81        }
82
83        mem::swap(&mut self.prev_span, &mut self.curr_span);
84        if let Some(curr_tok) = &self.curr_tok {
85            self.curr_span = curr_tok.span;
86        } else {
87            self.curr_span = Span::new(self.lexer.index, self.lexer.index + 1);
88        }
89        Ok(())
90    }
91
92    fn is_kind(&mut self, kind: TokenKind) -> Result<bool, SyntaxError> {
93        Ok(matches!(self.curr_tok, Some(Token { kind: tok_kind, .. }) if tok_kind == kind))
94    }
95
96    fn expect(&mut self, kind: TokenKind) -> Result<(), SyntaxError> {
97        match self.curr_tok.as_ref().map(|tok| &tok.kind) {
98            Some(tok_kind) if tok_kind == &kind => {
99                self.next()?;
100                Ok(())
101            }
102            Some(tok_kind) => Err(SyntaxError::new(
103                self.curr_span,
104                format!("Expected '{kind}', was '{tok_kind}'").into(),
105            )),
106            None => Err(SyntaxError::new(
107                self.curr_span,
108                format!("Expected '{kind}'").into(),
109            )),
110        }
111    }
112
113    fn syntax(&mut self) -> Result<Syntax<'src>, SyntaxError> {
114        let start = self.curr_span.start;
115        let mut rules = vec![];
116
117        while self.curr_tok.is_some() {
118            rules.push(self.syntax_rule()?);
119        }
120        if rules.is_empty() {
121            return Err(SyntaxError::new(
122                self.curr_span,
123                "Syntax requires at least on syntax rule".into(),
124            ));
125        }
126
127        Ok(Syntax {
128            span: Span::new(start, self.prev_span.end),
129            rules,
130        })
131    }
132
133    fn syntax_rule(&mut self) -> Result<SyntaxRule<'src>, SyntaxError> {
134        let start = self.curr_span.start;
135
136        let name = match self.curr_tok {
137            Some(Token {
138                kind: TokenKind::Identifier(name),
139                ..
140            }) => name,
141            _ => {
142                return Err(SyntaxError::new(
143                    self.curr_span,
144                    "Expected identifier".into(),
145                ))
146            }
147        };
148        self.next()?;
149
150        self.expect(TokenKind::Equal)?;
151        let definitions = self.definitions_list()?;
152        self.expect(TokenKind::Semicolon)?;
153
154        Ok(SyntaxRule {
155            span: Span::new(start, self.prev_span.end),
156            name,
157            definitions,
158        })
159    }
160
161    fn definitions_list(&mut self) -> Result<Vec<SingleDefinition<'src>>, SyntaxError> {
162        let mut definitions = vec![self.single_definition()?];
163
164        while self.is_kind(TokenKind::Pipe)? {
165            self.next()?;
166            definitions.push(self.single_definition()?);
167        }
168
169        Ok(definitions)
170    }
171
172    fn single_definition(&mut self) -> Result<SingleDefinition<'src>, SyntaxError> {
173        let start = self.curr_span.start;
174        let mut terms = vec![self.syntactic_term()?];
175
176        while self.is_kind(TokenKind::Comma)? {
177            self.next()?;
178            terms.push(self.syntactic_term()?);
179        }
180
181        Ok(SingleDefinition {
182            span: Span::new(start, self.prev_span.end),
183            terms,
184        })
185    }
186
187    fn syntactic_term(&mut self) -> Result<SyntacticTerm<'src>, SyntaxError> {
188        let start = self.curr_span.start;
189        let factor = self.syntactic_factor()?;
190        let exception = match self.is_kind(TokenKind::Dash)? {
191            true => {
192                self.next()?;
193                Some(self.syntactic_exception()?)
194            }
195            false => None,
196        };
197
198        Ok(SyntacticTerm {
199            span: Span::new(start, self.prev_span.end),
200            factor,
201            exception,
202        })
203    }
204
205    #[inline]
206    fn syntactic_exception(&mut self) -> Result<SyntacticException<'src>, SyntaxError> {
207        self.syntactic_factor()
208    }
209
210    fn syntactic_factor(&mut self) -> Result<SyntacticFactor<'src>, SyntaxError> {
211        let start = self.curr_span.start;
212        let repetition = match self.curr_tok {
213            Some(Token {
214                kind: TokenKind::Integer(num),
215                ..
216            }) => {
217                self.next()?;
218                self.expect(TokenKind::Star)?;
219                Some(num)
220            }
221            _ => None,
222        };
223        let primary = self.syntactic_primary()?;
224
225        Ok(SyntacticFactor {
226            span: Span::new(start, self.prev_span.end),
227            repetition,
228            primary,
229        })
230    }
231
232    fn syntactic_primary(&mut self) -> Result<SyntacticPrimary<'src>, SyntaxError> {
233        let start = self.curr_span.start;
234        let kind = match self
235            .curr_tok
236            .as_ref()
237            .map_or(TokenKind::Semicolon, |tok| tok.kind)
238        {
239            TokenKind::LBracket => SyntacticPrimaryKind::OptionalSequence(
240                self.delimited_definitions_list(TokenKind::RBracket)?,
241            ),
242            TokenKind::LBrace => SyntacticPrimaryKind::RepeatedSequence(
243                self.delimited_definitions_list(TokenKind::RBrace)?,
244            ),
245            TokenKind::LParen => SyntacticPrimaryKind::GroupedSequence(
246                self.delimited_definitions_list(TokenKind::RParen)?,
247            ),
248            TokenKind::Identifier(name) => {
249                self.next()?;
250                SyntacticPrimaryKind::MetaIdentifier(name)
251            }
252            TokenKind::Terminal(text) => {
253                self.next()?;
254                SyntacticPrimaryKind::TerminalString(text)
255            }
256            TokenKind::SpecialSeq(text) => {
257                self.next()?;
258                SyntacticPrimaryKind::SpecialSequence(text)
259            }
260            _ => SyntacticPrimaryKind::EmptySequence,
261        };
262
263        Ok(SyntacticPrimary {
264            span: Span::new(start, self.prev_span.end),
265            kind,
266        })
267    }
268
269    fn delimited_definitions_list(
270        &mut self,
271        right_delimiter: TokenKind,
272    ) -> Result<Vec<SingleDefinition<'src>>, SyntaxError> {
273        self.next()?;
274        let definitions = self.definitions_list()?;
275        self.expect(right_delimiter)?;
276
277        Ok(definitions)
278    }
279}