1use std::{collections::HashMap, mem};
2
3use crate::{
4 ast::*,
5 error::SyntaxError,
6 span::Span,
7 token::{Token, TokenKind},
8 Lexer,
9};
10
/// Comments collected while parsing, grouped by token position.
///
/// Each key is the index (into [`ParseResult::tokens`]) that the first
/// non-comment token following the group of comments occupies. Comments at
/// the very end of the input are keyed by the total number of tokens.
pub type CommentMap<'src> = HashMap<usize, Vec<Comment<'src>>>;
12
/// Everything produced by a successful [`Parser::parse`] call.
#[derive(Debug, Clone)]
pub struct ParseResult<'src> {
    /// Comments found in the input, keyed by the index of the token that follows them.
    pub comments: CommentMap<'src>,
    /// All non-comment tokens the parser consumed, in the order the lexer produced them.
    pub tokens: Vec<Token<'src>>,
    /// The parsed syntax tree.
    pub syntax: Syntax<'src>,
}
19
/// Recursive-descent parser that turns the tokens of a [`Lexer`] into a [`Syntax`] tree.
///
/// The parser keeps a window of two tokens (`prev_tok`, `curr_tok`) and records
/// every consumed token as well as every comment it skips.
pub struct Parser<'src> {
    /// Source of tokens.
    lexer: Lexer<'src>,
    /// The most recently consumed token; it is moved into `tokens` on the next advance.
    prev_tok: Option<Token<'src>>,
    /// The token currently being inspected. `None` before the first advance and
    /// whenever the lexer has no further token to offer.
    curr_tok: Option<Token<'src>>,
    /// Span that `curr_span` held before the last advance.
    prev_span: Span,
    /// Span of `curr_tok`, or a one-wide span at the lexer's index when there is no current token.
    curr_span: Span,
    /// Non-comment tokens that have already left the two-token window.
    tokens: Vec<Token<'src>>,
    /// Comments skipped so far, keyed by the index of the token that follows them.
    comments: CommentMap<'src>,
}
29
30impl<'src> Parser<'src> {
31 pub fn new(lexer: Lexer<'src>) -> Self {
32 Self {
33 lexer,
34 prev_tok: None,
35 curr_tok: None,
36 prev_span: Span::new(0, 1),
37 curr_span: Span::new(0, 1),
38 tokens: vec![],
39 comments: HashMap::new(),
40 }
41 }
42
43 pub fn parse(mut self) -> Result<ParseResult<'src>, SyntaxError> {
44 self.next()?;
45 let syntax = self.syntax()?;
46 self.next()?; if let Some(curr_tok) = &self.curr_tok {
49 return Err(SyntaxError::new(curr_tok.span, "Expected EOF".into()));
50 }
51 Ok(ParseResult {
52 comments: self.comments,
53 tokens: self.tokens,
54 syntax,
55 })
56 }
57
58 fn next(&mut self) -> Result<(), SyntaxError> {
59 if let Some(prev_tok) = self.prev_tok.take() {
60 self.tokens.push(prev_tok);
61 }
62
63 self.prev_tok = self.curr_tok.take();
64 self.curr_tok = self.lexer.next_token()?;
65
66 let mut comments = vec![];
67 while let Some(Token {
68 kind: TokenKind::Comment(_),
69 ..
70 }) = self.curr_tok
71 {
72 let comment = self.curr_tok.take().expect("`curr_tok` is a comment token");
73 comments.push(Comment::try_from(comment).expect("`comment` is a comment token"));
74 self.curr_tok = self.lexer.next_token()?;
75 }
76 if !comments.is_empty() {
77 self.comments.insert(
78 self.tokens.len() + self.prev_tok.is_some() as usize,
79 comments,
80 );
81 }
82
83 mem::swap(&mut self.prev_span, &mut self.curr_span);
84 if let Some(curr_tok) = &self.curr_tok {
85 self.curr_span = curr_tok.span;
86 } else {
87 self.curr_span = Span::new(self.lexer.index, self.lexer.index + 1);
88 }
89 Ok(())
90 }
91
92 fn is_kind(&mut self, kind: TokenKind) -> Result<bool, SyntaxError> {
93 Ok(matches!(self.curr_tok, Some(Token { kind: tok_kind, .. }) if tok_kind == kind))
94 }
95
96 fn expect(&mut self, kind: TokenKind) -> Result<(), SyntaxError> {
97 match self.curr_tok.as_ref().map(|tok| &tok.kind) {
98 Some(tok_kind) if tok_kind == &kind => {
99 self.next()?;
100 Ok(())
101 }
102 Some(tok_kind) => Err(SyntaxError::new(
103 self.curr_span,
104 format!("Expected '{kind}', was '{tok_kind}'").into(),
105 )),
106 None => Err(SyntaxError::new(
107 self.curr_span,
108 format!("Expected '{kind}'").into(),
109 )),
110 }
111 }
112
113 fn syntax(&mut self) -> Result<Syntax<'src>, SyntaxError> {
114 let start = self.curr_span.start;
115 let mut rules = vec![];
116
117 while self.curr_tok.is_some() {
118 rules.push(self.syntax_rule()?);
119 }
120 if rules.is_empty() {
121 return Err(SyntaxError::new(
122 self.curr_span,
123 "Syntax requires at least on syntax rule".into(),
124 ));
125 }
126
127 Ok(Syntax {
128 span: Span::new(start, self.prev_span.end),
129 rules,
130 })
131 }
132
133 fn syntax_rule(&mut self) -> Result<SyntaxRule<'src>, SyntaxError> {
134 let start = self.curr_span.start;
135
136 let name = match self.curr_tok {
137 Some(Token {
138 kind: TokenKind::Identifier(name),
139 ..
140 }) => name,
141 _ => {
142 return Err(SyntaxError::new(
143 self.curr_span,
144 "Expected identifier".into(),
145 ))
146 }
147 };
148 self.next()?;
149
150 self.expect(TokenKind::Equal)?;
151 let definitions = self.definitions_list()?;
152 self.expect(TokenKind::Semicolon)?;
153
154 Ok(SyntaxRule {
155 span: Span::new(start, self.prev_span.end),
156 name,
157 definitions,
158 })
159 }
160
161 fn definitions_list(&mut self) -> Result<Vec<SingleDefinition<'src>>, SyntaxError> {
162 let mut definitions = vec![self.single_definition()?];
163
164 while self.is_kind(TokenKind::Pipe)? {
165 self.next()?;
166 definitions.push(self.single_definition()?);
167 }
168
169 Ok(definitions)
170 }
171
172 fn single_definition(&mut self) -> Result<SingleDefinition<'src>, SyntaxError> {
173 let start = self.curr_span.start;
174 let mut terms = vec![self.syntactic_term()?];
175
176 while self.is_kind(TokenKind::Comma)? {
177 self.next()?;
178 terms.push(self.syntactic_term()?);
179 }
180
181 Ok(SingleDefinition {
182 span: Span::new(start, self.prev_span.end),
183 terms,
184 })
185 }
186
187 fn syntactic_term(&mut self) -> Result<SyntacticTerm<'src>, SyntaxError> {
188 let start = self.curr_span.start;
189 let factor = self.syntactic_factor()?;
190 let exception = match self.is_kind(TokenKind::Dash)? {
191 true => {
192 self.next()?;
193 Some(self.syntactic_exception()?)
194 }
195 false => None,
196 };
197
198 Ok(SyntacticTerm {
199 span: Span::new(start, self.prev_span.end),
200 factor,
201 exception,
202 })
203 }
204
205 #[inline]
206 fn syntactic_exception(&mut self) -> Result<SyntacticException<'src>, SyntaxError> {
207 self.syntactic_factor()
208 }
209
210 fn syntactic_factor(&mut self) -> Result<SyntacticFactor<'src>, SyntaxError> {
211 let start = self.curr_span.start;
212 let repetition = match self.curr_tok {
213 Some(Token {
214 kind: TokenKind::Integer(num),
215 ..
216 }) => {
217 self.next()?;
218 self.expect(TokenKind::Star)?;
219 Some(num)
220 }
221 _ => None,
222 };
223 let primary = self.syntactic_primary()?;
224
225 Ok(SyntacticFactor {
226 span: Span::new(start, self.prev_span.end),
227 repetition,
228 primary,
229 })
230 }
231
232 fn syntactic_primary(&mut self) -> Result<SyntacticPrimary<'src>, SyntaxError> {
233 let start = self.curr_span.start;
234 let kind = match self
235 .curr_tok
236 .as_ref()
237 .map_or(TokenKind::Semicolon, |tok| tok.kind)
238 {
239 TokenKind::LBracket => SyntacticPrimaryKind::OptionalSequence(
240 self.delimited_definitions_list(TokenKind::RBracket)?,
241 ),
242 TokenKind::LBrace => SyntacticPrimaryKind::RepeatedSequence(
243 self.delimited_definitions_list(TokenKind::RBrace)?,
244 ),
245 TokenKind::LParen => SyntacticPrimaryKind::GroupedSequence(
246 self.delimited_definitions_list(TokenKind::RParen)?,
247 ),
248 TokenKind::Identifier(name) => {
249 self.next()?;
250 SyntacticPrimaryKind::MetaIdentifier(name)
251 }
252 TokenKind::Terminal(text) => {
253 self.next()?;
254 SyntacticPrimaryKind::TerminalString(text)
255 }
256 TokenKind::SpecialSeq(text) => {
257 self.next()?;
258 SyntacticPrimaryKind::SpecialSequence(text)
259 }
260 _ => SyntacticPrimaryKind::EmptySequence,
261 };
262
263 Ok(SyntacticPrimary {
264 span: Span::new(start, self.prev_span.end),
265 kind,
266 })
267 }
268
269 fn delimited_definitions_list(
270 &mut self,
271 right_delimiter: TokenKind,
272 ) -> Result<Vec<SingleDefinition<'src>>, SyntaxError> {
273 self.next()?;
274 let definitions = self.definitions_list()?;
275 self.expect(right_delimiter)?;
276
277 Ok(definitions)
278 }
279}