rue_parser/
parser.rs

1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
/// A lexer token after conversion to the parser's own kind type.
#[derive(Debug)]
struct ParseToken {
    /// Range of this token within the source text.
    span: Range<usize>,
    /// Syntax kind assigned to the token when the parser was constructed.
    kind: SyntaxKind,
}
16
/// The outcome of running the parser to completion.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Diagnostics collected while converting tokens and while parsing.
    pub diagnostics: Vec<Diagnostic>,
    /// Root node of the syntax tree produced by the tree builder.
    pub node: SyntaxNode,
}
22
/// Token-stream parser that feeds a rowan green tree builder.
#[derive(Debug)]
pub struct Parser {
    /// The source whose text the token spans index into.
    source: Source,
    /// All tokens of the source, trivia included, in lexing order.
    parse_tokens: Vec<ParseToken>,
    /// Index into `parse_tokens` of the next token that has not been consumed.
    pos: usize,
    /// Kinds that `at` has checked for since the set was last cleared;
    /// used to describe what was expected when an unexpected token is reported.
    expected: IndexSet<SyntaxKind>,
    /// Diagnostics accumulated so far.
    diagnostics: Vec<Diagnostic>,
    /// Builder that receives every node and token the parser emits.
    builder: GreenNodeBuilder<'static>,
}
32
33impl Parser {
34    pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35        let mut parse_tokens = Vec::with_capacity(tokens.len());
36        let mut diagnostics = Vec::new();
37
38        for token in tokens {
39            let kind = match token.kind {
40                TokenKind::Whitespace => SyntaxKind::Whitespace,
41                TokenKind::LineComment => SyntaxKind::LineComment,
42                TokenKind::BlockComment { is_terminated } => {
43                    if !is_terminated {
44                        diagnostics.push(Diagnostic::new(
45                            SrcLoc::new(source.clone(), token.span.clone()),
46                            DiagnosticKind::UnterminatedBlockComment,
47                        ));
48                    }
49                    SyntaxKind::BlockComment
50                }
51                TokenKind::String { is_terminated } => {
52                    if !is_terminated {
53                        diagnostics.push(Diagnostic::new(
54                            SrcLoc::new(source.clone(), token.span.clone()),
55                            DiagnosticKind::UnterminatedString,
56                        ));
57                    }
58                    SyntaxKind::String
59                }
60                TokenKind::Hex { is_terminated } => {
61                    if !is_terminated {
62                        diagnostics.push(Diagnostic::new(
63                            SrcLoc::new(source.clone(), token.span.clone()),
64                            DiagnosticKind::UnterminatedHex,
65                        ));
66                    }
67                    SyntaxKind::Hex
68                }
69                TokenKind::Binary { is_terminated } => {
70                    if !is_terminated {
71                        diagnostics.push(Diagnostic::new(
72                            SrcLoc::new(source.clone(), token.span.clone()),
73                            DiagnosticKind::UnterminatedBinary,
74                        ));
75                    }
76                    SyntaxKind::Binary
77                }
78                TokenKind::Octal { is_terminated } => {
79                    if !is_terminated {
80                        diagnostics.push(Diagnostic::new(
81                            SrcLoc::new(source.clone(), token.span.clone()),
82                            DiagnosticKind::UnterminatedOctal,
83                        ));
84                    }
85                    SyntaxKind::Octal
86                }
87                TokenKind::Integer => SyntaxKind::Integer,
88                TokenKind::Ident => SyntaxKind::Ident,
89                TokenKind::Nil => T![nil],
90                TokenKind::True => T![true],
91                TokenKind::False => T![false],
92                TokenKind::Export => T![export],
93                TokenKind::Extern => T![extern],
94                TokenKind::Inline => T![inline],
95                TokenKind::Test => T![test],
96                TokenKind::Mod => T![mod],
97                TokenKind::Fn => T![fn],
98                TokenKind::Const => T![const],
99                TokenKind::Type => T![type],
100                TokenKind::Struct => T![struct],
101                TokenKind::Let => T![let],
102                TokenKind::If => T![if],
103                TokenKind::Else => T![else],
104                TokenKind::Return => T![return],
105                TokenKind::Assert => T![assert],
106                TokenKind::Raise => T![raise],
107                TokenKind::Debug => T![debug],
108                TokenKind::Is => T![is],
109                TokenKind::As => T![as],
110                TokenKind::OpenParen => T!['('],
111                TokenKind::CloseParen => T![')'],
112                TokenKind::OpenBrace => T!['{'],
113                TokenKind::CloseBrace => T!['}'],
114                TokenKind::OpenBracket => T!['['],
115                TokenKind::CloseBracket => T![']'],
116                TokenKind::Plus => T![+],
117                TokenKind::Minus => T![-],
118                TokenKind::Star => T![*],
119                TokenKind::Slash => T![/],
120                TokenKind::Percent => T![%],
121                TokenKind::Equals => T![=],
122                TokenKind::LessThan => T![<],
123                TokenKind::GreaterThan => T![>],
124                TokenKind::Not => T![!],
125                TokenKind::And => T![&],
126                TokenKind::Or => T![|],
127                TokenKind::Tilde => T![~],
128                TokenKind::Xor => T![^],
129                TokenKind::Dot => T![.],
130                TokenKind::Comma => T![,],
131                TokenKind::Colon => T![:],
132                TokenKind::Semicolon => T![;],
133                TokenKind::Unknown => {
134                    diagnostics.push(Diagnostic::new(
135                        SrcLoc::new(source.clone(), token.span.clone()),
136                        DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
137                    ));
138                    SyntaxKind::Error
139                }
140            };
141
142            parse_tokens.push(ParseToken {
143                span: token.span,
144                kind,
145            });
146        }
147
148        Self {
149            source,
150            parse_tokens,
151            pos: 0,
152            expected: IndexSet::new(),
153            diagnostics,
154            builder: GreenNodeBuilder::new(),
155        }
156    }
157
158    pub fn parse(mut self) -> ParseResult {
159        document(&mut self);
160        ParseResult {
161            diagnostics: self.diagnostics,
162            node: SyntaxNode::new_root(self.builder.finish()),
163        }
164    }
165
166    #[cfg(test)]
167    pub(crate) fn parse_raw(self) -> ParseResult {
168        ParseResult {
169            diagnostics: self.diagnostics,
170            node: SyntaxNode::new_root(self.builder.finish()),
171        }
172    }
173
174    pub(crate) fn checkpoint(&mut self) -> Checkpoint {
175        self.eat_trivia();
176        self.builder.checkpoint()
177    }
178
179    pub(crate) fn start_including_trivia(&mut self, kind: SyntaxKind) {
180        self.builder.start_node(RueLang::kind_to_raw(kind));
181    }
182
183    pub(crate) fn start(&mut self, kind: SyntaxKind) {
184        self.eat_trivia();
185        self.builder.start_node(RueLang::kind_to_raw(kind));
186    }
187
188    pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
189        self.builder
190            .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
191    }
192
193    pub(crate) fn finish(&mut self) {
194        self.builder.finish_node();
195    }
196
197    pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
198        for kind in kinds
199            .iter()
200            .sorted_by_key(|kind| Reverse(kind.split().len()))
201        {
202            if self.at(*kind) {
203                return Some(*kind);
204            }
205        }
206        None
207    }
208
209    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
210        self.eat_trivia();
211
212        self.expected.insert(kind);
213
214        let split = kind.split();
215
216        for (i, kind) in split.iter().enumerate() {
217            if self.nth(i) != *kind {
218                return false;
219            }
220        }
221
222        true
223    }
224
225    pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
226        if self.at(kind) {
227            self.bump(kind);
228            true
229        } else {
230            false
231        }
232    }
233
234    pub(crate) fn expect(&mut self, kind: SyntaxKind) {
235        if self.at(kind) {
236            self.bump(kind);
237        } else {
238            self.skip();
239        }
240    }
241
242    fn nth(&self, n: usize) -> SyntaxKind {
243        self.parse_tokens
244            .get(self.pos + n)
245            .map_or(SyntaxKind::Eof, |token| token.kind)
246    }
247
248    pub(crate) fn eat_trivia(&mut self) {
249        while self.nth(0).is_trivia() {
250            self.bump(self.nth(0));
251        }
252    }
253
254    pub(crate) fn skip(&mut self) {
255        let expected = mem::take(&mut self.expected);
256
257        let len = self.source.text.len();
258
259        let span = self
260            .parse_tokens
261            .get(self.pos)
262            .map_or(len..len, |token| token.span.clone());
263
264        self.diagnostics.push(Diagnostic::new(
265            SrcLoc::new(self.source.clone(), span),
266            DiagnosticKind::UnexpectedToken(
267                self.nth(0).to_string(),
268                expected.iter().map(ToString::to_string).collect(),
269            ),
270        ));
271
272        if self.pos < self.parse_tokens.len() {
273            self.pos += 1;
274        }
275    }
276
277    pub(crate) fn error(&mut self, kind: DiagnosticKind) {
278        self.expected.clear();
279
280        let len = self.source.text.len();
281
282        let span = self
283            .parse_tokens
284            .get(self.pos)
285            .map_or(len..len, |token| token.span.clone());
286
287        self.diagnostics.push(Diagnostic::new(
288            SrcLoc::new(self.source.clone(), span),
289            kind,
290        ));
291    }
292
293    fn bump(&mut self, kind: SyntaxKind) {
294        self.expected.clear();
295
296        let len = kind.split().len();
297
298        let span =
299            self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
300
301        self.builder
302            .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
303
304        self.pos += len;
305    }
306}