rue_parser/
parser.rs

1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
11#[derive(Debug)]
12struct ParseToken {
13    span: Range<usize>,
14    kind: SyntaxKind,
15}
16
17#[derive(Debug, Clone)]
18pub struct ParseResult {
19    pub diagnostics: Vec<Diagnostic>,
20    pub node: SyntaxNode,
21}
22
23#[derive(Debug)]
24pub struct Parser {
25    source: Source,
26    parse_tokens: Vec<ParseToken>,
27    pos: usize,
28    expected: IndexSet<SyntaxKind>,
29    diagnostics: Vec<Diagnostic>,
30    builder: GreenNodeBuilder<'static>,
31}
32
33impl Parser {
34    pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35        let mut parse_tokens = Vec::with_capacity(tokens.len());
36        let mut diagnostics = Vec::new();
37
38        for token in tokens {
39            let kind = match token.kind {
40                TokenKind::Whitespace => SyntaxKind::Whitespace,
41                TokenKind::LineComment => SyntaxKind::LineComment,
42                TokenKind::BlockComment { is_terminated } => {
43                    if !is_terminated {
44                        diagnostics.push(Diagnostic::new(
45                            SrcLoc::new(source.clone(), token.span.clone()),
46                            DiagnosticKind::UnterminatedBlockComment,
47                        ));
48                    }
49                    SyntaxKind::BlockComment
50                }
51                TokenKind::String { is_terminated } => {
52                    if !is_terminated {
53                        diagnostics.push(Diagnostic::new(
54                            SrcLoc::new(source.clone(), token.span.clone()),
55                            DiagnosticKind::UnterminatedString,
56                        ));
57                    }
58                    SyntaxKind::String
59                }
60                TokenKind::Hex { is_terminated } => {
61                    if !is_terminated {
62                        diagnostics.push(Diagnostic::new(
63                            SrcLoc::new(source.clone(), token.span.clone()),
64                            DiagnosticKind::UnterminatedHex,
65                        ));
66                    }
67                    SyntaxKind::Hex
68                }
69                TokenKind::Binary { is_terminated } => {
70                    if !is_terminated {
71                        diagnostics.push(Diagnostic::new(
72                            SrcLoc::new(source.clone(), token.span.clone()),
73                            DiagnosticKind::UnterminatedBinary,
74                        ));
75                    }
76                    SyntaxKind::Binary
77                }
78                TokenKind::Octal { is_terminated } => {
79                    if !is_terminated {
80                        diagnostics.push(Diagnostic::new(
81                            SrcLoc::new(source.clone(), token.span.clone()),
82                            DiagnosticKind::UnterminatedOctal,
83                        ));
84                    }
85                    SyntaxKind::Octal
86                }
87                TokenKind::Integer => SyntaxKind::Integer,
88                TokenKind::Ident => SyntaxKind::Ident,
89                TokenKind::Nil => T![nil],
90                TokenKind::True => T![true],
91                TokenKind::False => T![false],
92                TokenKind::Import => T![import],
93                TokenKind::Export => T![export],
94                TokenKind::Extern => T![extern],
95                TokenKind::Inline => T![inline],
96                TokenKind::Test => T![test],
97                TokenKind::Mod => T![mod],
98                TokenKind::Fn => T![fn],
99                TokenKind::Const => T![const],
100                TokenKind::Type => T![type],
101                TokenKind::Struct => T![struct],
102                TokenKind::Let => T![let],
103                TokenKind::If => T![if],
104                TokenKind::Else => T![else],
105                TokenKind::Return => T![return],
106                TokenKind::Assert => T![assert],
107                TokenKind::Raise => T![raise],
108                TokenKind::Debug => T![debug],
109                TokenKind::Is => T![is],
110                TokenKind::As => T![as],
111                TokenKind::Super => T![super],
112                TokenKind::OpenParen => T!['('],
113                TokenKind::CloseParen => T![')'],
114                TokenKind::OpenBrace => T!['{'],
115                TokenKind::CloseBrace => T!['}'],
116                TokenKind::OpenBracket => T!['['],
117                TokenKind::CloseBracket => T![']'],
118                TokenKind::Plus => T![+],
119                TokenKind::Minus => T![-],
120                TokenKind::Star => T![*],
121                TokenKind::Slash => T![/],
122                TokenKind::Percent => T![%],
123                TokenKind::Equals => T![=],
124                TokenKind::LessThan => T![<],
125                TokenKind::GreaterThan => T![>],
126                TokenKind::Not => T![!],
127                TokenKind::And => T![&],
128                TokenKind::Or => T![|],
129                TokenKind::Tilde => T![~],
130                TokenKind::Xor => T![^],
131                TokenKind::Dot => T![.],
132                TokenKind::Comma => T![,],
133                TokenKind::Colon => T![:],
134                TokenKind::Semicolon => T![;],
135                TokenKind::Unknown => {
136                    diagnostics.push(Diagnostic::new(
137                        SrcLoc::new(source.clone(), token.span.clone()),
138                        DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
139                    ));
140                    SyntaxKind::Error
141                }
142            };
143
144            parse_tokens.push(ParseToken {
145                span: token.span,
146                kind,
147            });
148        }
149
150        Self {
151            source,
152            parse_tokens,
153            pos: 0,
154            expected: IndexSet::new(),
155            diagnostics,
156            builder: GreenNodeBuilder::new(),
157        }
158    }
159
160    pub fn parse(mut self) -> ParseResult {
161        document(&mut self);
162        ParseResult {
163            diagnostics: self.diagnostics,
164            node: SyntaxNode::new_root(self.builder.finish()),
165        }
166    }
167
168    #[cfg(test)]
169    pub(crate) fn parse_raw(self) -> ParseResult {
170        ParseResult {
171            diagnostics: self.diagnostics,
172            node: SyntaxNode::new_root(self.builder.finish()),
173        }
174    }
175
176    pub(crate) fn checkpoint(&mut self) -> Checkpoint {
177        self.eat_trivia();
178        self.builder.checkpoint()
179    }
180
181    pub(crate) fn start_including_trivia(&mut self, kind: SyntaxKind) {
182        self.builder.start_node(RueLang::kind_to_raw(kind));
183    }
184
185    pub(crate) fn start(&mut self, kind: SyntaxKind) {
186        self.eat_trivia();
187        self.builder.start_node(RueLang::kind_to_raw(kind));
188    }
189
190    pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
191        self.builder
192            .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
193    }
194
195    pub(crate) fn finish(&mut self) {
196        self.builder.finish_node();
197    }
198
199    pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
200        for kind in kinds
201            .iter()
202            .sorted_by_key(|kind| Reverse(kind.split().len()))
203        {
204            if self.at(*kind) {
205                return Some(*kind);
206            }
207        }
208        None
209    }
210
211    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
212        self.eat_trivia();
213
214        self.expected.insert(kind);
215
216        let split = kind.split();
217
218        for (i, kind) in split.iter().enumerate() {
219            if self.nth(i) != *kind {
220                return false;
221            }
222        }
223
224        true
225    }
226
227    pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
228        if self.at(kind) {
229            self.bump(kind);
230            true
231        } else {
232            false
233        }
234    }
235
236    pub(crate) fn expect(&mut self, kind: SyntaxKind) {
237        if self.at(kind) {
238            self.bump(kind);
239        } else {
240            self.skip();
241        }
242    }
243
244    fn nth(&self, n: usize) -> SyntaxKind {
245        self.parse_tokens
246            .get(self.pos + n)
247            .map_or(SyntaxKind::Eof, |token| token.kind)
248    }
249
250    pub(crate) fn eat_trivia(&mut self) {
251        while self.nth(0).is_trivia() {
252            self.bump(self.nth(0));
253        }
254    }
255
256    pub(crate) fn skip(&mut self) {
257        let expected = mem::take(&mut self.expected);
258
259        let len = self.source.text.len();
260
261        let span = self
262            .parse_tokens
263            .get(self.pos)
264            .map_or(len..len, |token| token.span.clone());
265
266        self.diagnostics.push(Diagnostic::new(
267            SrcLoc::new(self.source.clone(), span),
268            DiagnosticKind::UnexpectedToken(
269                self.nth(0).to_string(),
270                expected.iter().map(ToString::to_string).collect(),
271            ),
272        ));
273
274        if self.pos < self.parse_tokens.len() {
275            self.pos += 1;
276        }
277    }
278
279    pub(crate) fn error(&mut self, kind: DiagnosticKind) {
280        self.expected.clear();
281
282        let len = self.source.text.len();
283
284        let span = self
285            .parse_tokens
286            .get(self.pos)
287            .map_or(len..len, |token| token.span.clone());
288
289        self.diagnostics.push(Diagnostic::new(
290            SrcLoc::new(self.source.clone(), span),
291            kind,
292        ));
293    }
294
295    fn bump(&mut self, kind: SyntaxKind) {
296        self.expected.clear();
297
298        let len = kind.split().len();
299
300        let span =
301            self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
302
303        self.builder
304            .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
305
306        self.pos += len;
307    }
308}