rue_parser/
parser.rs

1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
/// A lexer token reduced to what the parser needs: its position in the
/// source text and the [`SyntaxKind`] it was mapped to.
#[derive(Debug)]
struct ParseToken {
    /// Range used to index the source text when emitting the token or
    /// reporting a diagnostic about it.
    span: Range<usize>,
    /// Syntax kind assigned to the token in [`Parser::new`].
    kind: SyntaxKind,
}
16
/// Output of a parse: the root of the syntax tree together with every
/// diagnostic the parser collected.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Diagnostics gathered while converting lexer tokens and while parsing.
    pub diagnostics: Vec<Diagnostic>,
    /// Root node created from the finished green tree.
    pub node: SyntaxNode,
}
22
/// Token-stream parser that builds a rowan green tree and collects
/// diagnostics along the way.
#[derive(Debug)]
pub struct Parser {
    /// Source being parsed; its text is sliced for token text and the source
    /// is cloned into diagnostic locations.
    source: Source,
    /// Tokens converted from the lexer output in [`Parser::new`].
    parse_tokens: Vec<ParseToken>,
    /// Index into `parse_tokens` of the next token that has not been consumed.
    pos: usize,
    /// Kinds tested with `at` since the last consumed or skipped token;
    /// reported as the expected set by `skip`.
    expected: IndexSet<SyntaxKind>,
    /// Diagnostics collected so far.
    diagnostics: Vec<Diagnostic>,
    /// Builder for the green tree that `parse` finishes.
    builder: GreenNodeBuilder<'static>,
}
32
33impl Parser {
34    pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35        let mut parse_tokens = Vec::with_capacity(tokens.len());
36        let mut diagnostics = Vec::new();
37
38        for token in tokens {
39            let kind = match token.kind {
40                TokenKind::Whitespace => SyntaxKind::Whitespace,
41                TokenKind::LineComment => SyntaxKind::LineComment,
42                TokenKind::BlockComment { is_terminated } => {
43                    if !is_terminated {
44                        diagnostics.push(Diagnostic::new(
45                            SrcLoc::new(source.clone(), token.span.clone()),
46                            DiagnosticKind::UnterminatedBlockComment,
47                        ));
48                    }
49                    SyntaxKind::BlockComment
50                }
51                TokenKind::String { is_terminated } => {
52                    if !is_terminated {
53                        diagnostics.push(Diagnostic::new(
54                            SrcLoc::new(source.clone(), token.span.clone()),
55                            DiagnosticKind::UnterminatedString,
56                        ));
57                    }
58                    SyntaxKind::String
59                }
60                TokenKind::Hex { is_terminated } => {
61                    if !is_terminated {
62                        diagnostics.push(Diagnostic::new(
63                            SrcLoc::new(source.clone(), token.span.clone()),
64                            DiagnosticKind::UnterminatedHex,
65                        ));
66                    }
67                    SyntaxKind::Hex
68                }
69                TokenKind::Integer => SyntaxKind::Integer,
70                TokenKind::Ident => SyntaxKind::Ident,
71                TokenKind::Nil => T![nil],
72                TokenKind::True => T![true],
73                TokenKind::False => T![false],
74                TokenKind::Export => T![export],
75                TokenKind::Extern => T![extern],
76                TokenKind::Inline => T![inline],
77                TokenKind::Test => T![test],
78                TokenKind::Mod => T![mod],
79                TokenKind::Fn => T![fn],
80                TokenKind::Const => T![const],
81                TokenKind::Type => T![type],
82                TokenKind::Struct => T![struct],
83                TokenKind::Let => T![let],
84                TokenKind::If => T![if],
85                TokenKind::Else => T![else],
86                TokenKind::Return => T![return],
87                TokenKind::Assert => T![assert],
88                TokenKind::Raise => T![raise],
89                TokenKind::Is => T![is],
90                TokenKind::As => T![as],
91                TokenKind::OpenParen => T!['('],
92                TokenKind::CloseParen => T![')'],
93                TokenKind::OpenBrace => T!['{'],
94                TokenKind::CloseBrace => T!['}'],
95                TokenKind::OpenBracket => T!['['],
96                TokenKind::CloseBracket => T![']'],
97                TokenKind::Plus => T![+],
98                TokenKind::Minus => T![-],
99                TokenKind::Star => T![*],
100                TokenKind::Slash => T![/],
101                TokenKind::Percent => T![%],
102                TokenKind::Equals => T![=],
103                TokenKind::LessThan => T![<],
104                TokenKind::GreaterThan => T![>],
105                TokenKind::Not => T![!],
106                TokenKind::And => T![&],
107                TokenKind::Or => T![|],
108                TokenKind::Tilde => T![~],
109                TokenKind::Xor => T![^],
110                TokenKind::Dot => T![.],
111                TokenKind::Comma => T![,],
112                TokenKind::Colon => T![:],
113                TokenKind::Semicolon => T![;],
114                TokenKind::Unknown => {
115                    diagnostics.push(Diagnostic::new(
116                        SrcLoc::new(source.clone(), token.span.clone()),
117                        DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
118                    ));
119                    SyntaxKind::Error
120                }
121            };
122
123            parse_tokens.push(ParseToken {
124                span: token.span,
125                kind,
126            });
127        }
128
129        Self {
130            source,
131            parse_tokens,
132            pos: 0,
133            expected: IndexSet::new(),
134            diagnostics,
135            builder: GreenNodeBuilder::new(),
136        }
137    }
138
139    pub fn parse(mut self) -> ParseResult {
140        document(&mut self);
141        ParseResult {
142            diagnostics: self.diagnostics,
143            node: SyntaxNode::new_root(self.builder.finish()),
144        }
145    }
146
147    #[cfg(test)]
148    pub(crate) fn parse_raw(self) -> ParseResult {
149        ParseResult {
150            diagnostics: self.diagnostics,
151            node: SyntaxNode::new_root(self.builder.finish()),
152        }
153    }
154
    /// Consumes any pending trivia and then returns a builder checkpoint,
    /// which can later be passed to `start_at` to open a node at this point.
    pub(crate) fn checkpoint(&mut self) -> Checkpoint {
        // Trivia is eaten first so that it ends up before the checkpoint.
        self.eat_trivia();
        self.builder.checkpoint()
    }
159
160    pub(crate) fn start_including_trivia(&mut self, kind: SyntaxKind) {
161        self.builder.start_node(RueLang::kind_to_raw(kind));
162    }
163
164    pub(crate) fn start(&mut self, kind: SyntaxKind) {
165        self.eat_trivia();
166        self.builder.start_node(RueLang::kind_to_raw(kind));
167    }
168
169    pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
170        self.builder
171            .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
172    }
173
    /// Closes the node most recently opened on the builder.
    pub(crate) fn finish(&mut self) {
        self.builder.finish_node();
    }
177
178    pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
179        for kind in kinds
180            .iter()
181            .sorted_by_key(|kind| Reverse(kind.split().len()))
182        {
183            if self.at(*kind) {
184                return Some(*kind);
185            }
186        }
187        None
188    }
189
190    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
191        self.eat_trivia();
192
193        self.expected.insert(kind);
194
195        let split = kind.split();
196
197        for (i, kind) in split.iter().enumerate() {
198            if self.nth(i) != *kind {
199                return false;
200            }
201        }
202
203        true
204    }
205
206    pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
207        if self.at(kind) {
208            self.bump(kind);
209            true
210        } else {
211            false
212        }
213    }
214
215    pub(crate) fn expect(&mut self, kind: SyntaxKind) {
216        if self.at(kind) {
217            self.bump(kind);
218        } else {
219            self.skip();
220        }
221    }
222
223    fn nth(&self, n: usize) -> SyntaxKind {
224        self.parse_tokens
225            .get(self.pos + n)
226            .map_or(SyntaxKind::Eof, |token| token.kind)
227    }
228
229    pub(crate) fn eat_trivia(&mut self) {
230        while self.nth(0).is_trivia() {
231            self.bump(self.nth(0));
232        }
233    }
234
235    pub(crate) fn skip(&mut self) {
236        let expected = mem::take(&mut self.expected);
237
238        let len = self.source.text.len();
239
240        let span = self
241            .parse_tokens
242            .get(self.pos)
243            .map_or(len..len, |token| token.span.clone());
244
245        self.diagnostics.push(Diagnostic::new(
246            SrcLoc::new(self.source.clone(), span),
247            DiagnosticKind::UnexpectedToken(
248                self.nth(0).to_string(),
249                expected.iter().map(ToString::to_string).collect(),
250            ),
251        ));
252
253        if self.pos < self.parse_tokens.len() {
254            self.pos += 1;
255        }
256    }
257
258    fn bump(&mut self, kind: SyntaxKind) {
259        self.expected.clear();
260
261        let len = kind.split().len();
262
263        let span =
264            self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
265
266        self.builder
267            .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
268
269        self.pos += len;
270    }
271}