1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
11#[derive(Debug)]
12struct ParseToken {
13 span: Range<usize>,
14 kind: SyntaxKind,
15}
16
17#[derive(Debug, Clone)]
18pub struct ParseResult {
19 pub diagnostics: Vec<Diagnostic>,
20 pub node: SyntaxNode,
21}
22
23#[derive(Debug)]
24pub struct Parser {
25 source: Source,
26 parse_tokens: Vec<ParseToken>,
27 pos: usize,
28 expected: IndexSet<SyntaxKind>,
29 diagnostics: Vec<Diagnostic>,
30 builder: GreenNodeBuilder<'static>,
31}
32
33impl Parser {
34 pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35 let mut parse_tokens = Vec::with_capacity(tokens.len());
36 let mut diagnostics = Vec::new();
37
38 for token in tokens {
39 let kind = match token.kind {
40 TokenKind::Whitespace => SyntaxKind::Whitespace,
41 TokenKind::LineComment => SyntaxKind::LineComment,
42 TokenKind::BlockComment { is_terminated } => {
43 if !is_terminated {
44 diagnostics.push(Diagnostic::new(
45 SrcLoc::new(source.clone(), token.span.clone()),
46 DiagnosticKind::UnterminatedBlockComment,
47 ));
48 }
49 SyntaxKind::BlockComment
50 }
51 TokenKind::String { is_terminated } => {
52 if !is_terminated {
53 diagnostics.push(Diagnostic::new(
54 SrcLoc::new(source.clone(), token.span.clone()),
55 DiagnosticKind::UnterminatedString,
56 ));
57 }
58 SyntaxKind::String
59 }
60 TokenKind::Hex { is_terminated } => {
61 if !is_terminated {
62 diagnostics.push(Diagnostic::new(
63 SrcLoc::new(source.clone(), token.span.clone()),
64 DiagnosticKind::UnterminatedHex,
65 ));
66 }
67 SyntaxKind::Hex
68 }
69 TokenKind::Binary { is_terminated } => {
70 if !is_terminated {
71 diagnostics.push(Diagnostic::new(
72 SrcLoc::new(source.clone(), token.span.clone()),
73 DiagnosticKind::UnterminatedBinary,
74 ));
75 }
76 SyntaxKind::Binary
77 }
78 TokenKind::Octal { is_terminated } => {
79 if !is_terminated {
80 diagnostics.push(Diagnostic::new(
81 SrcLoc::new(source.clone(), token.span.clone()),
82 DiagnosticKind::UnterminatedOctal,
83 ));
84 }
85 SyntaxKind::Octal
86 }
87 TokenKind::Integer => SyntaxKind::Integer,
88 TokenKind::Ident => SyntaxKind::Ident,
89 TokenKind::Nil => T![nil],
90 TokenKind::True => T![true],
91 TokenKind::False => T![false],
92 TokenKind::Import => T![import],
93 TokenKind::Export => T![export],
94 TokenKind::Extern => T![extern],
95 TokenKind::Inline => T![inline],
96 TokenKind::Test => T![test],
97 TokenKind::Mod => T![mod],
98 TokenKind::Fn => T![fn],
99 TokenKind::Const => T![const],
100 TokenKind::Type => T![type],
101 TokenKind::Struct => T![struct],
102 TokenKind::Let => T![let],
103 TokenKind::If => T![if],
104 TokenKind::Else => T![else],
105 TokenKind::Return => T![return],
106 TokenKind::Assert => T![assert],
107 TokenKind::Raise => T![raise],
108 TokenKind::Debug => T![debug],
109 TokenKind::Is => T![is],
110 TokenKind::As => T![as],
111 TokenKind::Super => T![super],
112 TokenKind::OpenParen => T!['('],
113 TokenKind::CloseParen => T![')'],
114 TokenKind::OpenBrace => T!['{'],
115 TokenKind::CloseBrace => T!['}'],
116 TokenKind::OpenBracket => T!['['],
117 TokenKind::CloseBracket => T![']'],
118 TokenKind::Plus => T![+],
119 TokenKind::Minus => T![-],
120 TokenKind::Star => T![*],
121 TokenKind::Slash => T![/],
122 TokenKind::Percent => T![%],
123 TokenKind::Equals => T![=],
124 TokenKind::LessThan => T![<],
125 TokenKind::GreaterThan => T![>],
126 TokenKind::Not => T![!],
127 TokenKind::And => T![&],
128 TokenKind::Or => T![|],
129 TokenKind::Tilde => T![~],
130 TokenKind::Xor => T![^],
131 TokenKind::Dot => T![.],
132 TokenKind::Comma => T![,],
133 TokenKind::Colon => T![:],
134 TokenKind::Semicolon => T![;],
135 TokenKind::Unknown => {
136 diagnostics.push(Diagnostic::new(
137 SrcLoc::new(source.clone(), token.span.clone()),
138 DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
139 ));
140 SyntaxKind::Error
141 }
142 };
143
144 parse_tokens.push(ParseToken {
145 span: token.span,
146 kind,
147 });
148 }
149
150 Self {
151 source,
152 parse_tokens,
153 pos: 0,
154 expected: IndexSet::new(),
155 diagnostics,
156 builder: GreenNodeBuilder::new(),
157 }
158 }
159
160 pub fn parse(mut self) -> ParseResult {
161 document(&mut self);
162 ParseResult {
163 diagnostics: self.diagnostics,
164 node: SyntaxNode::new_root(self.builder.finish()),
165 }
166 }
167
/// Test-only variant of [`Parser::parse`]: finishes the tree from whatever
/// has already been fed to the builder, without running the `document`
/// grammar.
#[cfg(test)]
pub(crate) fn parse_raw(self) -> ParseResult {
    let node = SyntaxNode::new_root(self.builder.finish());

    ParseResult {
        diagnostics: self.diagnostics,
        node,
    }
}
175
176 pub(crate) fn checkpoint(&mut self) -> Checkpoint {
177 self.eat_trivia();
178 self.builder.checkpoint()
179 }
180
181 pub(crate) fn start_including_trivia(&mut self, kind: SyntaxKind) {
182 self.builder.start_node(RueLang::kind_to_raw(kind));
183 }
184
185 pub(crate) fn start(&mut self, kind: SyntaxKind) {
186 self.eat_trivia();
187 self.builder.start_node(RueLang::kind_to_raw(kind));
188 }
189
190 pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
191 self.builder
192 .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
193 }
194
195 pub(crate) fn finish(&mut self) {
196 self.builder.finish_node();
197 }
198
199 pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
200 for kind in kinds
201 .iter()
202 .sorted_by_key(|kind| Reverse(kind.split().len()))
203 {
204 if self.at(*kind) {
205 return Some(*kind);
206 }
207 }
208 None
209 }
210
211 pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
212 self.eat_trivia();
213
214 self.expected.insert(kind);
215
216 let split = kind.split();
217
218 for (i, kind) in split.iter().enumerate() {
219 if self.nth(i) != *kind {
220 return false;
221 }
222 }
223
224 true
225 }
226
227 pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
228 if self.at(kind) {
229 self.bump(kind);
230 true
231 } else {
232 false
233 }
234 }
235
236 pub(crate) fn expect(&mut self, kind: SyntaxKind) {
237 if self.at(kind) {
238 self.bump(kind);
239 } else {
240 self.skip();
241 }
242 }
243
244 fn nth(&self, n: usize) -> SyntaxKind {
245 self.parse_tokens
246 .get(self.pos + n)
247 .map_or(SyntaxKind::Eof, |token| token.kind)
248 }
249
250 pub(crate) fn eat_trivia(&mut self) {
251 while self.nth(0).is_trivia() {
252 self.bump(self.nth(0));
253 }
254 }
255
256 pub(crate) fn skip(&mut self) {
257 let expected = mem::take(&mut self.expected);
258
259 let len = self.source.text.len();
260
261 let span = self
262 .parse_tokens
263 .get(self.pos)
264 .map_or(len..len, |token| token.span.clone());
265
266 self.diagnostics.push(Diagnostic::new(
267 SrcLoc::new(self.source.clone(), span),
268 DiagnosticKind::UnexpectedToken(
269 self.nth(0).to_string(),
270 expected.iter().map(ToString::to_string).collect(),
271 ),
272 ));
273
274 if self.pos < self.parse_tokens.len() {
275 self.pos += 1;
276 }
277 }
278
279 pub(crate) fn error(&mut self, kind: DiagnosticKind) {
280 self.expected.clear();
281
282 let len = self.source.text.len();
283
284 let span = self
285 .parse_tokens
286 .get(self.pos)
287 .map_or(len..len, |token| token.span.clone());
288
289 self.diagnostics.push(Diagnostic::new(
290 SrcLoc::new(self.source.clone(), span),
291 kind,
292 ));
293 }
294
295 fn bump(&mut self, kind: SyntaxKind) {
296 self.expected.clear();
297
298 let len = kind.split().len();
299
300 let span =
301 self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
302
303 self.builder
304 .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
305
306 self.pos += len;
307 }
308}