1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
/// A lexer token after translation into the parser's own vocabulary.
#[derive(Debug)]
struct ParseToken {
    /// Byte range of the token; used to slice the token's text out of the
    /// source text.
    span: Range<usize>,
    /// Syntax kind the lexer's `TokenKind` was mapped to in `Parser::new`.
    kind: SyntaxKind,
}
16
/// The outcome of a parse: the syntax tree plus everything that went wrong
/// while producing it.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Diagnostics collected both while converting lexer tokens and while
    /// parsing.
    pub diagnostics: Vec<Diagnostic>,
    /// Root of the syntax tree produced by the green node builder.
    pub node: SyntaxNode,
}
22
/// Token-stream cursor and tree builder that the grammar functions drive.
#[derive(Debug)]
pub struct Parser {
    /// Source the tokens were lexed from; supplies token text and diagnostic
    /// locations.
    source: Source,
    /// All tokens (trivia included), already mapped to `SyntaxKind`.
    parse_tokens: Vec<ParseToken>,
    /// Index into `parse_tokens` of the next token that has not been consumed.
    pos: usize,
    /// Kinds tested with `at` since the last token was bumped or skipped;
    /// reported as the "expected" list of an unexpected-token diagnostic.
    expected: IndexSet<SyntaxKind>,
    /// Diagnostics accumulated so far.
    diagnostics: Vec<Diagnostic>,
    /// Builder for the green tree that backs the resulting `SyntaxNode`.
    builder: GreenNodeBuilder<'static>,
}
32
33impl Parser {
34 pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35 let mut parse_tokens = Vec::with_capacity(tokens.len());
36 let mut diagnostics = Vec::new();
37
38 for token in tokens {
39 let kind = match token.kind {
40 TokenKind::Whitespace => SyntaxKind::Whitespace,
41 TokenKind::LineComment => SyntaxKind::LineComment,
42 TokenKind::BlockComment { is_terminated } => {
43 if !is_terminated {
44 diagnostics.push(Diagnostic::new(
45 SrcLoc::new(source.clone(), token.span.clone()),
46 DiagnosticKind::UnterminatedBlockComment,
47 ));
48 }
49 SyntaxKind::BlockComment
50 }
51 TokenKind::String { is_terminated } => {
52 if !is_terminated {
53 diagnostics.push(Diagnostic::new(
54 SrcLoc::new(source.clone(), token.span.clone()),
55 DiagnosticKind::UnterminatedString,
56 ));
57 }
58 SyntaxKind::String
59 }
60 TokenKind::Hex { is_terminated } => {
61 if !is_terminated {
62 diagnostics.push(Diagnostic::new(
63 SrcLoc::new(source.clone(), token.span.clone()),
64 DiagnosticKind::UnterminatedHex,
65 ));
66 }
67 SyntaxKind::Hex
68 }
69 TokenKind::Integer => SyntaxKind::Integer,
70 TokenKind::Ident => SyntaxKind::Ident,
71 TokenKind::Nil => T![nil],
72 TokenKind::True => T![true],
73 TokenKind::False => T![false],
74 TokenKind::Export => T![export],
75 TokenKind::Inline => T![inline],
76 TokenKind::Mod => T![mod],
77 TokenKind::Fn => T![fn],
78 TokenKind::Const => T![const],
79 TokenKind::Type => T![type],
80 TokenKind::Struct => T![struct],
81 TokenKind::Let => T![let],
82 TokenKind::If => T![if],
83 TokenKind::Else => T![else],
84 TokenKind::Return => T![return],
85 TokenKind::Assert => T![assert],
86 TokenKind::Raise => T![raise],
87 TokenKind::Is => T![is],
88 TokenKind::As => T![as],
89 TokenKind::OpenParen => T!['('],
90 TokenKind::CloseParen => T![')'],
91 TokenKind::OpenBrace => T!['{'],
92 TokenKind::CloseBrace => T!['}'],
93 TokenKind::OpenBracket => T!['['],
94 TokenKind::CloseBracket => T![']'],
95 TokenKind::Plus => T![+],
96 TokenKind::Minus => T![-],
97 TokenKind::Star => T![*],
98 TokenKind::Slash => T![/],
99 TokenKind::Percent => T![%],
100 TokenKind::Equals => T![=],
101 TokenKind::LessThan => T![<],
102 TokenKind::GreaterThan => T![>],
103 TokenKind::Not => T![!],
104 TokenKind::And => T![&],
105 TokenKind::Or => T![|],
106 TokenKind::Tilde => T![~],
107 TokenKind::Xor => T![^],
108 TokenKind::Dot => T![.],
109 TokenKind::Comma => T![,],
110 TokenKind::Colon => T![:],
111 TokenKind::Semicolon => T![;],
112 TokenKind::Unknown => {
113 diagnostics.push(Diagnostic::new(
114 SrcLoc::new(source.clone(), token.span.clone()),
115 DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
116 ));
117 SyntaxKind::Error
118 }
119 };
120
121 parse_tokens.push(ParseToken {
122 span: token.span,
123 kind,
124 });
125 }
126
127 Self {
128 source,
129 parse_tokens,
130 pos: 0,
131 expected: IndexSet::new(),
132 diagnostics,
133 builder: GreenNodeBuilder::new(),
134 }
135 }
136
137 pub fn parse(mut self) -> ParseResult {
138 document(&mut self);
139 ParseResult {
140 diagnostics: self.diagnostics,
141 node: SyntaxNode::new_root(self.builder.finish()),
142 }
143 }
144
145 #[cfg(test)]
146 pub(crate) fn parse_raw(self) -> ParseResult {
147 ParseResult {
148 diagnostics: self.diagnostics,
149 node: SyntaxNode::new_root(self.builder.finish()),
150 }
151 }
152
153 pub(crate) fn checkpoint(&mut self) -> Checkpoint {
154 self.builder.checkpoint()
155 }
156
157 pub(crate) fn start(&mut self, kind: SyntaxKind) {
158 self.builder.start_node(RueLang::kind_to_raw(kind));
159 }
160
161 pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
162 self.builder
163 .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
164 }
165
    /// Finishes the builder's current node.
    ///
    /// Pending trivia is consumed first, so it is emitted inside the node
    /// being closed rather than after it.
    pub(crate) fn finish(&mut self) {
        // Must run before `finish_node` so the trivia tokens land in this node.
        self.eat_trivia();
        self.builder.finish_node();
    }
170
171 pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
172 for kind in kinds
173 .iter()
174 .sorted_by_key(|kind| Reverse(kind.split().len()))
175 {
176 if self.at(*kind) {
177 return Some(*kind);
178 }
179 }
180 None
181 }
182
183 pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
184 self.eat_trivia();
185
186 self.expected.insert(kind);
187
188 let split = kind.split();
189
190 for (i, kind) in split.iter().enumerate() {
191 if self.nth(i) != *kind {
192 return false;
193 }
194 }
195
196 true
197 }
198
199 pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
200 if self.at(kind) {
201 self.bump(kind);
202 true
203 } else {
204 false
205 }
206 }
207
208 pub(crate) fn expect(&mut self, kind: SyntaxKind) {
209 if self.at(kind) {
210 self.bump(kind);
211 } else {
212 self.skip();
213 }
214 }
215
216 fn nth(&self, n: usize) -> SyntaxKind {
217 self.parse_tokens
218 .get(self.pos + n)
219 .map_or(SyntaxKind::Eof, |token| token.kind)
220 }
221
222 fn eat_trivia(&mut self) {
223 while self.nth(0).is_trivia() {
224 self.bump(self.nth(0));
225 }
226 }
227
228 pub(crate) fn skip(&mut self) {
229 let expected = mem::take(&mut self.expected);
230
231 let len = self.source.text.len();
232
233 let span = self
234 .parse_tokens
235 .get(self.pos)
236 .map_or(len..len, |token| token.span.clone());
237
238 self.diagnostics.push(Diagnostic::new(
239 SrcLoc::new(self.source.clone(), span),
240 DiagnosticKind::UnexpectedToken(
241 self.nth(0).to_string(),
242 expected.iter().map(ToString::to_string).collect(),
243 ),
244 ));
245
246 if self.pos < self.parse_tokens.len() {
247 self.pos += 1;
248 }
249 }
250
251 fn bump(&mut self, kind: SyntaxKind) {
252 self.expected.clear();
253
254 let len = kind.split().len();
255
256 let span =
257 self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
258
259 self.builder
260 .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
261
262 self.pos += len;
263 }
264}