// rue_parser/parser.rs

1use std::{cmp::Reverse, mem, ops::Range};
2
3use indexmap::IndexSet;
4use itertools::Itertools;
5use rowan::{Checkpoint, GreenNodeBuilder, Language};
6use rue_diagnostic::{Diagnostic, DiagnosticKind, Source, SrcLoc};
7use rue_lexer::{Token, TokenKind};
8
9use crate::{RueLang, SyntaxKind, SyntaxNode, T, document};
10
11#[derive(Debug)]
12struct ParseToken {
13    span: Range<usize>,
14    kind: SyntaxKind,
15}
16
17#[derive(Debug, Clone)]
18pub struct ParseResult {
19    pub diagnostics: Vec<Diagnostic>,
20    pub node: SyntaxNode,
21}
22
23#[derive(Debug)]
24pub struct Parser {
25    source: Source,
26    parse_tokens: Vec<ParseToken>,
27    pos: usize,
28    expected: IndexSet<SyntaxKind>,
29    diagnostics: Vec<Diagnostic>,
30    builder: GreenNodeBuilder<'static>,
31}
32
33impl Parser {
34    pub fn new(source: Source, tokens: Vec<Token>) -> Self {
35        let mut parse_tokens = Vec::with_capacity(tokens.len());
36        let mut diagnostics = Vec::new();
37
38        for token in tokens {
39            let kind = match token.kind {
40                TokenKind::Whitespace => SyntaxKind::Whitespace,
41                TokenKind::LineComment => SyntaxKind::LineComment,
42                TokenKind::BlockComment { is_terminated } => {
43                    if !is_terminated {
44                        diagnostics.push(Diagnostic::new(
45                            SrcLoc::new(source.clone(), token.span.clone()),
46                            DiagnosticKind::UnterminatedBlockComment,
47                        ));
48                    }
49                    SyntaxKind::BlockComment
50                }
51                TokenKind::String { is_terminated } => {
52                    if !is_terminated {
53                        diagnostics.push(Diagnostic::new(
54                            SrcLoc::new(source.clone(), token.span.clone()),
55                            DiagnosticKind::UnterminatedString,
56                        ));
57                    }
58                    SyntaxKind::String
59                }
60                TokenKind::Hex { is_terminated } => {
61                    if !is_terminated {
62                        diagnostics.push(Diagnostic::new(
63                            SrcLoc::new(source.clone(), token.span.clone()),
64                            DiagnosticKind::UnterminatedHex,
65                        ));
66                    }
67                    SyntaxKind::Hex
68                }
69                TokenKind::Integer => SyntaxKind::Integer,
70                TokenKind::Ident => SyntaxKind::Ident,
71                TokenKind::Nil => T![nil],
72                TokenKind::True => T![true],
73                TokenKind::False => T![false],
74                TokenKind::Export => T![export],
75                TokenKind::Inline => T![inline],
76                TokenKind::Mod => T![mod],
77                TokenKind::Fn => T![fn],
78                TokenKind::Const => T![const],
79                TokenKind::Type => T![type],
80                TokenKind::Struct => T![struct],
81                TokenKind::Let => T![let],
82                TokenKind::If => T![if],
83                TokenKind::Else => T![else],
84                TokenKind::Return => T![return],
85                TokenKind::Assert => T![assert],
86                TokenKind::Raise => T![raise],
87                TokenKind::Is => T![is],
88                TokenKind::As => T![as],
89                TokenKind::OpenParen => T!['('],
90                TokenKind::CloseParen => T![')'],
91                TokenKind::OpenBrace => T!['{'],
92                TokenKind::CloseBrace => T!['}'],
93                TokenKind::OpenBracket => T!['['],
94                TokenKind::CloseBracket => T![']'],
95                TokenKind::Plus => T![+],
96                TokenKind::Minus => T![-],
97                TokenKind::Star => T![*],
98                TokenKind::Slash => T![/],
99                TokenKind::Percent => T![%],
100                TokenKind::Equals => T![=],
101                TokenKind::LessThan => T![<],
102                TokenKind::GreaterThan => T![>],
103                TokenKind::Not => T![!],
104                TokenKind::And => T![&],
105                TokenKind::Or => T![|],
106                TokenKind::Tilde => T![~],
107                TokenKind::Xor => T![^],
108                TokenKind::Dot => T![.],
109                TokenKind::Comma => T![,],
110                TokenKind::Colon => T![:],
111                TokenKind::Semicolon => T![;],
112                TokenKind::Unknown => {
113                    diagnostics.push(Diagnostic::new(
114                        SrcLoc::new(source.clone(), token.span.clone()),
115                        DiagnosticKind::UnknownToken(source.text[token.span.clone()].to_string()),
116                    ));
117                    SyntaxKind::Error
118                }
119            };
120
121            parse_tokens.push(ParseToken {
122                span: token.span,
123                kind,
124            });
125        }
126
127        Self {
128            source,
129            parse_tokens,
130            pos: 0,
131            expected: IndexSet::new(),
132            diagnostics,
133            builder: GreenNodeBuilder::new(),
134        }
135    }
136
137    pub fn parse(mut self) -> ParseResult {
138        document(&mut self);
139        ParseResult {
140            diagnostics: self.diagnostics,
141            node: SyntaxNode::new_root(self.builder.finish()),
142        }
143    }
144
145    #[cfg(test)]
146    pub(crate) fn parse_raw(self) -> ParseResult {
147        ParseResult {
148            diagnostics: self.diagnostics,
149            node: SyntaxNode::new_root(self.builder.finish()),
150        }
151    }
152
153    pub(crate) fn checkpoint(&mut self) -> Checkpoint {
154        self.builder.checkpoint()
155    }
156
157    pub(crate) fn start(&mut self, kind: SyntaxKind) {
158        self.builder.start_node(RueLang::kind_to_raw(kind));
159    }
160
161    pub(crate) fn start_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
162        self.builder
163            .start_node_at(checkpoint, RueLang::kind_to_raw(kind));
164    }
165
166    pub(crate) fn finish(&mut self) {
167        self.eat_trivia();
168        self.builder.finish_node();
169    }
170
171    pub(crate) fn at_any(&mut self, kinds: &[SyntaxKind]) -> Option<SyntaxKind> {
172        for kind in kinds
173            .iter()
174            .sorted_by_key(|kind| Reverse(kind.split().len()))
175        {
176            if self.at(*kind) {
177                return Some(*kind);
178            }
179        }
180        None
181    }
182
183    pub(crate) fn at(&mut self, kind: SyntaxKind) -> bool {
184        self.eat_trivia();
185
186        self.expected.insert(kind);
187
188        let split = kind.split();
189
190        for (i, kind) in split.iter().enumerate() {
191            if self.nth(i) != *kind {
192                return false;
193            }
194        }
195
196        true
197    }
198
199    pub(crate) fn try_eat(&mut self, kind: SyntaxKind) -> bool {
200        if self.at(kind) {
201            self.bump(kind);
202            true
203        } else {
204            false
205        }
206    }
207
208    pub(crate) fn expect(&mut self, kind: SyntaxKind) {
209        if self.at(kind) {
210            self.bump(kind);
211        } else {
212            self.skip();
213        }
214    }
215
216    fn nth(&self, n: usize) -> SyntaxKind {
217        self.parse_tokens
218            .get(self.pos + n)
219            .map_or(SyntaxKind::Eof, |token| token.kind)
220    }
221
222    fn eat_trivia(&mut self) {
223        while self.nth(0).is_trivia() {
224            self.bump(self.nth(0));
225        }
226    }
227
228    pub(crate) fn skip(&mut self) {
229        let expected = mem::take(&mut self.expected);
230
231        let len = self.source.text.len();
232
233        let span = self
234            .parse_tokens
235            .get(self.pos)
236            .map_or(len..len, |token| token.span.clone());
237
238        self.diagnostics.push(Diagnostic::new(
239            SrcLoc::new(self.source.clone(), span),
240            DiagnosticKind::UnexpectedToken(
241                self.nth(0).to_string(),
242                expected.iter().map(ToString::to_string).collect(),
243            ),
244        ));
245
246        if self.pos < self.parse_tokens.len() {
247            self.pos += 1;
248        }
249    }
250
251    fn bump(&mut self, kind: SyntaxKind) {
252        self.expected.clear();
253
254        let len = kind.split().len();
255
256        let span =
257            self.parse_tokens[self.pos].span.start..self.parse_tokens[self.pos + len - 1].span.end;
258
259        self.builder
260            .token(RueLang::kind_to_raw(kind), &self.source.text[span]);
261
262        self.pos += len;
263    }
264}