Skip to main content

rusty_javac/parser/
core.rs

1use crate::ast::JavaSyntaxKind;
2use crate::diagnostics::Diagnostic;
3use crate::lexer::Lexer;
4use crate::parser::display::token_display;
5use rowan::GreenNodeBuilder;
6use text_size::{TextRange, TextSize};
7
8pub struct Parse {
9    pub green_node: rowan::GreenNode,
10    pub errors: Vec<ParseError>,
11}
12
/// A single problem reported by the parser, tied to a span of the source text.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Main description of the problem.
    pub message: String,
    /// Byte offset in the source at which the offending span starts.
    pub offset: usize,
    /// Length of the offending span in bytes.
    pub len: usize,
    /// Text used as the primary label when the error is turned into a diagnostic.
    pub label: String,
    /// Optional hint; a generic hint is substituted when this is `None`.
    pub help: Option<String>,
}
21
22impl ParseError {
23    pub fn diagnostic(&self) -> Diagnostic {
24        Diagnostic::error(self.message.clone(), self.range())
25            .with_code("P0001")
26            .with_primary_label(self.label.clone())
27            .with_help(
28                self.help
29                    .clone()
30                    .unwrap_or_else(|| "check the token at the highlighted position".to_string()),
31            )
32    }
33
34    fn range(&self) -> TextRange {
35        let start = TextSize::from(self.offset.min(u32::MAX as usize) as u32);
36        let end = TextSize::from((self.offset + self.len).min(u32::MAX as usize) as u32);
37        TextRange::new(start, end)
38    }
39}
40
41pub(crate) struct Token {
42    pub(crate) kind: JavaSyntaxKind,
43    pub(crate) text: String,
44    pub(crate) offset: usize,
45}
46
47pub struct Parser {
48    pub(crate) source: String,
49    pub(crate) tokens: Vec<Token>,
50    pub(crate) pos: usize,
51    trivia_end: usize,
52    pub(crate) builder: GreenNodeBuilder<'static>,
53    pub(crate) errors: Vec<ParseError>,
54}
55
56impl Parser {
57    pub fn parse(source: &str) -> Parse {
58        let lexer = Lexer::new(source);
59        let tokens: Vec<_> = lexer
60            .map(|t| Token {
61                kind: t.kind,
62                text: t.text,
63                offset: u32::from(t.range.start()) as usize,
64            })
65            .collect();
66
67        let mut parser = Parser {
68            source: source.to_string(),
69            tokens,
70            pos: 0,
71            trivia_end: 0,
72            builder: GreenNodeBuilder::new(),
73            errors: Vec::new(),
74        };
75
76        crate::parser::top_level::compilation_unit(&mut parser);
77        let green_node = parser.builder.finish();
78
79        Parse {
80            green_node,
81            errors: parser.errors,
82        }
83    }
84
85    pub(crate) fn start(&mut self) -> Marker {
86        let _pos = self.pos;
87        let checkpoint = self.builder.checkpoint();
88        Marker { _pos, checkpoint }
89    }
90
91    pub(crate) fn kind(&self) -> JavaSyntaxKind {
92        self.tokens
93            .get(self.pos)
94            .map(|t| t.kind)
95            .unwrap_or(JavaSyntaxKind::Error)
96    }
97
98    pub(crate) fn look(&self, ahead: usize) -> JavaSyntaxKind {
99        self.tokens
100            .get(self.pos + ahead)
101            .map(|t| t.kind)
102            .unwrap_or(JavaSyntaxKind::Error)
103    }
104
105    pub(crate) fn at(&self, k: JavaSyntaxKind) -> bool {
106        self.kind() == k
107    }
108
109    pub(crate) fn at_any(&self, ks: &[JavaSyntaxKind]) -> bool {
110        ks.contains(&self.kind())
111    }
112
113    pub(crate) fn bump(&mut self) {
114        if self.pos < self.tokens.len() {
115            let tok = &self.tokens[self.pos];
116            if self.trivia_end < tok.offset {
117                self.builder.token(
118                    JavaSyntaxKind::Whitespace.into(),
119                    &self.source[self.trivia_end..tok.offset],
120                );
121            }
122            self.builder.token(tok.kind.into(), tok.text.as_str());
123            self.trivia_end = tok.offset + tok.text.len();
124            self.pos += 1;
125        }
126    }
127
128    pub(crate) fn expect(&mut self, k: JavaSyntaxKind) {
129        if self.at(k) {
130            self.bump();
131        } else {
132            self.err_expected(k);
133        }
134    }
135
136    pub(crate) fn eat(&mut self, k: JavaSyntaxKind) -> bool {
137        if self.at(k) {
138            self.bump();
139            true
140        } else {
141            false
142        }
143    }
144
145    pub(crate) fn err(&mut self, msg: impl Into<String>) {
146        let msg = msg.into();
147        let (offset, len) = self.current_span();
148        let found = token_display(self.kind());
149        self.errors.push(ParseError {
150            message: msg,
151            offset,
152            len,
153            label: format!("found {found}"),
154            help: None,
155        });
156    }
157
158    pub(crate) fn err_and_bump(&mut self, msg: impl Into<String>) {
159        self.err(msg);
160        self.bump();
161    }
162
163    fn err_expected(&mut self, expected: JavaSyntaxKind) {
164        let (offset, len) = self.current_span();
165        let expected = token_display(expected);
166        let found = token_display(self.kind());
167        self.errors.push(ParseError {
168            message: format!("expected {expected}, found {found}"),
169            offset,
170            len,
171            label: format!("expected {expected} here"),
172            help: Some(format!("insert {expected} or remove {found}")),
173        });
174    }
175
176    fn current_span(&self) -> (usize, usize) {
177        self.tokens
178            .get(self.pos)
179            .map(|token| (token.offset, token.text.len().max(1)))
180            .unwrap_or_else(|| (self.source.len(), 0))
181    }
182
183    pub(crate) fn lookahead(&self) -> Lookahead<'_> {
184        Lookahead {
185            tokens: &self.tokens,
186            pos: self.pos,
187        }
188    }
189}
190
191pub(crate) struct Marker {
192    _pos: usize,
193    checkpoint: rowan::Checkpoint,
194}
195
196impl Marker {
197    pub(crate) fn complete(self, p: &mut Parser, kind: JavaSyntaxKind) {
198        p.builder.start_node_at(self.checkpoint, kind.into());
199        p.builder.finish_node();
200    }
201
202    pub(crate) fn abandon(self, _p: &mut Parser) {}
203}
204
205pub(crate) struct Lookahead<'a> {
206    tokens: &'a [Token],
207    pos: usize,
208}
209
210impl<'a> Lookahead<'a> {
211    pub(crate) fn at(&self, kind: JavaSyntaxKind) -> bool {
212        self.kind() == kind
213    }
214
215    pub(crate) fn kind(&self) -> JavaSyntaxKind {
216        self.tokens
217            .get(self.pos)
218            .map(|t| t.kind)
219            .unwrap_or(JavaSyntaxKind::Error)
220    }
221
222    pub(crate) fn at_any(&self, ks: &[JavaSyntaxKind]) -> bool {
223        ks.contains(&self.kind())
224    }
225
226    pub(crate) fn advance(&mut self) {
227        if self.pos < self.tokens.len() {
228            self.pos += 1;
229        }
230    }
231
232    pub(crate) fn eat(&mut self, kind: JavaSyntaxKind) -> bool {
233        if self.at(kind) {
234            self.advance();
235            true
236        } else {
237            false
238        }
239    }
240
241    pub(crate) fn skip_balanced(&mut self, open: JavaSyntaxKind, close: JavaSyntaxKind) {
242        if !self.eat(open) {
243            return;
244        }
245        let mut depth = 1usize;
246        while depth > 0 && self.pos < self.tokens.len() {
247            if self.at(open) {
248                depth += 1;
249            } else if self.at(close) {
250                depth -= 1;
251            }
252            self.advance();
253        }
254    }
255
256    pub(crate) fn skip_annotations(&mut self) {
257        use JavaSyntaxKind::*;
258        while self.eat(At) {
259            self.eat(Ident);
260            self.skip_balanced(LParen, RParen);
261        }
262    }
263
264    pub(crate) fn skip_trivia(&mut self) {
265        use JavaSyntaxKind::*;
266        while self.pos < self.tokens.len()
267            && matches!(self.tokens[self.pos].kind, Whitespace | Comment)
268        {
269            self.pos += 1;
270        }
271    }
272
273    pub(crate) fn skip_type(&mut self) {
274        use JavaSyntaxKind::*;
275        let primitives = [
276            IntKw, LongKw, ShortKw, ByteKw, CharKw, FloatKw, DoubleKw, BooleanKw, VoidKw,
277        ];
278        if self.at_any(&primitives) {
279            self.advance();
280        } else {
281            while self.eat(Ident) {
282                self.skip_balanced(Lt, Gt);
283                if !self.eat(Dot) {
284                    break;
285                }
286            }
287        }
288    }
289
290    pub(crate) fn skip_array_dims(&mut self) {
291        use JavaSyntaxKind::*;
292        while self.at(LBrack)
293            && self
294                .tokens
295                .get(self.pos + 1)
296                .is_some_and(|t| t.kind == RBrack)
297        {
298            self.pos += 2;
299        }
300    }
301}