Skip to main content

java_lang/
parse.rs

1use std::cell::{Cell, RefCell};
2
3use crate::{
4    ast::{Comment, CommentKind},
5    error::{Error, Result},
6    ident::Ident,
7    lexer,
8    span::Span,
9    token::{Token, TokenKind},
10};
11
/// Saved parser state for speculative parsing with backtracking.
///
/// Produced by `ParseStream::save_state` and consumed by
/// `ParseStream::restore_state`. The snapshot is just two small integers, so
/// it is `Copy` and may be restored more than once.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParserState {
    /// Index into the token slice at the time of the snapshot.
    cursor: usize,
    /// Number of synthetic `>` tokens that were still pending at the time of
    /// the snapshot.
    pending_gts: u8,
}
17
/// The parsing cursor, similar to syn's `ParseBuffer`.
///
/// This is the primary interface for consuming tokens during parsing.
/// All mutable state is held in `Cell` / `RefCell` fields, which is why the
/// methods of this type take `&self` rather than `&mut self`.
pub struct ParseStream<'a> {
    /// The token slice being parsed. The index arithmetic used by the methods
    /// (`len() - 1`) presumes the slice is non-empty; presumably the lexer
    /// always ends it with an EOF token (verify against `lexer::tokenize`).
    tokens: &'a [Token],
    /// Index into `tokens` of the current position.
    cursor: Cell<usize>,
    /// Errors recorded through `error()` and drained by `take_errors()`.
    errors: RefCell<Vec<Error>>,
    /// Pending greater-than tokens from splitting >> or >>> for nested generics
    pending_gts: Cell<u8>,
    /// Pre-allocated synthetic `>` token for >> / >>> splitting
    synthetic_gt: Token,
    /// Comments skipped by skip_comments(), available for collection
    pending_comments: RefCell<Vec<Comment>>,
}
32
33impl<'a> ParseStream<'a> {
34    pub(crate) fn new(tokens: &'a [Token]) -> Self {
35        ParseStream {
36            tokens,
37            cursor: Cell::new(0),
38            errors: RefCell::new(Vec::new()),
39            pending_gts: Cell::new(0),
40            synthetic_gt: Token {
41                kind: TokenKind::Gt,
42                span: Span::new(0, 0),
43            },
44            pending_comments: RefCell::new(Vec::new()),
45        }
46    }
47
48    /// Returns true if there are no more tokens to parse (except EOF).
49    pub fn is_empty(&self) -> bool {
50        self.skip_comments();
51        let cursor = self.cursor.get();
52        cursor >= self.tokens.len() - 1
53    }
54
55    /// Returns true if there are no more tokens (including comments) except EOF.
56    pub fn is_empty_raw(&self) -> bool {
57        let cursor = self.cursor.get();
58        cursor >= self.tokens.len() - 1
59    }
60
61    /// Peek at the current token without consuming it.
62    /// Comment tokens are skipped transparently.
63    pub fn peek(&self) -> &Token {
64        if self.pending_gts.get() > 0 {
65            return &self.synthetic_gt;
66        }
67        self.skip_comments();
68        let cursor = self.cursor.get();
69        &self.tokens[cursor.min(self.tokens.len() - 1)]
70    }
71
72    /// Peek at the raw token at the cursor without skipping comments.
73    pub fn peek_raw(&self) -> &Token {
74        let cursor = self.cursor.get();
75        &self.tokens[cursor.min(self.tokens.len() - 1)]
76    }
77
78    /// Get the current cursor position.
79    pub fn cursor(&self) -> usize {
80        self.cursor.get()
81    }
82
83    /// Set the current cursor position (used for backtracking).
84    pub fn set_cursor(&self, pos: usize) {
85        self.cursor.set(pos);
86    }
87
88    /// Save the full parser state (cursor + pending_gts) for speculative parsing.
89    pub fn save_state(&self) -> ParserState {
90        ParserState {
91            cursor: self.cursor.get(),
92            pending_gts: self.pending_gts.get(),
93        }
94    }
95
96    /// Restore the full parser state (cursor + pending_gts) after speculative parsing.
97    pub fn restore_state(&self, state: ParserState) {
98        self.cursor.set(state.cursor);
99        self.pending_gts.set(state.pending_gts);
100    }
101
102    /// Advance past the current token and return it.
103    /// Comment tokens are skipped transparently.
104    fn advance(&self) -> &Token {
105        if self.pending_gts.get() > 0 {
106            self.pending_gts.set(self.pending_gts.get() - 1);
107            return &self.synthetic_gt;
108        }
109        let cursor = self.cursor.get();
110        let tok = &self.tokens[cursor];
111        if cursor < self.tokens.len() - 1 {
112            self.cursor.set(cursor + 1);
113        }
114        self.skip_comments();
115        tok
116    }
117
118    /// Push an error to the error list.
119    pub fn error<T: std::fmt::Display>(&self, span: Span, msg: T) -> Error {
120        let err = Error::new(span, msg);
121        self.errors.borrow_mut().push(err.clone());
122        err
123    }
124
125    /// Split a >> token into two > tokens for nested generic parsing.
126    pub fn split_gt(&self) {
127        let cursor = self.cursor.get();
128        let kind = &self.tokens[cursor.min(self.tokens.len() - 1)].kind;
129        match kind {
130            TokenKind::GtGt => {
131                self.advance();
132                self.pending_gts.set(self.pending_gts.get() + 1);
133            }
134            TokenKind::GtGtGt => {
135                self.advance();
136                self.pending_gts.set(self.pending_gts.get() + 2);
137            }
138            _ => {}
139        }
140    }
141
142    /// Consume and return the current token regardless of its kind.
143    pub fn next(&self) -> &Token {
144        self.advance()
145    }
146
147    /// Check if the current token matches the given kind.
148    pub fn is(&self, kind: &TokenKind) -> bool {
149        &self.peek().kind == kind
150    }
151
152    /// Check if the current token is an identifier with the given name.
153    pub fn is_ident(&self, name: &str) -> bool {
154        match &self.peek().kind {
155            TokenKind::Ident(s) => s == name,
156            _ => false,
157        }
158    }
159
160    /// Check if the current token is any identifier (including contextual keywords).
161    pub fn is_any_ident(&self) -> bool {
162        matches!(
163            &self.peek().kind,
164            TokenKind::Ident(_)
165                | TokenKind::Record
166                | TokenKind::Sealed
167                | TokenKind::Var
168                | TokenKind::Yield
169                | TokenKind::Open
170                | TokenKind::Provides
171                | TokenKind::Requires
172                | TokenKind::Uses
173                | TokenKind::With
174                | TokenKind::When
175                | TokenKind::To
176                | TokenKind::Exports
177                | TokenKind::Opens
178                | TokenKind::Transitive
179                | TokenKind::Permits
180                | TokenKind::NonSealed
181                | TokenKind::Module
182                | TokenKind::Byte
183                | TokenKind::Short
184                | TokenKind::Int
185                | TokenKind::Long
186                | TokenKind::Char
187                | TokenKind::Float
188                | TokenKind::Double
189                | TokenKind::Boolean
190                | TokenKind::Void
191        )
192    }
193
194    /// Check if the current token can be used as a type name.
195    pub fn is_type_ident(&self) -> bool {
196        self.is_any_ident() || self.is(&TokenKind::At)
197    }
198
199    /// Check if the current token is a given keyword.
200    pub fn is_keyword(&self, kind: TokenKind) -> bool {
201        self.peek().kind == kind
202    }
203
204    /// Consume the next token if it matches the expected kind.
205    pub fn eat(&self, expected: &TokenKind) -> bool {
206        if self.is(expected) {
207            self.advance();
208            true
209        } else {
210            false
211        }
212    }
213
214    /// If the current token matches the given kind, consume it.
215    /// Otherwise, report an error.
216    pub fn expect(&self, kind: TokenKind) -> Result<()> {
217        if self.is(&kind) {
218            self.advance();
219            Ok(())
220        } else {
221            Err(Error::expected_token(self.peek().span, &kind.to_string()))
222        }
223    }
224
225    /// Look ahead `n` tokens (skipping comments).
226    pub fn look_ahead(&self, n: usize) -> &Token {
227        let mut pos = self.cursor.get();
228        let mut remaining = n;
229        while pos < self.tokens.len() - 1 {
230            if !is_comment_token(&self.tokens[pos].kind) {
231                if remaining == 0 {
232                    break;
233                }
234                remaining -= 1;
235            }
236            pos += 1;
237        }
238        &self.tokens[pos.min(self.tokens.len() - 1)]
239    }
240
241    /// Parse a comma-separated list of items terminated by some token.
242    pub fn parse_terminated<T, F>(&self, mut parse_item: F) -> Result<Vec<T>>
243    where
244        F: FnMut(&ParseStream) -> Result<T>,
245    {
246        let mut items = Vec::new();
247        if self.is_empty() || !can_start_item(&self.peek().kind) {
248            return Ok(items);
249        }
250        loop {
251            items.push(parse_item(self)?);
252            if !self.eat(&TokenKind::Comma) {
253                break;
254            }
255        }
256        Ok(items)
257    }
258
259    /// Parse zero or more items separated by commas, not requiring a terminator.
260    pub fn parse_separated<T, F>(
261        &self,
262        can_start_fn: fn(&TokenKind) -> bool,
263        mut parse_item: F,
264    ) -> Result<Vec<T>>
265    where
266        F: FnMut(&ParseStream) -> Result<T>,
267    {
268        let mut items = Vec::new();
269        if self.is_empty() || !can_start_fn(&self.peek().kind) {
270            return Ok(items);
271        }
272        loop {
273            items.push(parse_item(self)?);
274            if !self.eat(&TokenKind::Comma) {
275                break;
276            }
277            if self.is_empty() {
278                break;
279            }
280        }
281        Ok(items)
282    }
283
284    /// Try to parse something. If parsing fails, revert the cursor.
285    pub fn try_parse<T, F>(&self, f: F) -> Option<T>
286    where
287        F: FnOnce(&ParseStream) -> Result<T>,
288    {
289        let saved = self.cursor.get();
290        match f(self) {
291            Ok(t) => Some(t),
292            Err(_) => {
293                self.cursor.set(saved);
294                None
295            }
296        }
297    }
298
299    /// Parse something inside parentheses.
300    pub fn parse_parenthesized<T, F>(&self, mut f: F) -> Result<T>
301    where
302        F: FnMut(&ParseStream) -> Result<T>,
303    {
304        self.expect(TokenKind::LParen)?;
305        let result = f(self)?;
306        self.expect(TokenKind::RParen)?;
307        Ok(result)
308    }
309
310    /// Parse any type that implements `Parse`.
311    pub fn parse<T: Parse>(&self) -> Result<T> {
312        T::parse(self)
313    }
314
315    /// Parse something inside braces.
316    pub fn parse_braced<T, F>(&self, mut f: F) -> Result<T>
317    where
318        F: FnMut(&ParseStream) -> Result<T>,
319    {
320        self.expect(TokenKind::LBrace)?;
321        let result = f(self)?;
322        self.expect(TokenKind::RBrace)?;
323        Ok(result)
324    }
325
326    /// Parse something inside brackets.
327    pub fn parse_bracketed<T, F>(&self, mut f: F) -> Result<T>
328    where
329        F: FnMut(&ParseStream) -> Result<T>,
330    {
331        self.expect(TokenKind::LBracket)?;
332        let result = f(self)?;
333        self.expect(TokenKind::RBracket)?;
334        Ok(result)
335    }
336
337    /// Consume the expected token, then return the span of the raw token at the cursor.
338    /// Unlike `expect(kind); peek().span`, this does NOT skip comments after consuming.
339    pub fn expect_then_raw_span(&self, kind: TokenKind) -> Result<Span> {
340        self.expect(kind)?;
341        Ok(self.peek_raw().span)
342    }
343    /// Also accepts contextual keywords (record, sealed, var, yield, open, etc.)
344    pub fn parse_ident(&self) -> Result<Ident> {
345        match &self.peek().kind {
346            TokenKind::Ident(s) => {
347                let span = self.peek().span;
348                self.advance();
349                Ok(Ident::new(s.clone(), span))
350            }
351            TokenKind::Record
352            | TokenKind::Sealed
353            | TokenKind::Var
354            | TokenKind::Yield
355            | TokenKind::Open
356            | TokenKind::Provides
357            | TokenKind::Requires
358            | TokenKind::Uses
359            | TokenKind::With
360            | TokenKind::When
361            | TokenKind::To
362            | TokenKind::Exports
363            | TokenKind::Opens
364            | TokenKind::Transitive
365            | TokenKind::Permits
366            | TokenKind::NonSealed
367            | TokenKind::Module
368            | TokenKind::Byte
369            | TokenKind::Short
370            | TokenKind::Int
371            | TokenKind::Long
372            | TokenKind::Char
373            | TokenKind::Float
374            | TokenKind::Double
375            | TokenKind::Boolean
376            | TokenKind::Void => {
377                let name = format!("{}", self.peek().kind);
378                let span = self.peek().span;
379                self.advance();
380                Ok(Ident::new(name, span))
381            }
382            _other => Err(Error::expected_token(self.peek().span, "identifier")),
383        }
384    }
385
386    /// Take any errors accumulated during parsing.
387    pub fn take_errors(&self) -> Vec<Error> {
388        self.errors.borrow_mut().drain(..).collect()
389    }
390
391    /// Create a span covering from the given start to the current position.
392    pub fn span_since(&self, start: Span) -> Span {
393        let end = if self.cursor.get() > 0 {
394            self.tokens[self.cursor.get() - 1].span
395        } else {
396            start
397        };
398        start.join(end)
399    }
400
401    /// Skip past any comment tokens at the current cursor position.
402    /// This is the public version for explicit comment skipping.
403    pub fn skip_comments_to_peek(&self) {
404        self.skip_comments();
405    }
406
407    fn skip_comments(&self) {
408        while self.cursor.get() < self.tokens.len()
409            && is_comment_token(&self.tokens[self.cursor.get()].kind)
410        {
411            let tok = &self.tokens[self.cursor.get()];
412            self.pending_comments
413                .borrow_mut()
414                .push(token_to_comment(tok));
415            self.cursor.set(self.cursor.get() + 1);
416        }
417    }
418
419    /// Collect pending doc comments (skipped by peek/advance).
420    /// Returns only doc comments (/// and /** */), discards regular comments.
421    pub fn collect_pending_doc_comments(&self) -> Vec<Comment> {
422        let all = self
423            .pending_comments
424            .borrow_mut()
425            .drain(..)
426            .collect::<Vec<_>>();
427        all.into_iter()
428            .filter(|c| c.kind == CommentKind::DocLine || c.kind == CommentKind::DocBlock)
429            .collect()
430    }
431
432    /// Collect all pending comments (skipped by peek/advance).
433    pub fn collect_pending_comments(&self) -> Vec<Comment> {
434        self.pending_comments.borrow_mut().drain(..).collect()
435    }
436
437    /// Collect and consume leading doc comments (/// and /** */).
438    /// Regular comments (// and /* */) are skipped.
439    pub fn collect_leading_doc_comments(&self) -> Vec<Comment> {
440        let mut comments = Vec::new();
441        while self.cursor.get() < self.tokens.len() {
442            match &self.tokens[self.cursor.get()].kind {
443                TokenKind::DocLineComment(_) | TokenKind::DocBlockComment(_) => {
444                    let tok = &self.tokens[self.cursor.get()];
445                    comments.push(token_to_comment(tok));
446                    self.cursor.set(self.cursor.get() + 1);
447                }
448                TokenKind::LineComment(_) | TokenKind::BlockComment(_) => {
449                    // Skip regular comments
450                    self.cursor.set(self.cursor.get() + 1);
451                }
452                _ => break,
453            }
454        }
455        comments
456    }
457
458    /// Collect and consume all leading comments (both doc and regular).
459    pub fn collect_leading_comments(&self) -> Vec<Comment> {
460        let mut comments = Vec::new();
461        while self.cursor.get() < self.tokens.len() {
462            match &self.tokens[self.cursor.get()].kind {
463                TokenKind::LineComment(_)
464                | TokenKind::BlockComment(_)
465                | TokenKind::DocLineComment(_)
466                | TokenKind::DocBlockComment(_) => {
467                    let tok = &self.tokens[self.cursor.get()];
468                    comments.push(token_to_comment(tok));
469                    self.cursor.set(self.cursor.get() + 1);
470                }
471                _ => break,
472            }
473        }
474        comments
475    }
476}
477
478fn is_comment_token(kind: &TokenKind) -> bool {
479    matches!(
480        kind,
481        TokenKind::LineComment(_)
482            | TokenKind::BlockComment(_)
483            | TokenKind::DocLineComment(_)
484            | TokenKind::DocBlockComment(_)
485    )
486}
487
488fn token_to_comment(tok: &Token) -> Comment {
489    let kind = match &tok.kind {
490        TokenKind::DocLineComment(_) => CommentKind::DocLine,
491        TokenKind::DocBlockComment(_) => CommentKind::DocBlock,
492        TokenKind::LineComment(_) => CommentKind::Line,
493        TokenKind::BlockComment(_) => CommentKind::Block,
494        _ => unreachable!(),
495    };
496    Comment {
497        kind,
498        span: tok.span,
499    }
500}
501
502fn can_start_item(kind: &TokenKind) -> bool {
503    !matches!(
504        kind,
505        TokenKind::Eof
506            | TokenKind::RParen
507            | TokenKind::RBrace
508            | TokenKind::RBracket
509            | TokenKind::Semicolon
510            | TokenKind::Comma
511    )
512}
513
/// A trait for types that can be parsed from a `ParseStream`.
///
/// This is the core parsing trait, analogous to syn's `Parse` trait.
/// It is what `ParseStream::parse`, `parse_str`, `parse` and `parse_file`
/// dispatch to.
///
/// # Example
///
/// ```
/// use java_lang::{Parse, ParseStream, parse_str, Ident};
///
/// struct SimpleName {
///     name: Ident,
/// }
///
/// impl Parse for SimpleName {
///     fn parse(input: &ParseStream) -> java_lang::Result<Self> {
///         Ok(SimpleName {
///             name: input.parse_ident()?,
///         })
///     }
/// }
/// ```
pub trait Parse: Sized {
    /// Parse this type from the given `ParseStream`.
    ///
    /// The stream is taken by shared reference; implementations consume
    /// tokens through its `&self` methods.
    fn parse(input: &ParseStream) -> Result<Self>;
}
539
540impl Parse for Ident {
541    fn parse(input: &ParseStream) -> Result<Self> {
542        input.parse_ident()
543    }
544}
545
546/// Parse a string into a value of type `T` that implements `Parse`.
547pub fn parse_str<T: Parse>(s: &str) -> Result<T> {
548    let tokens = lexer::tokenize(s);
549    let stream = ParseStream::new(&tokens);
550    let result = T::parse(&stream)?;
551    // Check for trailing tokens
552    if !stream.is_empty() {
553        return Err(Error::new(stream.peek().span, "unexpected trailing tokens"));
554    }
555    Ok(result)
556}
557
558/// Parse a string into a value of type `T` without checking for trailing tokens.
559pub fn parse<T: Parse>(s: &str) -> Result<T> {
560    let tokens = lexer::tokenize(s);
561    let stream = ParseStream::new(&tokens);
562    T::parse(&stream)
563}
564
565/// Parse a Java source file into a value of type `T`.
566pub fn parse_file<T: Parse>(path: &std::path::Path) -> Result<T> {
567    let content = std::fs::read_to_string(path)
568        .map_err(|e| Error::new(Span::call_site(), format!("failed to read file: {}", e)))?;
569    parse_str(&content)
570}
571
/// Check whether the stream's current token is the named `TokenKind` variant.
///
/// `peek!(input, Comma)` expands to `input.is(&TokenKind::Comma)`.
#[macro_export]
macro_rules! peek {
    ($input:expr, $variant:ident) => {
        $input.is(&$crate::token::TokenKind::$variant)
    };
}
579
/// Check whether the stream's current token can serve as an identifier.
///
/// `peek_ident!(input)` expands to `input.is_any_ident()`.
#[macro_export]
macro_rules! peek_ident {
    ($input:expr) => {
        $input.is_any_ident()
    };
}
587
/// Optionally parse something.
///
/// `opt!(stream, method, args...)` yields `None` when the stream is empty or
/// when `stream.method(args...)` returns `Err`, and `Some(value)` otherwise.
/// The stream expression is evaluated exactly once. A failed call is not
/// rolled back: whatever the method consumed before failing stays consumed.
#[macro_export]
macro_rules! opt {
    ($stream:expr, $method:ident $(, $arg:expr)*) => {{
        // Bind once so a side-effecting stream expression is not run twice.
        let stream = &$stream;
        if stream.is_empty() {
            None
        } else {
            stream.$method($($arg),*).ok()
        }
    }};
}
601}