rusty-javac 0.2.2

A Java compiler written in Rust.
Documentation
use crate::ast::JavaSyntaxKind;
use crate::diagnostics::Diagnostic;
use crate::lexer::Lexer;
use crate::parser::display::token_display;
use rowan::GreenNodeBuilder;
use text_size::{TextRange, TextSize};

/// Outcome of a [`Parser::parse`] run: the finished syntax tree plus every
/// error that was recorded while building it.
pub struct Parse {
    /// Root green node returned by the tree builder's `finish()`.
    pub green_node: rowan::GreenNode,
    /// Errors collected by the parser, in the order they were pushed.
    pub errors: Vec<ParseError>,
}

/// A single syntax error recorded by the parser.
#[derive(Debug, Clone)]
pub struct ParseError {
    /// Main error message.
    pub message: String,
    /// Byte offset into the source text where the offending span starts.
    pub offset: usize,
    /// Length of the offending span in bytes (may be 0 at end of input).
    pub len: usize,
    /// Text attached to the highlighted span as the diagnostic's primary label.
    pub label: String,
    /// Optional hint; `diagnostic()` substitutes a generic hint when this is `None`.
    pub help: Option<String>,
}

impl ParseError {
    /// Converts this error into a [`Diagnostic`] with code `P0001`.
    ///
    /// The diagnostic carries the error message, the source range covered by
    /// `offset`/`len`, the primary label, and a help text. When no specific
    /// help was recorded a generic hint is used instead.
    pub fn diagnostic(&self) -> Diagnostic {
        let help = self
            .help
            .clone()
            .unwrap_or_else(|| "check the token at the highlighted position".to_string());

        Diagnostic::error(self.message.clone(), self.range())
            .with_code("P0001")
            .with_primary_label(self.label.clone())
            .with_help(help)
    }

    /// Source range `[offset, offset + len)` expressed as a `TextRange`.
    ///
    /// Both ends are clamped to `u32::MAX`. The end is computed with a
    /// saturating add so that an extreme `offset`/`len` pair can neither
    /// overflow `usize` nor produce an end that lies before the start.
    fn range(&self) -> TextRange {
        let clamp = |value: usize| TextSize::from(value.min(u32::MAX as usize) as u32);
        let start = clamp(self.offset);
        let end = clamp(self.offset.saturating_add(self.len));
        TextRange::new(start, end)
    }
}

/// One lexed token as stored by the parser.
pub(crate) struct Token {
    /// Syntax kind reported by the lexer.
    pub(crate) kind: JavaSyntaxKind,
    /// Exact source text of the token.
    pub(crate) text: String,
    /// Byte offset of the token's first character in the source.
    pub(crate) offset: usize,
}

/// Parser state: the token stream, a cursor into it, the green-tree builder
/// and the errors recorded so far.
pub struct Parser {
    /// Owned copy of the source text; sliced to recover text between tokens.
    pub(crate) source: String,
    /// All tokens produced by the lexer, in source order.
    pub(crate) tokens: Vec<Token>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pub(crate) pos: usize,
    /// Byte offset just past the last token emitted by `bump`; source text
    /// between this offset and the next token is emitted as `Whitespace`.
    trivia_end: usize,
    /// Builder that accumulates the rowan green tree.
    pub(crate) builder: GreenNodeBuilder<'static>,
    /// Errors recorded so far.
    pub(crate) errors: Vec<ParseError>,
}

impl Parser {
    pub fn parse(source: &str) -> Parse {
        let lexer = Lexer::new(source);
        let tokens: Vec<_> = lexer
            .map(|t| Token {
                kind: t.kind,
                text: t.text,
                offset: u32::from(t.range.start()) as usize,
            })
            .collect();

        let mut parser = Parser {
            source: source.to_string(),
            tokens,
            pos: 0,
            trivia_end: 0,
            builder: GreenNodeBuilder::new(),
            errors: Vec::new(),
        };

        crate::parser::top_level::compilation_unit(&mut parser);
        let green_node = parser.builder.finish();

        Parse {
            green_node,
            errors: parser.errors,
        }
    }

    pub(crate) fn start(&mut self) -> Marker {
        let _pos = self.pos;
        let checkpoint = self.builder.checkpoint();
        Marker { _pos, checkpoint }
    }

    pub(crate) fn kind(&self) -> JavaSyntaxKind {
        self.tokens
            .get(self.pos)
            .map(|t| t.kind)
            .unwrap_or(JavaSyntaxKind::Error)
    }

    pub(crate) fn look(&self, ahead: usize) -> JavaSyntaxKind {
        self.tokens
            .get(self.pos + ahead)
            .map(|t| t.kind)
            .unwrap_or(JavaSyntaxKind::Error)
    }

    pub(crate) fn at(&self, k: JavaSyntaxKind) -> bool {
        self.kind() == k
    }

    pub(crate) fn at_any(&self, ks: &[JavaSyntaxKind]) -> bool {
        ks.contains(&self.kind())
    }

    pub(crate) fn bump(&mut self) {
        if self.pos < self.tokens.len() {
            let tok = &self.tokens[self.pos];
            if self.trivia_end < tok.offset {
                self.builder.token(
                    JavaSyntaxKind::Whitespace.into(),
                    &self.source[self.trivia_end..tok.offset],
                );
            }
            self.builder.token(tok.kind.into(), tok.text.as_str());
            self.trivia_end = tok.offset + tok.text.len();
            self.pos += 1;
        }
    }

    pub(crate) fn expect(&mut self, k: JavaSyntaxKind) {
        if self.at(k) {
            self.bump();
        } else {
            self.err_expected(k);
        }
    }

    pub(crate) fn eat(&mut self, k: JavaSyntaxKind) -> bool {
        if self.at(k) {
            self.bump();
            true
        } else {
            false
        }
    }

    pub(crate) fn err(&mut self, msg: impl Into<String>) {
        let msg = msg.into();
        let (offset, len) = self.current_span();
        let found = token_display(self.kind());
        self.errors.push(ParseError {
            message: msg,
            offset,
            len,
            label: format!("found {found}"),
            help: None,
        });
    }

    pub(crate) fn err_and_bump(&mut self, msg: impl Into<String>) {
        self.err(msg);
        self.bump();
    }

    fn err_expected(&mut self, expected: JavaSyntaxKind) {
        let (offset, len) = self.current_span();
        let expected = token_display(expected);
        let found = token_display(self.kind());
        self.errors.push(ParseError {
            message: format!("expected {expected}, found {found}"),
            offset,
            len,
            label: format!("expected {expected} here"),
            help: Some(format!("insert {expected} or remove {found}")),
        });
    }

    fn current_span(&self) -> (usize, usize) {
        self.tokens
            .get(self.pos)
            .map(|token| (token.offset, token.text.len().max(1)))
            .unwrap_or_else(|| (self.source.len(), 0))
    }

    pub(crate) fn lookahead(&self) -> Lookahead<'_> {
        Lookahead {
            tokens: &self.tokens,
            pos: self.pos,
        }
    }
}

/// Handle returned by `Parser::start` that remembers where a prospective
/// syntax node begins.
pub(crate) struct Marker {
    /// Token index at the time the marker was created (not read in this file).
    _pos: usize,
    /// Builder checkpoint at which the node will be opened on `complete`.
    checkpoint: rowan::Checkpoint,
}

impl Marker {
    pub(crate) fn complete(self, p: &mut Parser, kind: JavaSyntaxKind) {
        p.builder.start_node_at(self.checkpoint, kind.into());
        p.builder.finish_node();
    }

    pub(crate) fn abandon(self, _p: &mut Parser) {}
}

/// Read-only cursor over the parser's token list, used to scan ahead without
/// consuming tokens or touching the tree builder.
pub(crate) struct Lookahead<'a> {
    /// The token list being scanned.
    tokens: &'a [Token],
    /// Index of the token currently under the cursor.
    pos: usize,
}

impl<'a> Lookahead<'a> {
    /// Whether the token under the cursor has kind `kind`.
    pub(crate) fn at(&self, kind: JavaSyntaxKind) -> bool {
        self.kind() == kind
    }

    /// Kind of the token under the cursor, or `JavaSyntaxKind::Error` once the
    /// cursor has run past the last token.
    pub(crate) fn kind(&self) -> JavaSyntaxKind {
        self.tokens
            .get(self.pos)
            .map(|t| t.kind)
            .unwrap_or(JavaSyntaxKind::Error)
    }

    /// Whether the token under the cursor has one of the kinds in `ks`.
    pub(crate) fn at_any(&self, ks: &[JavaSyntaxKind]) -> bool {
        ks.contains(&self.kind())
    }

    /// Moves the cursor one token forward; does nothing at end of input.
    pub(crate) fn advance(&mut self) {
        if self.pos < self.tokens.len() {
            self.pos += 1;
        }
    }

    /// Steps over the current token if it has kind `kind`, reporting whether
    /// it did.
    pub(crate) fn eat(&mut self, kind: JavaSyntaxKind) -> bool {
        if self.at(kind) {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Skips a bracketed group that starts with `open` and ends with the
    /// matching `close`, honouring nesting.
    ///
    /// Does nothing when the cursor is not at `open`. If the group is never
    /// closed the cursor stops at end of input.
    pub(crate) fn skip_balanced(&mut self, open: JavaSyntaxKind, close: JavaSyntaxKind) {
        if !self.eat(open) {
            return;
        }
        let mut depth = 1usize;
        while depth > 0 && self.pos < self.tokens.len() {
            if self.at(open) {
                depth += 1;
            } else if self.at(close) {
                depth -= 1;
            }
            // The bracket (or any other token) just inspected is always consumed.
            self.advance();
        }
    }

    /// Skips zero or more annotations: `@Name`, `@pkg.Name`, each optionally
    /// followed by a parenthesised argument list.
    pub(crate) fn skip_annotations(&mut self) {
        use JavaSyntaxKind::*;
        while self.eat(At) {
            // Annotation names may be qualified (`@java.lang.Override`), so
            // consume the whole dotted name rather than a single identifier.
            while self.eat(Ident) {
                if !self.eat(Dot) {
                    break;
                }
            }
            self.skip_balanced(LParen, RParen);
        }
    }

    /// Skips any run of `Whitespace` / `Comment` tokens under the cursor.
    pub(crate) fn skip_trivia(&mut self) {
        use JavaSyntaxKind::*;
        while self.pos < self.tokens.len()
            && matches!(self.tokens[self.pos].kind, Whitespace | Comment)
        {
            self.pos += 1;
        }
    }

    /// Skips a type name: either a single primitive/`void` keyword, or a
    /// dotted sequence of identifiers, each optionally followed by a
    /// `<...>` type-argument list. Array dimensions are not consumed here.
    pub(crate) fn skip_type(&mut self) {
        use JavaSyntaxKind::*;
        let primitives = [
            IntKw, LongKw, ShortKw, ByteKw, CharKw, FloatKw, DoubleKw, BooleanKw, VoidKw,
        ];
        if self.at_any(&primitives) {
            self.advance();
        } else {
            while self.eat(Ident) {
                self.skip_balanced(Lt, Gt);
                if !self.eat(Dot) {
                    break;
                }
            }
        }
    }

    /// Skips any number of empty array dimension pairs `[]`. A `[` that is
    /// not immediately followed by `]` is left untouched.
    pub(crate) fn skip_array_dims(&mut self) {
        use JavaSyntaxKind::*;
        while self.at(LBrack)
            && self
                .tokens
                .get(self.pos + 1)
                .is_some_and(|t| t.kind == RBrack)
        {
            self.pos += 2;
        }
    }
}