#![doc = include_str!("readme.md")]
pub mod element_type;
pub use element_type::CppElementType;
use crate::{
language::CppLanguage,
lexer::{CppLexer, CppTokenType},
};
use oak_core::{
GreenNode, OakError,
parser::{Associativity, ParseCache, ParseOutput, Parser, ParserState, Pratt, PrattParser, parse_with_lexer},
source::{Source, TextEdit},
};
/// Parser state specialised to [`CppLanguage`]; the parsing routines in this module take it by `&mut`.
pub(crate) type State<'a, S> = ParserState<'a, CppLanguage, S>;
/// Parser for C++ source text.
///
/// The parser only holds a borrowed [`CppLanguage`] configuration, which is
/// passed to [`CppLexer::new`] when parsing starts.
pub struct CppParser<'config> {
/// Language configuration borrowed for the lifetime of the parser.
pub(crate) config: &'config CppLanguage,
}
impl<'config> CppParser<'config> {
pub fn new(config: &'config CppLanguage) -> Self {
Self { config }
}
/// Consumes one statement-like run of tokens.
///
/// After skipping leading trivia:
/// * a `{` is parsed as a compound statement;
/// * a preprocessor token is consumed together with every following token
///   up to (but not including) the next newline token;
/// * anything else is consumed token by token until a `;` (which is also
///   consumed), a `{` (parsed as a compound statement) or a `}` is reached.
///
/// A `}` is never consumed here, so a call that starts on `}` makes no
/// progress; callers are responsible for handling the closing brace.
///
/// # Errors
/// Propagates any error returned by `parse_compound_statement`.
fn parse_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    use crate::lexer::CppTokenType::*;
    self.skip_trivia(state);
    let first = state.peek_kind();

    if matches!(first, Some(LeftBrace)) {
        return self.parse_compound_statement(state);
    }

    if matches!(first, Some(Preprocessor)) {
        // A directive runs to the end of its line; the newline itself is left for the next trivia skip.
        loop {
            if !state.not_at_end() || state.at(Newline) {
                break;
            }
            state.bump();
        }
        return Ok(());
    }

    // Generic fallback: swallow tokens up to the next statement boundary.
    loop {
        let at_boundary = !state.not_at_end() || state.at(Semicolon) || state.at(LeftBrace) || state.at(RightBrace);
        if at_boundary {
            break;
        }
        state.bump();
    }
    if state.at(Semicolon) {
        state.bump();
    }
    else if state.at(LeftBrace) {
        self.parse_compound_statement(state)?;
    }
    Ok(())
}
/// Consumes consecutive whitespace, newline and comment tokens, stopping at
/// the first token of any other kind or at the end of input.
fn skip_trivia<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
    use crate::lexer::CppTokenType::*;
    while matches!(state.peek_kind(), Some(Whitespace | Newline | Comment)) {
        state.bump();
    }
}
/// Parses a `{ ... }` block and wraps everything consumed in a
/// `CompoundStatement` node.
///
/// When the current token is `{`, statements are parsed until the matching
/// `}` (or the end of input). When it is not, the routine recovers by
/// consuming tokens up to and including the next `}`.
///
/// # Errors
/// Propagates any error returned by `parse_statement`.
fn parse_compound_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();

    if state.eat(CppTokenType::LeftBrace) {
        // Regular path: a sequence of statements terminated by `}`.
        loop {
            if !state.not_at_end() || state.at(CppTokenType::RightBrace) {
                break;
            }
            self.parse_statement(state)?;
        }
        let closed = state.eat(CppTokenType::RightBrace);
        if !closed {
            // Missing `}`: resynchronise on the next `;` or `{`.
            while state.not_at_end() && !(state.at(CppTokenType::Semicolon) || state.at(CppTokenType::LeftBrace)) {
                state.bump();
            }
        }
    }
    else {
        // Recovery path: no opening brace, so skip through the next `}`.
        while state.not_at_end() && !state.at(CppTokenType::RightBrace) {
            state.bump();
        }
        if state.at(CppTokenType::RightBrace) {
            state.bump();
        }
    }

    state.finish_at(start, CppElementType::CompoundStatement);
    Ok(())
}
/// Parses an `if` statement and wraps it in an `IfStatement` node.
///
/// The first token is consumed unconditionally (the caller is assumed to have
/// positioned the state on the `if` keyword). The condition is not parsed as
/// an expression; its tokens are skipped up to the `)` that closes the
/// condition, with nested parentheses such as `if (f(x))` kept balanced.
/// The body is parsed with `parse_statement`, and a following keyword token
/// introduces a second branch.
///
/// # Errors
/// Propagates any error returned by `parse_statement`.
fn parse_if_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let cp = state.checkpoint();
    state.bump();
    self.skip_trivia(state);
    state.expect(CppTokenType::LeftParen).ok();

    // Skip the condition. Track nesting depth so that an inner `)` (for
    // example from a call in the condition) does not end the scan early.
    let mut depth = 0usize;
    while state.not_at_end() {
        if state.at(CppTokenType::LeftParen) {
            depth += 1;
        }
        else if state.at(CppTokenType::RightParen) {
            if depth == 0 {
                break;
            }
            depth -= 1;
        }
        state.bump();
    }
    state.expect(CppTokenType::RightParen).ok();

    self.parse_statement(state)?;
    self.skip_trivia(state);
    // NOTE(review): any keyword token is treated as `else` here because the
    // token kind alone does not distinguish keywords — confirm against the lexer.
    if state.at(CppTokenType::Keyword) {
        state.bump();
        self.parse_statement(state)?;
    }
    state.finish_at(cp, CppElementType::IfStatement);
    Ok(())
}
/// Parses a `while` statement and wraps it in a `WhileStatement` node.
///
/// The first token is consumed unconditionally (presumably the `while`
/// keyword — the caller decides). The condition between the parentheses is
/// parsed with the Pratt expression parser, and the body with
/// `parse_statement`. Missing parentheses are tolerated.
///
/// # Errors
/// Propagates any error returned by `parse_statement`.
fn parse_while_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();

    // Leading token, then the parenthesised condition.
    state.bump();
    self.skip_trivia(state);
    state.expect(CppTokenType::LeftParen).ok();
    let condition = PrattParser::parse(state, 0, self);
    state.push_child(condition);
    state.expect(CppTokenType::RightParen).ok();

    // Loop body.
    self.parse_statement(state)?;

    state.finish_at(start, CppElementType::WhileStatement);
    Ok(())
}
/// Parses a `for` statement and wraps it in a `ForStatement` node.
///
/// The first token is consumed unconditionally (presumably the `for`
/// keyword — the caller decides). The header consists of three optional
/// clauses terminated by `;`, `;` and `)` respectively; each non-empty
/// clause is parsed with the Pratt expression parser. The body is parsed
/// with `parse_statement`. Missing delimiters are tolerated.
///
/// # Errors
/// Propagates any error returned by `parse_statement`.
fn parse_for_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();
    state.bump();
    self.skip_trivia(state);
    state.expect(CppTokenType::LeftParen).ok();

    // init ; condition ; step ) — every clause has the same shape and differs
    // only in the token that terminates it.
    for terminator in [CppTokenType::Semicolon, CppTokenType::Semicolon, CppTokenType::RightParen] {
        self.skip_trivia(state);
        if !state.at(terminator) {
            let clause = PrattParser::parse(state, 0, self);
            state.push_child(clause);
        }
        state.expect(terminator).ok();
    }

    self.parse_statement(state)?;
    state.finish_at(start, CppElementType::ForStatement);
    Ok(())
}
/// Parses a `return` statement and wraps it in a `ReturnStatement` node.
///
/// The first token is consumed unconditionally (presumably the `return`
/// keyword — the caller decides). Unless the next token is `;`, a value
/// expression is parsed with the Pratt expression parser. A trailing `;` is
/// consumed when present.
fn parse_return_statement<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();
    state.bump();
    self.skip_trivia(state);

    let has_value = !state.at(CppTokenType::Semicolon);
    if has_value {
        let value = PrattParser::parse(state, 0, self);
        state.push_child(value);
    }

    state.eat(CppTokenType::Semicolon);
    state.finish_at(start, CppElementType::ReturnStatement);
    Ok(())
}
/// Parses a declaration.
///
/// Tokens are consumed up to the first `;` or `(`. A `(` hands control to
/// `parse_function_definition`; otherwise the remaining tokens through the
/// terminating `;` are consumed and everything is wrapped in a
/// `DeclarationStatement` node.
///
/// In the function case the tokens consumed before the `(` are not wrapped
/// by this routine; only the non-function path finishes a node here.
///
/// # Errors
/// Propagates any error returned by `parse_function_definition`.
fn parse_declaration<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();

    // Scan the declaration head up to the token that decides its kind.
    loop {
        let decided = !state.not_at_end() || state.at(CppTokenType::Semicolon) || state.at(CppTokenType::LeftParen);
        if decided {
            break;
        }
        state.bump();
    }

    if state.at(CppTokenType::LeftParen) {
        return self.parse_function_definition(state);
    }

    // Plain declaration: consume through the terminating `;`.
    while state.not_at_end() && !state.at(CppTokenType::Semicolon) {
        state.bump();
    }
    state.eat(CppTokenType::Semicolon);
    state.finish_at(start, CppElementType::DeclarationStatement);
    Ok(())
}
/// Parses a function's name, parameter list and optional body, wrapping them
/// in a `FunctionDefinition` node.
///
/// An identifier at the current position is consumed as the name. The
/// parameter list is scanned one comma-separated segment at a time without
/// building parameter nodes, up to the first `)`. A `{` after the parameter
/// list is parsed as the body.
///
/// # Errors
/// Propagates any error returned by `parse_compound_statement`.
fn parse_function_definition<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();

    // Optional function name.
    if state.at(CppTokenType::Identifier) {
        state.bump();
    }
    self.skip_trivia(state);

    // Parameter list: `( segment , segment , ... )`.
    state.expect(CppTokenType::LeftParen).ok();
    loop {
        if !state.not_at_end() || state.at(CppTokenType::RightParen) {
            break;
        }
        self.skip_trivia(state);
        // One parameter segment ends at `,` or at the closing `)`.
        while state.not_at_end() && !(state.at(CppTokenType::Comma) || state.at(CppTokenType::RightParen)) {
            state.bump();
        }
        if state.at(CppTokenType::Comma) {
            state.bump();
        }
    }
    state.expect(CppTokenType::RightParen).ok();
    self.skip_trivia(state);

    // Optional body.
    if state.at(CppTokenType::LeftBrace) {
        self.parse_compound_statement(state)?;
    }

    state.finish_at(start, CppElementType::FunctionDefinition);
    Ok(())
}
/// Parses a class-like definition and wraps it in a `ClassDefinition` node.
///
/// The first token is consumed unconditionally (presumably the `class` /
/// `struct` keyword — the caller decides). After that the routine accepts,
/// in order and all optional: a name identifier, a `< ... >` argument list
/// (skipped up to the first `>`), a `:` base clause (skipped up to the `{`),
/// a `{ ... }` member list and a trailing `;`.
///
/// Members are not turned into nodes of their own; the member list is
/// scanned `;`-terminated segment by segment. A nested `{ ... }` inside a
/// member (for example an inline method body) is parsed as a compound
/// statement so that its closing brace is not mistaken for the end of the
/// class.
///
/// # Errors
/// Propagates any error returned by `parse_compound_statement`.
fn parse_class_definition<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let cp = state.checkpoint();
    state.bump();
    self.skip_trivia(state);
    if state.at(CppTokenType::Identifier) {
        state.bump();
    }
    self.skip_trivia(state);
    if state.at(CppTokenType::Less) {
        while state.not_at_end() && !state.at(CppTokenType::Greater) {
            state.bump();
        }
        state.eat(CppTokenType::Greater);
    }
    self.skip_trivia(state);
    if state.at(CppTokenType::Colon) {
        // Base clause: everything up to the opening brace of the body.
        state.bump();
        while state.not_at_end() && !state.at(CppTokenType::LeftBrace) {
            state.bump();
        }
    }
    self.skip_trivia(state);
    if state.at(CppTokenType::LeftBrace) {
        state.bump();
        while state.not_at_end() && !state.at(CppTokenType::RightBrace) {
            self.skip_trivia(state);
            // A keyword directly followed by `:` (e.g. an access specifier) is consumed as a unit.
            if state.at(CppTokenType::Keyword) {
                state.bump();
                if state.at(CppTokenType::Colon) {
                    state.bump();
                }
            }
            while state.not_at_end() && !state.at(CppTokenType::Semicolon) && !state.at(CppTokenType::RightBrace) {
                if state.at(CppTokenType::LeftBrace) {
                    // Nested block: consume it with balanced braces so its `}`
                    // does not terminate the member list prematurely.
                    self.parse_compound_statement(state)?;
                }
                else {
                    state.bump();
                }
            }
            state.eat(CppTokenType::Semicolon);
        }
        state.expect(CppTokenType::RightBrace).ok();
    }
    state.eat(CppTokenType::Semicolon);
    state.finish_at(cp, CppElementType::ClassDefinition);
    Ok(())
}
/// Parses a namespace definition and wraps it in a `NamespaceDefinition`
/// node.
///
/// The first token is consumed unconditionally (presumably the `namespace`
/// keyword — the caller decides). An optional name follows, possibly
/// qualified with scope tokens (`a::b::c`). A `{ ... }` body, when present,
/// is parsed as a sequence of statements.
///
/// # Errors
/// Propagates any error returned by `parse_statement`.
fn parse_namespace_definition<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    let start = state.checkpoint();
    state.bump();
    self.skip_trivia(state);

    // Optional, possibly qualified, namespace name.
    if state.at(CppTokenType::Identifier) {
        state.bump();
        loop {
            if !state.at(CppTokenType::Scope) {
                break;
            }
            state.bump();
            if state.at(CppTokenType::Identifier) {
                state.bump();
            }
        }
    }
    self.skip_trivia(state);

    // Optional body.
    if state.at(CppTokenType::LeftBrace) {
        state.bump();
        loop {
            if !state.not_at_end() || state.at(CppTokenType::RightBrace) {
                break;
            }
            self.parse_statement(state)?;
        }
        state.expect(CppTokenType::RightBrace).ok();
    }

    state.finish_at(start, CppElementType::NamespaceDefinition);
    Ok(())
}
}
impl<'config> Parser<CppLanguage> for CppParser<'config> {
fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<CppLanguage>) -> ParseOutput<'a, CppLanguage> {
let lexer = CppLexer::new(self.config);
parse_with_lexer(&lexer, text, edits, cache, |state| {
let cp = state.checkpoint();
while state.not_at_end() {
self.parse_statement(state)?
}
Ok(state.finish_at(cp, CppElementType::SourceFile))
})
}
}
impl<'config> Pratt<CppLanguage> for CppParser<'config> {
/// Parses a primary expression after skipping leading trivia.
///
/// * an identifier becomes a `Token(Identifier)` node;
/// * an integer, float, character, string or boolean literal becomes an
///   `ExpressionStatement` node;
/// * `(` starts a parenthesised sub-expression, parsed with minimum
///   precedence 0 and wrapped in an `ExpressionStatement` node (a missing
///   `)` is tolerated);
/// * anything else is consumed as a single-token `Error` node.
fn primary<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
    use crate::lexer::CppTokenType::*;
    self.skip_trivia(state);
    let start = state.checkpoint();

    // Consume the tokens of the expression and decide which node kind wraps them.
    let node_kind = match state.peek_kind() {
        Some(Identifier) => {
            state.bump();
            CppElementType::Token(Identifier)
        }
        Some(IntegerLiteral | FloatLiteral | CharacterLiteral | StringLiteral | BooleanLiteral) => {
            state.bump();
            CppElementType::ExpressionStatement
        }
        Some(LeftParen) => {
            state.bump();
            let inner = PrattParser::parse(state, 0, self);
            state.push_child(inner);
            self.skip_trivia(state);
            state.expect(RightParen).ok();
            CppElementType::ExpressionStatement
        }
        _ => {
            state.bump();
            CppElementType::Error
        }
    };

    state.finish_at(start, node_kind)
}
fn prefix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> &'a GreenNode<'a, CppLanguage> {
self.primary(state)
}
/// Tries to extend `left` with an infix or postfix operator.
///
/// After skipping trivia, the next token is looked up in the operator table.
/// Returns `None` when the input is exhausted, when the token is not a known
/// operator, or when its precedence is below `min_precedence`. Otherwise:
///
/// * `(` builds a `FunctionCall` node from `left` and a comma-separated
///   argument list (missing parentheses are tolerated);
/// * every other operator builds an `ExpressionStatement` node from `left`,
///   the operator token and a right-hand operand.
///
/// The right-hand operand of a right-associative operator is parsed with the
/// operator's own precedence as minimum, so an equal-precedence operator to
/// its right nests inside it. For every other associativity the minimum is
/// one higher, so equal-precedence operators group to the left.
fn infix<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>, left: &'a GreenNode<'a, CppLanguage>, min_precedence: u8) -> Option<&'a GreenNode<'a, CppLanguage>> {
    use crate::lexer::CppTokenType::*;
    self.skip_trivia(state);
    let kind = state.peek_kind()?;
    let (prec, assoc) = match kind {
        Assign | PlusAssign | MinusAssign | StarAssign | SlashAssign | PercentAssign | AndAssign | OrAssign | XorAssign | LeftShiftAssign | RightShiftAssign => (1, Associativity::Right),
        LogicalOr => (2, Associativity::Left),
        LogicalAnd => (3, Associativity::Left),
        Equal | NotEqual | Less | Greater | LessEqual | GreaterEqual => (4, Associativity::Left),
        Plus | Minus => (10, Associativity::Left),
        Star | Slash | Percent => (11, Associativity::Left),
        LeftParen | LeftBracket | Dot | Arrow => (15, Associativity::Left),
        Scope => (16, Associativity::Left),
        _ => return None,
    };
    if prec < min_precedence {
        return None;
    }
    match kind {
        LeftParen => {
            let cp = state.checkpoint();
            state.push_child(left);
            state.expect(LeftParen).ok();
            // Skip trivia before looking for `)`: otherwise whitespace in
            // front of the closing parenthesis (`f( )`, `f(a, )`) would start
            // another argument and `primary` would consume the `)` itself.
            self.skip_trivia(state);
            while state.not_at_end() && !state.at(RightParen) {
                let arg = PrattParser::parse(state, 0, self);
                state.push_child(arg);
                self.skip_trivia(state);
                if !state.eat(Comma) {
                    break;
                }
                self.skip_trivia(state);
            }
            state.expect(RightParen).ok();
            Some(state.finish_at(cp, CppElementType::FunctionCall))
        }
        _ => {
            // NOTE(review): `[`, `.`, `->` and `::` also take this generic
            // binary path, so the closing bracket of a subscript is not
            // consumed here.
            let cp = state.checkpoint();
            state.push_child(left);
            state.bump();
            self.skip_trivia(state);
            // Spell the binding rule out instead of relying on the numeric
            // value of the `Associativity` discriminant.
            let next_min = match assoc {
                Associativity::Right => prec,
                _ => prec + 1,
            };
            let right = PrattParser::parse(state, next_min, self);
            state.push_child(right);
            Some(state.finish_at(cp, CppElementType::ExpressionStatement))
        }
    }
}
}