pub mod element_type;
use crate::{
language::TexLanguage,
lexer::{TexLexer, token_type::TexTokenType},
parser::element_type::TexElementType,
};
use oak_core::{
GreenNode, OakError,
parser::{ParseCache, ParseOutput, Parser, ParserState, parse_with_lexer},
source::{Source, TextEdit},
};
/// Parser state specialised to [`TexLanguage`], generic over the source type `S`.
///
/// Every parsing routine in this module takes a `&mut State<'a, S>`.
pub(crate) type State<'a, S> = ParserState<'a, TexLanguage, S>;
/// Parser for TeX sources.
///
/// The parser does not own its configuration; it borrows a [`TexLanguage`]
/// for the lifetime `'config`.
pub struct TexParser<'config> {
/// Borrowed language configuration; also handed to the lexer when parsing.
pub(crate) config: &'config TexLanguage,
}
impl<'config> TexParser<'config> {
pub fn new(config: &'config TexLanguage) -> Self {
Self { config }
}
fn parse_item<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
match state.peek_kind() {
Some(TexTokenType::BeginKeyword) => self.parse_environment(state),
Some(kind) if self.is_command_like(kind) => self.parse_command(state),
Some(TexTokenType::LeftBrace) => self.parse_group(state),
Some(TexTokenType::Dollar) | Some(TexTokenType::DoubleDollar) => self.parse_math(state),
Some(TexTokenType::Caret) => self.parse_superscript(state),
Some(TexTokenType::Underscore) => self.parse_subscript(state),
_ => {
state.bump();
Ok(())
}
}
}
fn is_command_like(&self, kind: TexTokenType) -> bool {
match kind {
TexTokenType::Backslash |
TexTokenType::Command |
TexTokenType::EndKeyword |
TexTokenType::DocumentclassKeyword |
TexTokenType::UsepackageKeyword |
TexTokenType::SectionKeyword |
TexTokenType::SubsectionKeyword |
TexTokenType::SubsubsectionKeyword |
TexTokenType::ChapterKeyword |
TexTokenType::PartKeyword |
TexTokenType::TitleKeyword |
TexTokenType::AuthorKeyword |
TexTokenType::DateKeyword |
TexTokenType::MaketitleKeyword |
TexTokenType::TableofcontentsKeyword |
TexTokenType::ItemKeyword |
TexTokenType::LabelKeyword |
TexTokenType::RefKeyword |
TexTokenType::CiteKeyword |
TexTokenType::IncludegraphicsKeyword |
TexTokenType::TextbfKeyword |
TexTokenType::TextitKeyword |
TexTokenType::EmphKeyword |
TexTokenType::Frac |
TexTokenType::Sqrt |
TexTokenType::Sum |
TexTokenType::Int |
TexTokenType::Lim |
TexTokenType::Alpha |
TexTokenType::Beta |
TexTokenType::Gamma |
TexTokenType::Delta |
TexTokenType::Epsilon => true,
_ => false,
}
}
fn parse_environment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
let begin_checkpoint = state.checkpoint();
state.expect(TexTokenType::BeginKeyword)?;
let mut env_name = String::new();
if state.at(TexTokenType::LeftBrace) {
state.bump(); if let Some(text) = state.peek_text() {
env_name = text.to_string();
}
while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
state.bump();
}
state.expect(TexTokenType::RightBrace)?;
}
state.finish_at(begin_checkpoint, TexElementType::BeginEnvironment);
let env_type = match env_name.as_str() {
"itemize" | "enumerate" | "description" => TexElementType::List,
"tabular" | "array" => TexElementType::Table,
"figure" => TexElementType::Figure,
"document" => TexElementType::Document,
"equation" | "align" | "gather" | "multline" | "eqnarray" => TexElementType::DisplayMath,
_ => TexElementType::Environment,
};
while state.not_at_end() && !state.at(TexTokenType::EndKeyword) {
match env_type {
TexElementType::List if state.at(TexTokenType::ItemKeyword) => {
let item_checkpoint = state.checkpoint();
state.bump(); while state.not_at_end() && !state.at(TexTokenType::ItemKeyword) && !state.at(TexTokenType::EndKeyword) {
self.parse_item(state)?;
}
state.finish_at(item_checkpoint, TexElementType::Item);
}
TexElementType::Table if state.at(TexTokenType::Ampersand) || state.at(TexTokenType::Command) => {
if state.at(TexTokenType::Ampersand) {
state.bump();
}
else {
self.parse_item(state)?;
}
}
_ => self.parse_item(state)?,
}
}
if state.at(TexTokenType::EndKeyword) {
let end_checkpoint = state.checkpoint();
state.bump();
if state.at(TexTokenType::LeftBrace) {
self.parse_mandatory_argument(state)?
}
state.finish_at(end_checkpoint, TexElementType::EndEnvironment);
}
state.finish_at(checkpoint, env_type);
Ok(())
}
fn parse_superscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
state.expect(TexTokenType::Caret)?;
if state.at(TexTokenType::LeftBrace) {
self.parse_group(state)?;
}
else {
state.bump();
}
state.finish_at(checkpoint, TexElementType::Superscript);
Ok(())
}
fn parse_subscript<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
state.expect(TexTokenType::Underscore)?;
if state.at(TexTokenType::LeftBrace) {
self.parse_group(state)?;
}
else {
state.bump();
}
state.finish_at(checkpoint, TexElementType::Subscript);
Ok(())
}
fn parse_command<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
let kind = state.peek_kind().unwrap_or(TexTokenType::Command);
let should_parse_args = state.peek_text().map_or(true, |name| {
let name_str: &str = &name;
let name_str = name_str.strip_prefix('\\').unwrap_or(name_str);
name_str != "left" && name_str != "right"
});
state.bump();
let node_kind = if should_parse_args {
while state.at(TexTokenType::LeftBracket) || state.at(TexTokenType::LeftBrace) {
if state.at(TexTokenType::LeftBracket) {
self.parse_optional_argument(state)?;
}
else {
self.parse_mandatory_argument(state)?;
}
}
match kind {
TexTokenType::Frac | TexTokenType::Sqrt | TexTokenType::Sum | TexTokenType::Int | TexTokenType::Lim | TexTokenType::Alpha | TexTokenType::Beta | TexTokenType::Gamma | TexTokenType::Delta | TexTokenType::Epsilon => kind,
_ => TexTokenType::Command,
}
}
else {
kind
};
state.finish_at(checkpoint, node_kind.into());
Ok(())
}
fn parse_group<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
state.expect(TexTokenType::LeftBrace)?;
while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
self.parse_item(state)?;
}
state.expect(TexTokenType::RightBrace)?;
state.finish_at(checkpoint, TexElementType::Group);
Ok(())
}
fn parse_math<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
let kind = state.peek_kind().unwrap();
state.bump();
while state.not_at_end() && !state.at(kind) {
self.parse_item(state)?;
}
state.expect(kind)?;
let element_kind = if kind == TexTokenType::DoubleDollar { TexElementType::DisplayMath } else { TexElementType::InlineMath };
state.finish_at(checkpoint, element_kind);
Ok(())
}
fn parse_optional_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
state.expect(TexTokenType::LeftBracket)?;
while state.not_at_end() && !state.at(TexTokenType::RightBracket) {
self.parse_item(state)?;
}
state.expect(TexTokenType::RightBracket)?;
state.finish_at(checkpoint, TexElementType::OptionalArgument);
Ok(())
}
fn parse_mandatory_argument<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
let checkpoint = state.checkpoint();
state.expect(TexTokenType::LeftBrace)?;
while state.not_at_end() && !state.at(TexTokenType::RightBrace) {
self.parse_item(state)?;
}
state.expect(TexTokenType::RightBrace)?;
state.finish_at(checkpoint, TexElementType::MandatoryArgument);
Ok(())
}
}
impl<'config> Parser<TexLanguage> for TexParser<'config> {
    /// Parses `text` into a tree whose top-level node is `TexElementType::Root`.
    ///
    /// A [`TexLexer`] is built from the parser's configuration and passed to
    /// `parse_with_lexer` together with `text`, `edits` and `cache`. The
    /// supplied closure parses items until the state reports the end of
    /// input and then closes everything under a single root node.
    fn parse<'a, S: Source + ?Sized>(&self, text: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<TexLanguage>) -> ParseOutput<'a, TexLanguage> {
        let tex_lexer = TexLexer::new(self.config);
        parse_with_lexer(&tex_lexer, text, edits, cache, |state| {
            let root_start = state.checkpoint();
            while state.not_at_end() {
                self.parse_item(state)?;
            }
            let root = state.finish_at(root_start, TexElementType::Root);
            Ok(root)
        })
    }
}