oak-notedown 0.0.9

High-performance incremental Markdown parser for the oak ecosystem with flexible configuration, optimized for documentation and content creation.
Documentation
/// Element type definitions for the Notedown parser.
pub mod element_type;

use crate::{
    language::NotedownLanguage,
    lexer::{NotedownLexer, token_type::NoteTokenType},
    parser::element_type::NoteElementType,
};
use oak_core::{
    TextEdit,
    errors::OakError,
    parser::{ParseCache, ParseOutput, Parser, ParserState},
    source::Source,
    tree::GreenNode,
};

/// Shorthand for the `oak_core` parser state specialised to the Notedown language.
///
/// `S` is the source type being parsed; every `parse_*` method in this module takes
/// `&mut State<'a, S>`.
pub(crate) type State<'a, S> = ParserState<'a, NotedownLanguage, S>;

/// Notedown parser implementation.
///
/// The parser owns no state of its own: it only borrows the language configuration,
/// which is handed to the lexer each time `parse` runs.
pub struct NoteParser<'a> {
    /// Reference to the language configuration used to construct the lexer.
    pub language: &'a NotedownLanguage,
}

impl<'a> NoteParser<'a> {
    /// Create a new parser with the given language configuration
    pub fn new(language: &'a NotedownLanguage) -> Self {
        Self { language }
    }
}

impl<'p> Parser<NotedownLanguage> for NoteParser<'p> {
    /// Lex and parse `source`, producing a tree whose outermost node is `NoteElementType::Root`.
    ///
    /// `edits` and `cache` are forwarded untouched to `oak_core::parser::parse_with_lexer`;
    /// this method only supplies the lexer and the grammar callback.
    fn parse<'a, S: Source + ?Sized>(&self, source: &'a S, edits: &[TextEdit], cache: &'a mut impl ParseCache<NotedownLanguage>) -> ParseOutput<'a, NotedownLanguage> {
        let tokenizer = NotedownLexer::new(self.language);
        oak_core::parser::parse_with_lexer(&tokenizer, source, edits, cache, |state| {
            // Everything parsed from here on is wrapped in the root node.
            let root_start = state.checkpoint();
            loop {
                if !state.not_at_end() {
                    break;
                }
                self.parse_block(state);
            }

            let root = state.finish_at(root_start, NoteElementType::Root);
            Ok(root)
        })
    }
}

impl<'p> NoteParser<'p> {
    fn parse_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let kind = state.peek_kind();
        match kind {
            Some(NoteTokenType::Hash) => self.parse_heading(state),
            Some(NoteTokenType::Asterisk) | Some(NoteTokenType::Dash) | Some(NoteTokenType::Plus) => self.parse_list_item(state),
            Some(NoteTokenType::Pipe) => self.parse_table(state),
            Some(NoteTokenType::Backtick) => self.parse_code_block(state),
            _ => self.parse_paragraph(state),
        }
    }

    fn parse_heading<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        let mut level = 0;
        while state.at(NoteTokenType::Hash) {
            state.bump();
            level += 1;
        }

        self.parse_inline_content(state);

        let kind = match level {
            1..=6 => NoteElementType::Heading,
            _ => NoteElementType::Paragraph,
        };
        state.finish_at(checkpoint, kind);
    }

    fn parse_list_item<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        state.bump(); // marker
        self.parse_inline_content(state);
        state.finish_at(checkpoint, NoteElementType::ListItem);
    }

    fn parse_table<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        while state.not_at_end() && state.at(NoteTokenType::Pipe) {
            self.parse_table_row(state);
            while state.at(NoteTokenType::Newline) || state.at(NoteTokenType::Whitespace) {
                state.bump();
            }
        }
        state.finish_at(checkpoint, NoteElementType::Table);
    }

    fn parse_table_row<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        while state.at(NoteTokenType::Pipe) {
            self.parse_table_cell(state);
        }
        if state.at(NoteTokenType::Newline) {
            state.bump();
        }
        state.finish_at(checkpoint, NoteElementType::TableRow);
    }

    fn parse_table_cell<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        state.bump(); // |
        while state.not_at_end() && !state.at(NoteTokenType::Pipe) && !state.at(NoteTokenType::Newline) {
            self.parse_inline_content(state);
        }
        // ElementType for cell is not explicitly in NoteElementType, using Token(Pipe) as placeholder or just Root
        state.finish_at(checkpoint, NoteElementType::Root);
    }

    fn parse_paragraph<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        self.parse_inline_content(state);
        if state.at(NoteTokenType::Newline) {
            state.bump();
        }
        state.finish_at(checkpoint, NoteElementType::Paragraph);
    }

    fn parse_inline_content<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        while state.not_at_end() && !state.at(NoteTokenType::Newline) {
            let checkpoint = state.checkpoint();
            let kind = state.peek_kind();
            match kind {
                Some(NoteTokenType::Asterisk) | Some(NoteTokenType::Underscore) => {
                    let marker = kind.unwrap();
                    state.bump();
                    while state.not_at_end() && !state.at(marker) && !state.at(NoteTokenType::Newline) {
                        self.parse_inline_content(state);
                    }
                    if state.at(marker) {
                        state.bump();
                    }
                    state.finish_at(checkpoint, NoteElementType::Root);
                }
                Some(NoteTokenType::LeftBracket) => {
                    state.bump(); // [
                    while state.not_at_end() && !state.at(NoteTokenType::RightBracket) && !state.at(NoteTokenType::Newline) {
                        self.parse_inline_content(state);
                    }
                    if state.at(NoteTokenType::RightBracket) {
                        state.bump();
                    }
                    state.finish_at(checkpoint, NoteElementType::Link);
                }
                _ => {
                    state.bump();
                }
            }
        }
    }

    fn parse_code_block<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) {
        let checkpoint = state.checkpoint();
        state.bump(); // ```
        while state.not_at_end() && !state.at(NoteTokenType::Backtick) {
            state.bump();
        }
        if state.at(NoteTokenType::Backtick) {
            state.bump();
        }
        state.finish_at(checkpoint, NoteElementType::CodeBlock);
    }
}