katana-markdown-model 0.2.1

Renderer-neutral Markdown document model for the KatanA ecosystem
Documentation
use super::block::BlockParser;
use super::inline::InlineParser;
use super::line_index::{LineIndex, SourceLine};
use super::quote::BlockQuoteParser;
use crate::{
    KmmDocument, KmmError, KmmNode, KmmNodeKind, MarkdownInput, SourceSpan, TextFingerprint,
};
use std::collections::HashMap;

/// Stateless entry point that turns a `MarkdownInput` into a `KmmDocument`.
///
/// The type has no fields; all per-parse state lives in a `ParserCursor`
/// created inside `parse`.
pub(crate) struct MarkdownParser;

impl MarkdownParser {
    pub fn new() -> Self {
        Self
    }

    pub fn parse(&self, input: MarkdownInput) -> Result<KmmDocument, KmmError> {
        let (path, content) = input.into_parts()?;
        if content.trim().is_empty() {
            return Err(KmmError::EmptySource);
        }
        let index = LineIndex::new(&content);
        let mut cursor = ParserCursor::new(&content, &index);
        Ok(KmmDocument {
            path,
            fingerprint: TextFingerprint::for_text(&content),
            nodes: cursor.parse_nodes(),
        })
    }
}

/// Mutable parsing state for a single pass over one source text.
pub(super) struct ParserCursor<'a> {
    /// The source text being parsed; passed to the index whenever a span is built.
    pub(super) source: &'a str,
    /// Line index built over `source`; supplies the lines and the source spans.
    pub(super) index: &'a LineIndex,
    /// Position of the current line within `index.lines()`; starts at 0.
    pub(super) line: usize,
    /// Per-label counters (keyed by `KmmNodeKind::label`) used to give each
    /// node an ordinal among nodes with the same label.
    ordinals: HashMap<&'static str, usize>,
}

impl<'a> ParserCursor<'a> {
    /// Creates a cursor on the first line of `index` with no ordinals handed out yet.
    fn new(source: &'a str, index: &'a LineIndex) -> Self {
        let ordinals = HashMap::new();
        Self {
            source,
            index,
            line: 0,
            ordinals,
        }
    }

    /// Walks the remaining lines and collects one node per block.
    ///
    /// Lines that are empty after trimming are skipped; every other line starts a
    /// block, and `parse_node` is relied upon to advance the cursor past it.
    fn parse_nodes(&mut self) -> Vec<KmmNode> {
        let line_count = self.index.lines().len();
        let mut collected = Vec::new();
        while self.line < line_count {
            if self.current().text.trim().is_empty() {
                self.line += 1;
            } else {
                collected.push(self.parse_node());
            }
        }
        collected
    }

    /// Parses the block starting at the current line into a node with its children.
    fn parse_node(&mut self) -> KmmNode {
        let first_line = self.line;
        // Classifying the block also advances the cursor, so the span built
        // afterwards runs from `first_line` to the new cursor position.
        let kind = self.node_kind();
        let span = self.span(first_line);
        // Children are derived before `kind` and `span` are moved into the node.
        let children = self.inline_children(&kind, &span);
        let mut parsed = self.node(kind, span);
        parsed.children = children;
        parsed
    }

    /// Classifies the block at the current line and consumes it.
    ///
    /// The checks run in a fixed order and the first match wins: heading, fenced
    /// code (```), dollar math, HTML, footnote definition, table, block quote (`>`),
    /// description list, ordered/unordered list, thematic break (`---`), and finally
    /// paragraph as the fallback.
    fn node_kind(&mut self) -> KmmNodeKind {
        let line = self.current().text.as_str();
        if let Some(heading) = BlockParser::heading(line) {
            self.line += 1;
            return heading;
        }

        let unindented = line.trim_start();
        if unindented.starts_with("```") {
            self.code_block()
        } else if self.is_dollar_math_block() {
            self.dollar_math_block()
        } else if self.is_html_start(line) {
            self.html_block()
        } else if self.is_footnote_definition() {
            self.footnote_definition()
        } else if self.is_table_start() {
            self.table()
        } else if unindented.starts_with('>') {
            self.block_quote()
        } else if self.is_description_start() {
            self.description_list()
        } else if BlockParser::unordered_list_line(line) || BlockParser::ordered_list_line(line) {
            self.list()
        } else if line.trim() == "---" {
            self.line += 1;
            KmmNodeKind::ThematicBreak
        } else {
            self.paragraph()
        }
    }

    /// Builds a node for `kind` covering `span`.
    ///
    /// The ordinal counts earlier nodes with the same label: the first node of a
    /// label gets 0, and each later one gets the previous value plus one.
    fn node(&mut self, kind: KmmNodeKind, span: SourceSpan) -> KmmNode {
        let counter = self.ordinals.entry(kind.label());
        let ordinal = *counter.and_modify(|seen| *seen += 1).or_insert(0);
        // The raw text is copied out because `span` itself is moved into the node.
        let raw = span.raw.text.clone();
        KmmNode::new(kind, &raw, ordinal, span)
    }

    /// Returns the source span from line `start` to the cursor's current line.
    fn span(&self, start: usize) -> SourceSpan {
        let end = self.line;
        self.index.source_span(self.source, start, end)
    }

    /// Produces the child nodes for a block of the given `kind` covering `span`.
    ///
    /// * Headings and paragraphs: inline nodes parsed from the block's raw text,
    ///   with inline byte offsets shifted by the start of the block's byte range.
    /// * Footnote definitions: inline nodes parsed from the definition text, with
    ///   offsets shifted to where the body starts inside the raw text; no children
    ///   if the raw text has no `]:` marker.
    /// * Block quotes and alerts: delegated to `BlockQuoteParser`.
    /// * Every other kind: no children.
    fn inline_children(&self, kind: &KmmNodeKind, span: &SourceSpan) -> Vec<KmmNode> {
        match kind {
            KmmNodeKind::Heading(_) | KmmNodeKind::Paragraph => {
                let block_offset = span.byte_range.start;
                InlineParser::nodes(&span.raw.text, |from, to| {
                    self.index.source_span_for_byte_range(
                        self.source,
                        block_offset + from,
                        block_offset + to,
                    )
                })
            }
            KmmNodeKind::FootnoteDefinition(definition) => {
                match footnote_body_start(&span.raw.text) {
                    None => Vec::new(),
                    Some(body_start) => {
                        let body_offset = span.byte_range.start + body_start;
                        InlineParser::nodes(&definition.text, |from, to| {
                            self.index.source_span_for_byte_range(
                                self.source,
                                body_offset + from,
                                body_offset + to,
                            )
                        })
                    }
                }
            }
            KmmNodeKind::BlockQuote | KmmNodeKind::Alert { .. } => {
                BlockQuoteParser::new(self.source, self.index, span).children()
            }
            _ => Vec::new(),
        }
    }

    /// Returns the line under the cursor.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is at or past the number of indexed lines.
    pub(super) fn current(&self) -> &SourceLine {
        let lines = self.index.lines();
        &lines[self.line]
    }

    /// Returns the raw text of the span the index reports for lines `start` to `end`.
    pub(super) fn raw_text(&self, start: usize, end: usize) -> String {
        self.index.source_span(self.source, start, end).raw.text
    }
}

/// Returns the byte offset in `raw` at which a footnote definition's body begins.
///
/// The body starts after the first `]:` marker and any whitespace (including
/// newlines) that immediately follows it. Returns `None` when `raw` contains no
/// `]:` marker.
fn footnote_body_start(raw: &str) -> Option<usize> {
    raw.find("]:").map(|marker_at| {
        let after_marker = &raw[marker_at + 2..];
        // Whatever `trim_start` leaves behind is the body, so the body offset is
        // the total length minus the length of that remainder.
        raw.len() - after_marker.trim_start().len()
    })
}