katana-markdown-engine 0.1.0

Renderer-neutral Markdown document model for the KatanA ecosystem
Documentation
use crate::{EmojiNode, KmeNode, KmeNodeKind, SourceSpan};

pub(crate) fn emoji_nodes(
    raw: &str,
    span_for_match: impl Fn(usize, usize) -> SourceSpan,
) -> Vec<KmeNode> {
    let mut nodes = shortcode_emoji_nodes(raw, &span_for_match);
    nodes.extend(unicode_emoji_nodes(raw, span_for_match));
    nodes
}

fn shortcode_emoji_nodes(
    raw: &str,
    span_for_match: &impl Fn(usize, usize) -> SourceSpan,
) -> Vec<KmeNode> {
    let mut nodes = Vec::new();
    let mut offset = 0;
    while let Some(start) = raw[offset..].find(':') {
        let absolute_start = offset + start;
        let search_from = absolute_start + 1;
        let Some(end) = raw[search_from..].find(':') else {
            break;
        };
        let absolute_end = search_from + end + 1;
        let candidate = &raw[absolute_start + 1..absolute_end - 1];
        if shortcode_name(candidate) {
            let source = span_for_match(absolute_start, absolute_end);
            nodes.push(emoji_node(
                raw[absolute_start..absolute_end].to_string(),
                Some(candidate.to_string()),
                nodes.len(),
                source,
            ));
        }
        offset = absolute_end;
    }
    nodes
}

fn unicode_emoji_nodes(
    raw: &str,
    span_for_match: impl Fn(usize, usize) -> SourceSpan,
) -> Vec<KmeNode> {
    raw.char_indices()
        .filter(|(_, character)| unicode_emoji(*character))
        .enumerate()
        .map(|(ordinal, (start, character))| {
            let end = start + character.len_utf8();
            emoji_node(
                character.to_string(),
                None,
                ordinal,
                span_for_match(start, end),
            )
        })
        .collect()
}

fn emoji_node(
    value: String,
    shortcode: Option<String>,
    ordinal: usize,
    source: SourceSpan,
) -> KmeNode {
    let kind = KmeNodeKind::Emoji(EmojiNode { value, shortcode });
    KmeNode::new(kind, &source.raw.text.clone(), ordinal, source)
}

fn shortcode_name(candidate: &str) -> bool {
    !candidate.is_empty()
        && candidate.chars().all(|it| {
            it.is_ascii_lowercase() || it.is_ascii_digit() || matches!(it, '_' | '-' | '+')
        })
}

fn unicode_emoji(character: char) -> bool {
    matches!(character as u32, 0x1F300..=0x1FAFF | 0x2600..=0x27BF)
}