// lazyspec 0.8.0

// A little TUI & CLI for project documentation.
// Documentation
use pulldown_cmark::{Alignment, BlockQuoteKind, Event, Options, Parser, Tag, TagEnd};

use super::{GfmSegment, GfmTable};

/// Outcome of feeding one parser event to an active extractor.
pub(super) enum ExtractorResult {
    /// The event was absorbed and the extractor is still collecting.
    Consumed,
    /// A construct finished: its byte range in the source and the segment built from it.
    Finished(std::ops::Range<usize>, GfmSegment),
    /// A footnote definition finished. Kept distinct from `Finished` so the
    /// driver can collect footnotes separately from in-flow segments.
    FootnoteFinished(std::ops::Range<usize>, GfmSegment),
}

/// Incremental extractor that turns a run of parser events into one `GfmSegment`.
pub(super) trait GfmExtractor {
    /// Offers `event` as a possible opening event. Returns `true` if the
    /// extractor recognised it and became active; `range` is the event's
    /// source range.
    fn try_start(&mut self, event: &Event, range: &std::ops::Range<usize>) -> bool;
    /// Feeds `event` to the extractor. The implementations in this file
    /// return `None` when they are not active and `Some(..)` otherwise.
    fn feed(&mut self, event: &Event, range: &std::ops::Range<usize>) -> Option<ExtractorResult>;
    /// Reports whether the extractor is currently in the middle of a construct.
    fn active(&self) -> bool;
}

/// Accumulates the header and body cells of a table as parser events arrive.
pub(super) struct TableExtractor {
    /// True between the table's start and end events.
    active: bool,
    /// Column alignments copied from the `Tag::Table` start event.
    alignments: Vec<Alignment>,
    /// Text of each header cell, in column order.
    headers: Vec<String>,
    /// Completed body rows.
    rows: Vec<Vec<String>>,
    /// Cells of the row currently being read.
    current_row: Vec<String>,
    /// Text collected so far for the cell currently being read.
    cell_text: String,
    /// True while inside the table head, so finished cells go to `headers`.
    in_head: bool,
    /// `range.start` of the event that opened the table.
    start_offset: usize,
}

impl TableExtractor {
    /// Builds an idle extractor: inactive, with every buffer empty.
    pub(super) fn new() -> Self {
        Self {
            start_offset: 0,
            in_head: false,
            active: false,
            cell_text: String::default(),
            current_row: vec![],
            rows: vec![],
            headers: vec![],
            alignments: vec![],
        }
    }
}

impl GfmExtractor for TableExtractor {
    /// Becomes active on a `Start(Table)` event, remembering the column
    /// alignments and the offset where the table begins. Any other event is
    /// rejected with `false`.
    fn try_start(&mut self, event: &Event, range: &std::ops::Range<usize>) -> bool {
        let Event::Start(Tag::Table(aligns)) = event else {
            return false;
        };
        self.active = true;
        self.alignments = aligns.clone();
        self.start_offset = range.start;
        true
    }

    /// Consumes one event of the open table.
    ///
    /// Returns `None` when no table is open, `Consumed` for every event inside
    /// the table, and `Finished` (spanning from the table's start offset to
    /// the end of the closing event) on `End(Table)`.
    fn feed(&mut self, event: &Event, range: &std::ops::Range<usize>) -> Option<ExtractorResult> {
        if !self.active {
            return None;
        }
        match event {
            Event::Start(Tag::TableHead) => self.in_head = true,
            Event::End(TagEnd::TableHead) => self.in_head = false,
            Event::Start(Tag::TableRow) => self.current_row.clear(),
            Event::End(TagEnd::TableRow) => {
                // Move the finished row out instead of cloning it; this also
                // leaves `current_row` empty for the next row.
                let row = std::mem::take(&mut self.current_row);
                self.rows.push(row);
            }
            Event::Start(Tag::TableCell) => self.cell_text.clear(),
            Event::End(TagEnd::TableCell) => {
                // Same move-out trick for the cell text.
                let cell = std::mem::take(&mut self.cell_text);
                if self.in_head {
                    self.headers.push(cell);
                } else {
                    self.current_row.push(cell);
                }
            }
            Event::Text(t) | Event::Code(t) => self.cell_text.push_str(t),
            Event::SoftBreak => self.cell_text.push(' '),
            Event::HardBreak => self.cell_text.push('\n'),
            // An inline `<br>` tag is treated as a line break inside the cell.
            Event::InlineHtml(t)
                if t.eq_ignore_ascii_case("<br>")
                    || t.eq_ignore_ascii_case("<br/>")
                    || t.eq_ignore_ascii_case("<br />") =>
            {
                self.cell_text.push('\n')
            }
            Event::End(TagEnd::Table) => {
                self.active = false;
                self.current_row.clear();
                // Hand the accumulated data to the segment by value; the
                // extractor's buffers are left empty for the next table.
                let seg = GfmSegment::Table(GfmTable {
                    headers: std::mem::take(&mut self.headers),
                    alignments: std::mem::take(&mut self.alignments),
                    rows: std::mem::take(&mut self.rows),
                });
                return Some(ExtractorResult::Finished(
                    self.start_offset..range.end,
                    seg,
                ));
            }
            // Every other event inside a table (emphasis tags, etc.) is
            // swallowed without contributing text.
            _ => {}
        }
        Some(ExtractorResult::Consumed)
    }

    /// True while a table is open.
    fn active(&self) -> bool {
        self.active
    }
}

/// Collects the text of an admonition (a block quote that carries a kind).
pub(super) struct AdmonitionExtractor {
    /// `Some(label)` while an admonition is open; doubles as the active flag.
    kind: Option<String>,
    /// Text collected from the admonition so far.
    body: String,
    /// Number of nested block quotes currently open inside the admonition.
    depth: usize,
    /// `range.start` of the event that opened the admonition.
    start_offset: usize,
}

impl AdmonitionExtractor {
    /// Builds an idle extractor: no kind set, empty body, zero nesting depth.
    pub(super) fn new() -> Self {
        Self {
            start_offset: 0,
            depth: 0,
            body: String::default(),
            kind: None,
        }
    }
}

impl GfmExtractor for AdmonitionExtractor {
    /// Becomes active on a block quote start that carries a kind, recording
    /// the kind's display label and the offset where the quote begins.
    /// Plain block quotes (no kind) and all other events return `false`.
    fn try_start(&mut self, event: &Event, range: &std::ops::Range<usize>) -> bool {
        let Event::Start(Tag::BlockQuote(Some(kind))) = event else {
            return false;
        };
        let kind_str = match kind {
            BlockQuoteKind::Note => "Note",
            BlockQuoteKind::Warning => "Warning",
            BlockQuoteKind::Tip => "Tip",
            BlockQuoteKind::Important => "Important",
            BlockQuoteKind::Caution => "Caution",
        };
        self.kind = Some(kind_str.to_string());
        self.depth = 0;
        self.start_offset = range.start;
        true
    }

    /// Consumes one event of the open admonition.
    ///
    /// Returns `None` when no admonition is open, `Consumed` for events inside
    /// it, and `Finished` when the block quote that opened it closes. The
    /// emitted body is trimmed of surrounding whitespace.
    fn feed(&mut self, event: &Event, range: &std::ops::Range<usize>) -> Option<ExtractorResult> {
        self.kind.as_ref()?;
        match event {
            // Track nested quotes so only the outermost `End` finishes us.
            Event::Start(Tag::BlockQuote(_)) => self.depth += 1,
            Event::End(TagEnd::BlockQuote(_)) => {
                if self.depth == 0 {
                    // `kind` is known to be `Some` here (checked above), so
                    // `?` never actually returns early; it just avoids unwrap.
                    let kind = self.kind.take()?;
                    let body = self.body.trim().to_string();
                    self.body.clear();
                    let seg = GfmSegment::Admonition { kind, body };
                    return Some(ExtractorResult::Finished(
                        self.start_offset..range.end,
                        seg,
                    ));
                }
                self.depth -= 1;
            }
            // Separate consecutive paragraphs / list items with a newline so
            // their text is not glued together ("para1para2"). A trailing
            // separator is removed by the final `trim()`.
            Event::End(TagEnd::Paragraph | TagEnd::Item) => {
                if !self.body.is_empty() && !self.body.ends_with('\n') {
                    self.body.push('\n');
                }
            }
            Event::Text(t) | Event::Code(t) => self.body.push_str(t),
            Event::SoftBreak => self.body.push(' '),
            Event::HardBreak => self.body.push('\n'),
            _ => {}
        }
        Some(ExtractorResult::Consumed)
    }

    /// True while an admonition is open.
    fn active(&self) -> bool {
        self.kind.is_some()
    }
}

/// Collects the label and text of a footnote definition.
pub(super) struct FootnoteExtractor {
    /// True between the footnote definition's start and end events.
    active: bool,
    /// Label taken from the `Tag::FootnoteDefinition` start event.
    label: String,
    /// Text collected from the definition so far.
    body: String,
    /// `range.start` of the event that opened the definition.
    start_offset: usize,
}

impl FootnoteExtractor {
    /// Builds an idle extractor: inactive, with empty label and body.
    pub(super) fn new() -> Self {
        Self {
            start_offset: 0,
            body: String::default(),
            label: String::default(),
            active: false,
        }
    }
}

impl GfmExtractor for FootnoteExtractor {
    /// Becomes active on a footnote definition start, recording its label and
    /// the offset where the definition begins. Other events return `false`.
    fn try_start(&mut self, event: &Event, range: &std::ops::Range<usize>) -> bool {
        let Event::Start(Tag::FootnoteDefinition(label)) = event else {
            return false;
        };
        self.active = true;
        self.label = label.to_string();
        self.start_offset = range.start;
        true
    }

    /// Consumes one event of the open footnote definition.
    ///
    /// Returns `None` when no definition is open, `Consumed` for events inside
    /// it, and `FootnoteFinished` when the definition closes. The emitted body
    /// is trimmed of surrounding whitespace.
    fn feed(&mut self, event: &Event, range: &std::ops::Range<usize>) -> Option<ExtractorResult> {
        if !self.active {
            return None;
        }
        match event {
            Event::End(TagEnd::FootnoteDefinition) => {
                self.active = false;
                // Move the label out rather than clone-then-clear.
                let label = std::mem::take(&mut self.label);
                let body = self.body.trim().to_string();
                self.body.clear();
                let seg = GfmSegment::Footnote { label, body };
                return Some(ExtractorResult::FootnoteFinished(
                    self.start_offset..range.end,
                    seg,
                ));
            }
            // Separate consecutive paragraphs / list items with a newline so
            // their text is not glued together. A trailing separator is
            // removed by the final `trim()`.
            Event::End(TagEnd::Paragraph | TagEnd::Item) => {
                if !self.body.is_empty() && !self.body.ends_with('\n') {
                    self.body.push('\n');
                }
            }
            Event::Text(t) | Event::Code(t) => self.body.push_str(t),
            // Unlike the other extractors, soft breaks are kept as newlines here.
            Event::SoftBreak | Event::HardBreak => self.body.push('\n'),
            _ => {}
        }
        Some(ExtractorResult::Consumed)
    }

    /// True while a footnote definition is open.
    fn active(&self) -> bool {
        self.active
    }
}

/// Parser option set used for extraction: tables, GFM extensions, footnotes,
/// strikethrough and task lists.
fn parser_options() -> Options {
    let mut enabled = Options::ENABLE_TABLES;
    enabled |= Options::ENABLE_GFM;
    enabled |= Options::ENABLE_FOOTNOTES;
    enabled |= Options::ENABLE_STRIKETHROUGH;
    enabled |= Options::ENABLE_TASKLISTS;
    enabled
}

/// Splits `body` into segments: tables, admonitions and footnotes are lifted
/// out by their extractors, and the source text between them is kept as
/// markdown segments (see `assemble_segments` for the final ordering).
pub fn extract_gfm_segments(body: &str) -> Vec<GfmSegment> {
    let mut table_ext = TableExtractor::new();
    let mut admonition_ext = AdmonitionExtractor::new();
    let mut footnote_ext = FootnoteExtractor::new();

    let mut ranged_segments: Vec<(std::ops::Range<usize>, GfmSegment)> = Vec::new();
    let mut footnote_ranges: Vec<std::ops::Range<usize>> = Vec::new();
    let mut footnotes: Vec<GfmSegment> = Vec::new();

    for (event, range) in Parser::new_ext(body, parser_options()).into_offset_iter() {
        // First give the event to whichever extractor is already collecting.
        let extractors: [&mut dyn GfmExtractor; 3] =
            [&mut footnote_ext, &mut admonition_ext, &mut table_ext];

        let mut outcome = None;
        for ext in extractors {
            if ext.active() {
                outcome = ext.feed(&event, &range);
                if outcome.is_some() {
                    break;
                }
            }
        }

        match outcome {
            Some(ExtractorResult::Consumed) => {}
            Some(ExtractorResult::Finished(span, seg)) => ranged_segments.push((span, seg)),
            Some(ExtractorResult::FootnoteFinished(span, seg)) => {
                footnote_ranges.push(span);
                footnotes.push(seg);
            }
            None => {
                // Nobody claimed the event: see whether it opens a construct.
                // The first extractor that accepts it wins.
                if !table_ext.try_start(&event, &range)
                    && !admonition_ext.try_start(&event, &range)
                {
                    footnote_ext.try_start(&event, &range);
                }
            }
        }
    }

    assemble_segments(body, ranged_segments, footnote_ranges, footnotes)
}

/// Interleaves extracted segments with the markdown text between them.
///
/// * `body` - the full source text the ranges index into.
/// * `ranged_segments` - in-flow segments with their byte ranges in `body`.
/// * `footnote_ranges` - byte ranges of footnote definitions; the text they
///   cover is excluded from the markdown segments.
/// * `footnotes` - footnote segments, appended after everything else in the
///   order given.
///
/// Text between ranges is trimmed and dropped when empty. If nothing at all
/// was produced but `body` has non-whitespace content, the whole trimmed body
/// is returned as a single markdown segment.
///
/// Ranges are used to slice `body` directly, so they must lie within it on
/// character boundaries.
fn assemble_segments(
    body: &str,
    mut ranged_segments: Vec<(std::ops::Range<usize>, GfmSegment)>,
    footnote_ranges: Vec<std::ops::Range<usize>>,
    footnotes: Vec<GfmSegment>,
) -> Vec<GfmSegment> {
    /// Pushes `text` as a markdown segment unless it is blank after trimming.
    fn push_markdown(out: &mut Vec<GfmSegment>, text: &str) {
        let md = text.trim();
        if !md.is_empty() {
            out.push(GfmSegment::Markdown(md.to_string()));
        }
    }

    ranged_segments.sort_by_key(|(r, _)| r.start);

    // Every range that must be cut out of the markdown flow, in source order.
    // The merged list is sorted here, so the footnote ranges need no pre-sort.
    let mut all_ranges: Vec<std::ops::Range<usize>> = ranged_segments
        .iter()
        .map(|(r, _)| r.clone())
        .chain(footnote_ranges)
        .collect();
    all_ranges.sort_by_key(|r| r.start);

    // Segments are consumed in order and moved into the result (no clone).
    let mut pending = ranged_segments.into_iter().peekable();
    let mut result: Vec<GfmSegment> = Vec::new();
    let mut cursor = 0;

    for r in &all_ranges {
        if cursor < r.start {
            push_markdown(&mut result, &body[cursor..r.start]);
        }
        // Footnote ranges have no in-flow segment; they only advance the cursor.
        if let Some((_, seg)) = pending.next_if(|(span, _)| span == r) {
            result.push(seg);
        }
        cursor = r.end;
    }

    if cursor < body.len() {
        push_markdown(&mut result, &body[cursor..]);
    }

    result.extend(footnotes);

    if result.is_empty() {
        push_markdown(&mut result, body);
    }

    result
}