treemd 0.5.12 - Docs.rs

//! Utility functions for UI rendering
//!
//! Pure functions for layout calculations, text parsing, and formatting.

use crate::parser::output::Alignment;
use ratatui::layout::Rect;
use ratatui::style::Style;
use ratatui::text::{Line, Span};
use unicode_width::{UnicodeWidthChar, UnicodeWidthStr};

const HALFWIDTH_KATAKANA_VOICED_SOUND_MARK: char = '\u{FF9E}';
const HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK: char = '\u{FF9F}';

/// Return the width a terminal is expected to use for text.
///
/// `unicode-width` treats halfwidth Katakana dakuten/handakuten as combining
/// marks, but terminals commonly render U+FF9E/U+FF9F as one cell. Ratatui's
/// current buffer diffing has the same adjustment, so table layout should use
/// it too.
pub fn terminal_width(text: &str) -> usize {
    text.width()
        + text
            .chars()
            .filter(|c| {
                matches!(
                    *c,
                    HALFWIDTH_KATAKANA_VOICED_SOUND_MARK
                        | HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK
                )
            })
            .count()
}

fn terminal_char_width(c: char) -> usize {
    if matches!(
        c,
        HALFWIDTH_KATAKANA_VOICED_SOUND_MARK | HALFWIDTH_KATAKANA_SEMI_VOICED_SOUND_MARK
    ) {
        1
    } else {
        c.width().unwrap_or(1)
    }
}

/// Calculate a popup area with minimum size constraints.
///
/// Returns a `Rect` that is centered within the parent area, sized as a
/// percentage but respecting minimum dimensions. If the parent is smaller
/// than the minimum, the popup will fill the available space.
///
/// # Arguments
/// * `area` - The parent area to center within
/// * `percent_x` - Width as a percentage of parent (0-100)
/// * `percent_y` - Height as a percentage of parent (0-100)
/// * `min_width` - Minimum width in columns (will not exceed parent width)
/// * `min_height` - Minimum height in rows (will not exceed parent height)
pub fn popup_area(
    area: Rect,
    percent_x: u16,
    percent_y: u16,
    min_width: u16,
    min_height: u16,
) -> Rect {
    // Calculate percentage-based dimensions
    let pct_width = area.width * percent_x / 100;
    let pct_height = area.height * percent_y / 100;

    // Apply minimum constraints, but don't exceed parent
    let width = pct_width.max(min_width).min(area.width);
    let height = pct_height.max(min_height).min(area.height);

    // Center the popup
    let x = area.x + (area.width.saturating_sub(width)) / 2;
    let y = area.y + (area.height.saturating_sub(height)) / 2;

    Rect::new(x, y, width, height)
}

/// Detect checkbox markers in text for task list items.
///
/// Parses text to identify task list checkbox patterns (`[x]`, `[X]`, `[ ]`).
///
/// # Arguments
/// * `text` - The text to parse
///
/// # Returns
/// A tuple of `(is_task, is_checked, remaining_text)`:
/// - `is_task`: true if a checkbox pattern was found
/// - `is_checked`: true if the checkbox is checked (`[x]` or `[X]`)
/// - `remaining_text`: the text after the checkbox marker (or original text if no checkbox)
pub fn detect_checkbox_in_text(text: &str) -> (bool, bool, &str) {
    let trimmed = text.trim_start();

    // Check for [x] or [X] (checked)
    if let Some(stripped) = trimmed
        .strip_prefix("[x]")
        .or_else(|| trimmed.strip_prefix("[X]"))
    {
        return (true, true, stripped.trim_start());
    }

    // Check for [ ] (unchecked)
    if let Some(stripped) = trimmed.strip_prefix("[ ]") {
        return (true, false, stripped.trim_start());
    }

    // Not a task list item
    (false, false, text)
}

/// Align text within a fixed width using Unicode-aware width calculations.
///
/// Handles left, center, right, and none (defaults to left) alignments.
/// If text is longer than width, it will be truncated with ellipsis.
///
/// # Arguments
/// * `text` - The text to align
/// * `width` - The total width to align within (including padding)
/// * `alignment` - The alignment direction
///
/// # Returns
/// A string padded to the specified width with appropriate alignment.
pub fn align_text(text: &str, width: usize, alignment: &Alignment) -> String {
    // Use Unicode display width instead of character/byte length
    let text_width = terminal_width(text);

    // If text is longer than width, truncate it
    if text_width >= width {
        // Use single ellipsis character (…) which is 1 display width
        // Much more space-efficient than "..." (3 chars)
        if width > 3 {
            // Truncate with ellipsis: " text… " or "text…" depending on space
            let available = width.saturating_sub(2); // 1 for padding, 1 for ellipsis
            let mut truncated = String::new();
            let mut current_width = 0;
            for c in text.chars() {
                let char_width = terminal_char_width(c);
                if current_width + char_width > available {
                    break;
                }
                truncated.push(c);
                current_width += char_width;
            }
            // Pad to fill remaining space
            let remaining = width.saturating_sub(current_width + 2); // +2 for " " and "…"
            return format!(" {}…{}", truncated, " ".repeat(remaining));
        }
        // Very narrow: just show what fits
        let mut truncated = String::new();
        let mut current_width = 0;
        for c in text.chars() {
            let char_width = terminal_char_width(c);
            if current_width + char_width > width {
                break;
            }
            truncated.push(c);
            current_width += char_width;
        }
        return truncated;
    }

    // Width includes padding we added earlier
    let content_width = width;

    match alignment {
        Alignment::Left | Alignment::None => {
            // Left-aligned: " text     "
            let right_padding = content_width.saturating_sub(text_width + 1);
            format!(" {}{}", text, " ".repeat(right_padding))
        }
        Alignment::Center => {
            // Center-aligned: "  text   "
            let total_padding = content_width.saturating_sub(text_width);
            let left_pad = total_padding / 2;
            let right_pad = total_padding - left_pad;
            format!("{}{}{}", " ".repeat(left_pad), text, " ".repeat(right_pad))
        }
        Alignment::Right => {
            // Right-aligned: "     text "
            let left_padding = content_width.saturating_sub(text_width + 1);
            format!("{}{} ", " ".repeat(left_padding), text)
        }
    }
}

/// Highlight search matches within text, returning a Line with styled spans.
///
/// Performs case-insensitive matching and splits the text into segments,
/// applying the highlight style to matched portions.
///
/// # Arguments
/// * `text` - The text to search within
/// * `query` - The search query (case-insensitive)
/// * `base_style` - Style for non-matched text
/// * `highlight_style` - Style for matched text
///
/// # Returns
/// A vector of Spans with appropriate styling applied
pub fn highlight_search_matches(
    text: &str,
    query: &str,
    base_style: Style,
    highlight_style: Style,
) -> Vec<Span<'static>> {
    if query.is_empty() {
        return vec![Span::styled(text.to_string(), base_style)];
    }

    let text_lower = text.to_lowercase();
    let query_lower = query.to_lowercase();

    let mut spans = Vec::new();
    let mut last_end = 0;

    // Find all matches
    let mut search_start = 0;
    while let Some(rel_pos) = text_lower[search_start..].find(&query_lower) {
        let match_start = search_start + rel_pos;
        let match_end = match_start + query.len();

        // Verify char boundaries
        if !text.is_char_boundary(match_start) || !text.is_char_boundary(match_end) {
            search_start = match_start + 1;
            continue;
        }

        // Add text before match
        if match_start > last_end {
            spans.push(Span::styled(
                text[last_end..match_start].to_string(),
                base_style,
            ));
        }

        // Add highlighted match
        spans.push(Span::styled(
            text[match_start..match_end].to_string(),
            highlight_style,
        ));

        last_end = match_end;
        search_start = match_end;

        if search_start >= text.len() {
            break;
        }
    }

    // Add remaining text after last match
    if last_end < text.len() {
        spans.push(Span::styled(text[last_end..].to_string(), base_style));
    }

    // If no matches found, return original text with base style
    if spans.is_empty() {
        spans.push(Span::styled(text.to_string(), base_style));
    }

    spans
}

/// Build a Line with optional search highlighting.
///
/// Convenience wrapper that builds a complete Line, optionally with a prefix.
///
/// # Arguments
/// * `prefix` - Optional prefix spans to prepend
/// * `text` - The main text content
/// * `query` - Optional search query for highlighting
/// * `base_style` - Style for non-matched text
/// * `highlight_style` - Style for matched text
pub fn build_highlighted_line(
    prefix: Vec<Span<'static>>,
    text: &str,
    query: Option<&str>,
    base_style: Style,
    highlight_style: Style,
) -> Line<'static> {
    let mut spans = prefix;

    if let Some(q) = query {
        spans.extend(highlight_search_matches(
            text,
            q,
            base_style,
            highlight_style,
        ));
    } else {
        spans.push(Span::styled(text.to_string(), base_style));
    }

    Line::from(spans)
}

/// Strip YAML frontmatter from the beginning of a document.
///
/// Frontmatter must:
/// - Start at the very beginning of the document (possibly after leading newlines)
/// - Begin with `---` on its own line
/// - End with `---` on its own line
///
/// # Arguments
/// * `content` - The document content
///
/// # Returns
/// Content with frontmatter removed, or original content if no frontmatter found
pub fn strip_frontmatter(content: &str) -> String {
    // Frontmatter must start at the beginning (after optional whitespace/newlines)
    let trimmed = content.trim_start();

    if !trimmed.starts_with("---") {
        return content.to_string();
    }

    // Find the closing ---
    // The pattern is: ---\n...\n---\n (or end of content)
    if let Some(rest) = trimmed.strip_prefix("---") {
        // Find the closing marker (must be on its own line)
        if let Some(end_pos) = rest.find("\n---") {
            // Skip past the closing ---
            let after_close = &rest[end_pos + 4..];
            // Also skip the newline after the closing --- if present
            let result = after_close.strip_prefix('\n').unwrap_or(after_close);
            return result.to_string();
        }
    }

    content.to_string()
}

/// Strip LaTeX math expressions and commands from content.
///
/// Removes LaTeX commands and math delimiters while preserving the text content.
/// For common math symbols and Greek letters, it uses Unicode approximations
/// to maintain readability (SOTA approach).
///
/// # Arguments
/// * `content` - The document content
///
/// # Returns
/// Content with LaTeX expressions removed or converted to Unicode
pub fn strip_latex(content: &str) -> String {
    use regex::{Captures, Regex};
    use std::sync::OnceLock;

    // All regexes are compiled once and cached via OnceLock (following parser/utils.rs pattern)

    // Symbol replacement patterns: (regex, unicode_replacement) compiled once
    static SYMBOL_PATTERNS: OnceLock<Vec<(Regex, String)>> = OnceLock::new();
    let symbol_patterns = SYMBOL_PATTERNS.get_or_init(|| {
        let symbols: &[(&str, &str)] = &[
            (r"\\alpha", "α"),
            (r"\\beta", "β"),
            (r"\\gamma", "γ"),
            (r"\\delta", "δ"),
            (r"\\epsilon", "ε"),
            (r"\\zeta", "ζ"),
            (r"\\eta", "η"),
            (r"\\theta", "θ"),
            (r"\\iota", "ι"),
            (r"\\kappa", "κ"),
            (r"\\lambda", "λ"),
            (r"\\mu", "μ"),
            (r"\\nu", "ν"),
            (r"\\xi", "ξ"),
            (r"\\pi", "π"),
            (r"\\rho", "ρ"),
            (r"\\sigma", "σ"),
            (r"\\tau", "τ"),
            (r"\\upsilon", "υ"),
            (r"\\phi", "φ"),
            (r"\\chi", "χ"),
            (r"\\psi", "ψ"),
            (r"\\omega", "ω"),
            (r"\\Gamma", "Γ"),
            (r"\\Delta", "Δ"),
            (r"\\Theta", "Θ"),
            (r"\\Lambda", "Λ"),
            (r"\\Xi", "Ξ"),
            (r"\\Pi", "Π"),
            (r"\\Sigma", "Σ"),
            (r"\\Phi", "Φ"),
            (r"\\Psi", "Ψ"),
            (r"\\Omega", "Ω"),
            (r"\\sum", "∑"),
            (r"\\prod", "∏"),
            (r"\\int", "∫"),
            (r"\\infty", "∞"),
            (r"\\approx", "≈"),
            (r"\\neq", "≠"),
            (r"\\le", "≤"),
            (r"\\ge", "≥"),
            (r"\\pm", "±"),
            (r"\\times", "×"),
            (r"\\div", "÷"),
            (r"\\partial", "∂"),
            (r"\\nabla", "∇"),
            (r"\\forall", "∀"),
            (r"\\exists", "∃"),
            (r"\\in", "∈"),
            (r"\\notin", "∉"),
            (r"\\subset", "⊂"),
            (r"\\supset", "⊃"),
            (r"\\cup", "∪"),
            (r"\\cap", "∩"),
            (r"\\Rightarrow", "⇒"),
            (r"\\rightarrow", "→"),
            (r"\\Leftarrow", "⇐"),
            (r"\\leftarrow", "←"),
            (r"\\Leftrightarrow", "⇔"),
            (r"\\leftrightarrow", "↔"),
            (r"\\cdot", "·"),
            (r"\\dots", "…"),
        ];
        symbols
            .iter()
            .map(|(pattern, replacement)| {
                let re = Regex::new(&format!(r"{}([^a-zA-Z]|$)", pattern)).unwrap();
                let repl = format!("{}$1", replacement);
                (re, repl)
            })
            .collect()
    });

    static SUPERSCRIPT: OnceLock<Regex> = OnceLock::new();
    static SUBSCRIPT: OnceLock<Regex> = OnceLock::new();
    static DISPLAY_MATH: OnceLock<Regex> = OnceLock::new();
    static INLINE_MATH: OnceLock<Regex> = OnceLock::new();
    static PAREN_MATH: OnceLock<Regex> = OnceLock::new();
    static BRACKET_MATH: OnceLock<Regex> = OnceLock::new();
    static LATEX_ENV: OnceLock<Regex> = OnceLock::new();
    static LATEX_ENV_INLINE: OnceLock<Regex> = OnceLock::new();
    static BEGIN_END_STANDALONE: OnceLock<Regex> = OnceLock::new();
    static FONT_SIZE_CMD: OnceLock<Regex> = OnceLock::new();
    static STANDALONE_CMD: OnceLock<Regex> = OnceLock::new();
    static CMD_WITH_ARGS_LINE: OnceLock<Regex> = OnceLock::new();
    static CMD_WITH_ARGS_INLINE_STRIP: OnceLock<Regex> = OnceLock::new();
    static FONTSIZE_INLINE: OnceLock<Regex> = OnceLock::new();
    static CMD_WITH_ARGS_INLINE: OnceLock<Regex> = OnceLock::new();
    static TEXT_FORMATTING: OnceLock<Regex> = OnceLock::new();
    static TEXTCOLOR: OnceLock<Regex> = OnceLock::new();
    static COLORBOX: OnceLock<Regex> = OnceLock::new();
    static FONT_SIZE_INLINE: OnceLock<Regex> = OnceLock::new();
    static BARE_CMD: OnceLock<Regex> = OnceLock::new();
    static BARE_CMD_EOL: OnceLock<Regex> = OnceLock::new();
    static MULTI_SPACE: OnceLock<Regex> = OnceLock::new();

    let superscript = SUPERSCRIPT.get_or_init(|| Regex::new(r"\^\{?([0-9+\-=()nix])\}?").unwrap());
    let subscript = SUBSCRIPT.get_or_init(|| {
        Regex::new(
            r"_(?:\{([0-9+\-=()aehijklmnoprstuvx])\}|([0-9+\-=()aehijklmnoprstuvx])([^a-zA-Z_]|$))",
        )
        .unwrap()
    });
    let display_math = DISPLAY_MATH.get_or_init(|| Regex::new(r"\$\$[\s\S]*?\$\$").unwrap());
    let inline_math = INLINE_MATH.get_or_init(|| Regex::new(r"\$([^\$\n]+)\$").unwrap());
    let paren_math = PAREN_MATH.get_or_init(|| Regex::new(r"\\\(([\s\S]*?)\\\)").unwrap());
    let bracket_math = BRACKET_MATH.get_or_init(|| Regex::new(r"\\\[([\s\S]*?)\\\]").unwrap());
    let latex_env = LATEX_ENV.get_or_init(|| {
        Regex::new(r"(?s)^\s*\\begin\{[^}]+\}\s*(.*?)\s*\\end\{[^}]+\}\s*$").unwrap()
    });
    let latex_env_inline = LATEX_ENV_INLINE
        .get_or_init(|| Regex::new(r"(?s)\\begin\{[^}]+\}(.*?)\\end\{[^}]+\}").unwrap());
    let begin_end_standalone =
        BEGIN_END_STANDALONE.get_or_init(|| Regex::new(r"\\(begin|end)\{[^}]*\}").unwrap());
    let font_size_cmd = FONT_SIZE_CMD.get_or_init(|| Regex::new(
        r"(?m)^\s*\\(tiny|scriptsize|footnotesize|small|normalsize|large|Large|LARGE|huge|Huge|HUGE|ssmall|miniscule)\s*$"
    ).unwrap());
    let standalone_cmd = STANDALONE_CMD.get_or_init(|| Regex::new(
        r"(?m)^\s*\\(newpage|clearpage|pagebreak|tableofcontents|maketitle|listoffigures|listoftables|appendix|frontmatter|mainmatter|backmatter|centering|raggedright|raggedleft|noindent|indent|par|bigskip|medskip|smallskip|vfill|hfill|newline|linebreak)\s*$"
    ).unwrap());
    let cmd_with_args_line = CMD_WITH_ARGS_LINE.get_or_init(|| Regex::new(
        r"(?m)^\s*\\(usepackage|documentclass|title|author|date|include|input|bibliography|bibliographystyle|setlength|renewcommand|newcommand|setcounter|addtocounter|pagenumbering|pagestyle|thispagestyle|geometry|hypersetup|definecolor|graphicspath|addbibresource|fontsize|sethlcolor|titlespacing|titleformat|captionsetup|lstset)(\[[^\]]*\])?(\{[^}]*\})+\s*$"
    ).unwrap());
    let cmd_with_args_inline_strip = CMD_WITH_ARGS_INLINE_STRIP.get_or_init(|| Regex::new(
        r"\\(usepackage|documentclass|setlength|renewcommand|newcommand|setcounter|addtocounter|pagenumbering|pagestyle|thispagestyle|geometry|hypersetup|definecolor|graphicspath|addbibresource|sethlcolor|titlespacing|titleformat|captionsetup|lstset)(\[[^\]]*\])?(\{[^}]*\})+"
    ).unwrap());
    let fontsize_inline =
        FONTSIZE_INLINE.get_or_init(|| Regex::new(r"\\fontsize\{[^}]*\}\{[^}]*\}").unwrap());
    let cmd_with_args_inline = CMD_WITH_ARGS_INLINE.get_or_init(|| {
        Regex::new(
            r"\\(label|ref|cite|eqref|pageref|vspace|hspace|phantom|hphantom|vphantom)\{[^}]*\}",
        )
        .unwrap()
    });
    let text_formatting = TEXT_FORMATTING.get_or_init(|| {
        Regex::new(r"\\(textbf|textit|emph|underline|texttt|hl|textsf|textsc|textsl)\{([^}]*)\}")
            .unwrap()
    });
    let textcolor =
        TEXTCOLOR.get_or_init(|| Regex::new(r"\\textcolor\{[^}]*\}\{([^}]*)\}").unwrap());
    let colorbox = COLORBOX.get_or_init(|| Regex::new(r"\\colorbox\{[^}]*\}\{([^}]*)\}").unwrap());
    let font_size_inline = FONT_SIZE_INLINE.get_or_init(|| Regex::new(
        r"\\(tiny|scriptsize|footnotesize|small|normalsize|large|Large|LARGE|huge|Huge|HUGE|ssmall|miniscule)([^a-zA-Z]|$)"
    ).unwrap());
    let bare_cmd = BARE_CMD.get_or_init(|| Regex::new(r"\\[a-zA-Z]+\$?([\s,;.!?\)\]\}])").unwrap());
    let bare_cmd_eol = BARE_CMD_EOL.get_or_init(|| Regex::new(r"\\[a-zA-Z]+\$?$").unwrap());

    // Superscript/subscript lookups as match expressions (compiler reduces to jump tables)
    fn to_superscript(c: char) -> Option<char> {
        match c {
            '0' => Some('⁰'),
            '1' => Some('¹'),
            '2' => Some('²'),
            '3' => Some('³'),
            '4' => Some('⁴'),
            '5' => Some('⁵'),
            '6' => Some('⁶'),
            '7' => Some('⁷'),
            '8' => Some('⁸'),
            '9' => Some('⁹'),
            '+' => Some('⁺'),
            '-' => Some('⁻'),
            '=' => Some('⁼'),
            '(' => Some('⁽'),
            ')' => Some('⁾'),
            'n' => Some('ⁿ'),
            'i' => Some('ⁱ'),
            'x' => Some('ˣ'),
            _ => None,
        }
    }
    fn to_subscript(c: char) -> Option<char> {
        match c {
            '0' => Some('₀'),
            '1' => Some('₁'),
            '2' => Some('₂'),
            '3' => Some('₃'),
            '4' => Some('₄'),
            '5' => Some('₅'),
            '6' => Some('₆'),
            '7' => Some('₇'),
            '8' => Some('₈'),
            '9' => Some('₉'),
            '+' => Some('₊'),
            '-' => Some('₋'),
            '=' => Some('₌'),
            '(' => Some('₍'),
            ')' => Some('₎'),
            'a' => Some('ₐ'),
            'e' => Some('ₑ'),
            'h' => Some('ₕ'),
            'i' => Some('ᵢ'),
            'j' => Some('ⱼ'),
            'k' => Some('ₖ'),
            'l' => Some('ₗ'),
            'm' => Some('ₘ'),
            'n' => Some('ₙ'),
            'o' => Some('ₒ'),
            'p' => Some('ₚ'),
            'r' => Some('ᵣ'),
            's' => Some('ₛ'),
            't' => Some('ₜ'),
            'u' => Some('ᵤ'),
            'v' => Some('ᵥ'),
            'x' => Some('ₓ'),
            _ => None,
        }
    }

    // 1. Convert common LaTeX symbols to Unicode approximations (SOTA)
    let mut result = content.to_string();

    // Protect code spans and fenced code blocks from LaTeX transformations.
    // Extract them as placeholders, apply transforms, then restore.
    static CODE_FENCE: OnceLock<Regex> = OnceLock::new();
    static CODE_SPAN_DOUBLE: OnceLock<Regex> = OnceLock::new();
    static CODE_SPAN_SINGLE: OnceLock<Regex> = OnceLock::new();
    let code_fence = CODE_FENCE.get_or_init(|| Regex::new(r"(?s)```[^\n]*\n.*?```").unwrap());
    let code_span_double = CODE_SPAN_DOUBLE.get_or_init(|| Regex::new(r"``(.+?)``").unwrap());
    let code_span_single = CODE_SPAN_SINGLE.get_or_init(|| Regex::new(r"`([^`]+)`").unwrap());

    let mut code_placeholders: Vec<String> = Vec::new();
    // Replace fenced code blocks first (multi-line)
    result = code_fence
        .replace_all(&result, |caps: &Captures| {
            let idx = code_placeholders.len();
            code_placeholders.push(caps[0].to_string());
            format!("\x00CODE{idx}\x00")
        })
        .to_string();
    // Replace double-backtick code spans
    result = code_span_double
        .replace_all(&result, |caps: &Captures| {
            let idx = code_placeholders.len();
            code_placeholders.push(caps[0].to_string());
            format!("\x00CODE{idx}\x00")
        })
        .to_string();
    // Replace single-backtick code spans
    result = code_span_single
        .replace_all(&result, |caps: &Captures| {
            let idx = code_placeholders.len();
            code_placeholders.push(caps[0].to_string());
            format!("\x00CODE{idx}\x00")
        })
        .to_string();

    for (re, replacement) in symbol_patterns {
        result = re.replace_all(&result, replacement.as_str()).to_string();
    }

    // Replace ^x with superscript if x is in map
    result = superscript
        .replace_all(&result, |caps: &Captures| {
            let val = caps[1].chars().next().unwrap();
            to_superscript(val)
                .map(|v| v.to_string())
                .unwrap_or_else(|| caps[0].to_string())
        })
        .to_string();

    // Replace _{x} or _x (when not mid-identifier) with subscript
    result = subscript
        .replace_all(&result, |caps: &Captures| {
            let (val, trailing) = if let Some(m) = caps.get(1) {
                (m.as_str().chars().next().unwrap(), "")
            } else {
                (
                    caps[2].chars().next().unwrap(),
                    caps.get(3).map_or("", |m| m.as_str()),
                )
            };
            to_subscript(val)
                .map(|v| format!("{v}{trailing}"))
                .unwrap_or_else(|| caps[0].to_string())
        })
        .to_string();

    // 2. Strip remaining delimiters and structural commands
    result = display_math.replace_all(&result, "").to_string();
    result = inline_math.replace_all(&result, "$1").to_string();
    result = paren_math.replace_all(&result, "$1").to_string();
    result = bracket_math.replace_all(&result, "$1").to_string();
    result = latex_env.replace_all(&result, "$1").to_string();
    result = latex_env_inline.replace_all(&result, "$1").to_string();
    result = begin_end_standalone.replace_all(&result, "").to_string();
    result = font_size_cmd.replace_all(&result, "").to_string();
    result = standalone_cmd.replace_all(&result, "").to_string();
    result = cmd_with_args_line.replace_all(&result, "").to_string();
    result = cmd_with_args_inline_strip
        .replace_all(&result, "")
        .to_string();
    result = fontsize_inline.replace_all(&result, "").to_string();
    result = cmd_with_args_inline.replace_all(&result, "").to_string();
    result = text_formatting.replace_all(&result, "$2").to_string();
    result = textcolor.replace_all(&result, "$1").to_string();
    result = colorbox.replace_all(&result, "$1").to_string();
    result = font_size_inline.replace_all(&result, "$2").to_string();
    result = bare_cmd.replace_all(&result, "$1").to_string();
    result = bare_cmd_eol.replace_all(&result, "").to_string();

    // Collapse multiple spaces left by stripped inline commands
    let multi_space = MULTI_SPACE.get_or_init(|| Regex::new(r"  +").unwrap());
    result = multi_space.replace_all(&result, " ").to_string();

    // Restore protected code spans and fenced code blocks
    for (idx, original) in code_placeholders.iter().enumerate() {
        result = result.replace(&format!("\x00CODE{idx}\x00"), original);
    }

    result
}

/// Strip ALL lines starting with backslash (aggressive LaTeX filtering).
///
/// This is a simple catch-all for users whose documents have LaTeX commands
/// not covered by the standard filtering.
///
/// # Arguments
/// * `content` - The document content
///
/// # Returns
/// Content with all backslash-starting lines removed
pub fn strip_latex_aggressive(content: &str) -> String {
    use regex::Regex;
    use std::sync::OnceLock;

    static BACKSLASH_LINE: OnceLock<Regex> = OnceLock::new();
    let backslash_line =
        BACKSLASH_LINE.get_or_init(|| Regex::new(r"(?m)^\s*\\[a-zA-Z].*$").unwrap());
    backslash_line.replace_all(content, "").to_string()
}

/// Wrap text to a specific width, preserving word boundaries when possible.
///
/// Uses unicode-width for accurate terminal display measurement.
///
/// # Arguments
/// * `text` - The text to wrap
/// * `width` - The maximum width per line
///
/// # Returns
/// A vector of lines
pub fn wrap_text(text: &str, width: usize) -> Vec<String> {
    if width == 0 {
        return vec![text.to_string()];
    }

    let mut lines = Vec::new();
    let mut current_line = String::new();
    let mut current_width = 0;

    for word in text.split_whitespace() {
        let word_width = terminal_width(word);

        // If word itself is wider than limit, we must break it
        if word_width > width {
            // Push whatever we have so far
            if !current_line.is_empty() {
                lines.push(std::mem::take(&mut current_line));
                current_width = 0;
            }

            let mut remaining = word;
            while !remaining.is_empty() {
                let mut chunk = String::new();
                let mut chunk_width = 0;
                for c in remaining.chars() {
                    let c_width = terminal_char_width(c);
                    if chunk_width + c_width > width {
                        break;
                    }
                    chunk.push(c);
                    chunk_width += c_width;
                }
                if chunk.is_empty() {
                    break;
                }
                let chunk_len = chunk.len();
                lines.push(chunk);
                remaining = &remaining[chunk_len..];
            }
            continue;
        }

        // Check if word fits on current line (plus a space)
        let space_needed = if current_line.is_empty() { 0 } else { 1 };
        if current_width + space_needed + word_width <= width {
            if space_needed > 0 {
                current_line.push(' ');
                current_width += 1;
            }
            current_line.push_str(word);
            current_width += word_width;
        } else {
            // Doesn't fit, start new line
            lines.push(std::mem::take(&mut current_line));
            current_line = word.to_string();
            current_width = word_width;
        }
    }

    if !current_line.is_empty() {
        lines.push(current_line);
    }

    if lines.is_empty() {
        lines.push(if text.trim().is_empty() && !text.is_empty() {
            text.to_string()
        } else {
            String::new()
        });
    }

    lines
}

/// Apply content filters based on configuration.
///
/// Strips frontmatter and/or LaTeX based on the provided flags.
///
/// # Arguments
/// * `content` - The document content
/// * `hide_frontmatter` - Whether to strip YAML frontmatter
/// * `hide_latex` - Whether to strip LaTeX expressions
/// * `latex_aggressive` - Whether to use aggressive filtering (strip all backslash lines)
///
/// # Returns
/// Filtered content
pub fn filter_content(
    content: &str,
    hide_frontmatter: bool,
    hide_latex: bool,
    latex_aggressive: bool,
) -> String {
    let mut result = content.to_string();

    if hide_frontmatter {
        result = strip_frontmatter(&result);
    }

    if hide_latex {
        result = strip_latex(&result);

        // Apply aggressive filtering if enabled (catches anything standard missed)
        if latex_aggressive {
            result = strip_latex_aggressive(&result);
        }
    }

    result
}

#[cfg(test)]
mod tests {
    use super::*;

    mod strip_frontmatter_tests {
        use super::*;

        #[test]
        fn test_simple_frontmatter() {
            let content = "---\ntitle: Test\n---\n\n# Heading\n\nContent";
            let result = strip_frontmatter(content);
            assert_eq!(result, "\n# Heading\n\nContent");
        }

        #[test]
        fn test_no_frontmatter() {
            let content = "# Heading\n\nContent";
            let result = strip_frontmatter(content);
            assert_eq!(result, content);
        }

        #[test]
        fn test_frontmatter_with_yaml() {
            let content = "---\ntitle: My Doc\ntags:\n  - rust\n  - markdown\n---\n# Start";
            let result = strip_frontmatter(content);
            assert_eq!(result, "# Start");
        }

        #[test]
        fn test_frontmatter_not_at_start() {
            let content = "Some text\n---\ntitle: Test\n---\nMore text";
            let result = strip_frontmatter(content);
            assert_eq!(result, content); // Should not strip
        }
    }

    mod strip_latex_tests {
        use super::*;

        #[test]
        fn test_inline_math() {
            let content = "The formula $x^2$ is quadratic";
            let result = strip_latex(content);
            assert_eq!(result, "The formula x² is quadratic");
        }

        #[test]
        fn test_display_math() {
            let content = "The equation:\n$$\nE = mc^2\n$$\nis famous.";
            let result = strip_latex(content);
            assert_eq!(result, "The equation:\n\nis famous.");
        }

        #[test]
        fn test_latex_environment() {
            let content = "An equation:\n\\begin{equation}\ny = mx + b\n\\end{equation}\ndone.";
            let result = strip_latex(content);
            assert_eq!(result, "An equation:\n\ny = mx + b\n\ndone.");
        }

        #[test]
        fn test_greek_letters() {
            let content = "Angle $\\alpha$ and $\\beta$";
            let result = strip_latex(content);
            assert_eq!(result, "Angle α and β");
        }

        #[test]
        fn test_math_symbols() {
            let content = "$\\sum_{i=1}^n x_i \\approx \\int f(x) dx$";
            let result = strip_latex(content);
            // i=1 and n might not be mapped in sup/sub maps fully yet but let's check what we have
            // _i -> ᵢ, ^n -> ⁿ
            assert!(result.contains("∑"));
            assert!(result.contains("≈"));
            assert!(result.contains("∫"));
        }

        #[test]
        fn test_no_latex() {
            let content = "Regular text without math";
            let result = strip_latex(content);
            assert_eq!(result, content);
        }

        #[test]
        fn test_money_not_stripped() {
            // Currency like "$5" shouldn't match inline math pattern
            // because it would need content between the dollars
            let content = "It costs $5";
            let result = strip_latex(content);
            // This won't match because there's no closing $
            assert_eq!(result, content);
        }

        #[test]
        fn test_standalone_commands() {
            let content = "Some text\n\\newpage\nMore text";
            let result = strip_latex(content);
            assert_eq!(result, "Some text\n\nMore text");
        }

        #[test]
        fn test_clearpage() {
            let content = "Chapter 1\n\\clearpage\nChapter 2";
            let result = strip_latex(content);
            assert_eq!(result, "Chapter 1\n\nChapter 2");
        }

        #[test]
        fn test_usepackage() {
            let content = "\\usepackage{amsmath}\nSome content";
            let result = strip_latex(content);
            assert_eq!(result, "\nSome content");
        }

        #[test]
        fn test_text_formatting_preserved() {
            // \textbf{} content should be preserved, just without the command
            let content = "This is \\textbf{bold} text";
            let result = strip_latex(content);
            assert_eq!(result, "This is bold text");
        }

        #[test]
        fn test_font_size_normalsize() {
            let content = "Some text\n\\normalsize\nMore text";
            let result = strip_latex(content);
            assert_eq!(result, "Some text\n\nMore text");
        }

        #[test]
        fn test_font_size_large() {
            let content = "\\Large\nHeading";
            let result = strip_latex(content);
            assert_eq!(result, "\nHeading");
        }

        #[test]
        fn test_font_size_tiny() {
            let content = "Normal\n\\tiny\nSmall text\n\\normalsize\nBack to normal";
            let result = strip_latex(content);
            assert_eq!(result, "Normal\n\nSmall text\n\nBack to normal");
        }

        #[test]
        fn test_centering() {
            let content = "\\centering\nCentered content";
            let result = strip_latex(content);
            assert_eq!(result, "\nCentered content");
        }

        #[test]
        fn test_label_ref_stripped() {
            let content = "See Figure \\ref{fig:example} for details.";
            let result = strip_latex(content);
            assert_eq!(result, "See Figure for details.");
        }

        #[test]
        fn test_cite_stripped() {
            let content = "As shown by \\cite{smith2020} in their work.";
            let result = strip_latex(content);
            assert_eq!(result, "As shown by in their work.");
        }

        #[test]
        fn test_vspace_hspace_stripped() {
            let content = "Text\\vspace{1em}More text\\hspace{2cm}End";
            let result = strip_latex(content);
            assert_eq!(result, "TextMore textEnd");
        }

        #[test]
        fn test_geometry_stripped() {
            let content = "\\geometry{margin=1in}\nDocument content";
            let result = strip_latex(content);
            assert_eq!(result, "\nDocument content");
        }

        #[test]
        fn test_standalone_begin_end_stripped() {
            let content = "Text \\begin{center} centered \\end{center} more";
            let result = strip_latex(content);
            assert_eq!(result, "Text centered more");
        }

        #[test]
        fn test_unpaired_begin_stripped() {
            let content = "Before \\begin{itemize} items";
            let result = strip_latex(content);
            assert_eq!(result, "Before items");
        }

        #[test]
        fn test_inline_setlength_stripped() {
            let content = "Text \\setlength{\\parindent}{0pt} more text";
            let result = strip_latex(content);
            assert!(result.contains("more text"));
            assert!(!result.contains("setlength"));
        }

        #[test]
        fn test_inline_fontsize_stripped() {
            let content = "Normal \\fontsize{12}{14} text here";
            let result = strip_latex(content);
            assert!(!result.contains("fontsize"));
            assert!(result.contains("text here"));
        }

        #[test]
        fn test_bare_box_stripped() {
            let content = "Check \\Box$ next";
            let result = strip_latex(content);
            assert!(!result.contains("\\Box"));
        }

        #[test]
        fn test_bare_commands_stripped() {
            // \no, \yes are bare commands with no content value
            let content = "Item \\no text \\yes more";
            let result = strip_latex(content);
            assert!(!result.contains("\\no"));
            assert!(!result.contains("\\yes"));
        }

        #[test]
        fn test_inline_pagestyle_stripped() {
            let content = "Text \\pagestyle{fancy} more";
            let result = strip_latex(content);
            assert!(!result.contains("pagestyle"));
            assert!(result.contains("more"));
        }

        #[test]
        fn test_inline_thispagestyle_stripped() {
            let content = "\\thispagestyle{empty} Content here";
            let result = strip_latex(content);
            assert!(!result.contains("thispagestyle"));
            assert!(result.contains("Content here"));
        }

        #[test]
        fn test_inline_usepackage_stripped() {
            let content = "Load \\usepackage[utf8]{inputenc} text";
            let result = strip_latex(content);
            assert!(!result.contains("usepackage"));
        }

        #[test]
        fn test_no_double_spaces_after_stripping() {
            let content = "Text with \\fontsize{12}{14} commands and \\setlength{\\parskip}{1em} more text here.";
            let result = strip_latex(content);
            assert!(
                !result.contains("  "),
                "Double spaces found in: {:?}",
                result
            );
            assert!(result.contains("Text with"));
            assert!(result.contains("more text here."));
        }

        #[test]
        fn test_code_spans_preserved_in_tables() {
            let content =
                "| `post_tweet` | Post a tweet |\n| `post_reddit` | Submit a Reddit post |";
            let result = strip_latex(content);
            assert!(
                result.contains("`post_tweet`"),
                "code span mangled: {result}"
            );
            assert!(
                result.contains("`post_reddit`"),
                "code span mangled: {result}"
            );
            assert!(
                !result.contains("ₜ"),
                "subscript leaked into code span: {result}"
            );
            assert!(
                !result.contains("ᵣ"),
                "subscript leaked into code span: {result}"
            );
        }

        #[test]
        fn test_code_spans_with_underscores_preserved() {
            let content = "Use `my_variable` and `some_function` in code";
            let result = strip_latex(content);
            assert!(
                result.contains("`my_variable`"),
                "code span mangled: {result}"
            );
            assert!(
                result.contains("`some_function`"),
                "code span mangled: {result}"
            );
        }

        #[test]
        fn test_fenced_code_block_preserved() {
            let content = "Text\n```rust\nlet x_n = 1;\n```\nMore text";
            let result = strip_latex(content);
            assert!(result.contains("x_n"), "fenced code mangled: {result}");
        }

        #[test]
        fn test_subscript_preserves_snake_case() {
            assert_eq!(strip_latex("x_1 + z_{i}th"), "x₁ + zᵢth");
            assert_eq!(strip_latex("i_am_a_snake x_1"), "i_am_a_snake x₁");
        }
    }

    mod strip_latex_aggressive_tests {
        use super::*;

        #[test]
        fn test_aggressive_strips_any_backslash_line() {
            let content = "Normal text\n\\unknowncommand\nMore text";
            let result = strip_latex_aggressive(content);
            assert_eq!(result, "Normal text\n\nMore text");
        }

        #[test]
        fn test_aggressive_strips_with_args() {
            let content = "\\customcmd{arg}\nContent here";
            let result = strip_latex_aggressive(content);
            assert_eq!(result, "\nContent here");
        }

        #[test]
        fn test_aggressive_preserves_prose() {
            let content = "Regular text without backslash commands";
            let result = strip_latex_aggressive(content);
            assert_eq!(result, content);
        }

        #[test]
        fn test_aggressive_preserves_inline_backslash() {
            // Text with backslash not at line start should be preserved
            let content = "Some text with \\command inline";
            let result = strip_latex_aggressive(content);
            assert_eq!(result, content);
        }
    }

    mod detect_checkbox_tests {
        use super::*;

        #[test]
        fn test_checked_lowercase() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("[x] Task done");
            assert!(is_task);
            assert!(is_checked);
            assert_eq!(text, "Task done");
        }

        #[test]
        fn test_checked_uppercase() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("[X] Also done");
            assert!(is_task);
            assert!(is_checked);
            assert_eq!(text, "Also done");
        }

        #[test]
        fn test_unchecked() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("[ ] Not done yet");
            assert!(is_task);
            assert!(!is_checked);
            assert_eq!(text, "Not done yet");
        }

        #[test]
        fn test_not_a_task() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("Regular text");
            assert!(!is_task);
            assert!(!is_checked);
            assert_eq!(text, "Regular text");
        }

        #[test]
        fn test_with_leading_whitespace() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("  [x] Indented task");
            assert!(is_task);
            assert!(is_checked);
            assert_eq!(text, "Indented task");
        }

        #[test]
        fn test_empty_task() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("[x]");
            assert!(is_task);
            assert!(is_checked);
            assert_eq!(text, "");
        }

        #[test]
        fn test_bracket_but_not_checkbox() {
            let (is_task, is_checked, text) = detect_checkbox_in_text("[a] Not a checkbox");
            assert!(!is_task);
            assert!(!is_checked);
            assert_eq!(text, "[a] Not a checkbox");
        }
    }

    mod align_text_tests {
        use super::*;

        #[test]
        fn test_halfwidth_katakana_sound_marks_match_terminal_width() {
            assert_eq!(terminal_width("ｶﾞ"), 2);
            assert_eq!(terminal_width("ﾊﾟ"), 2);
            assert_eq!(terminal_width("aﾞ"), 2);
        }

        #[test]
        fn test_left_align() {
            let result = align_text("Hi", 10, &Alignment::Left);
            assert_eq!(result, " Hi       ");
            assert_eq!(result.len(), 10);
        }

        #[test]
        fn test_right_align() {
            let result = align_text("Hi", 10, &Alignment::Right);
            assert_eq!(result, "       Hi ");
            assert_eq!(result.len(), 10);
        }

        #[test]
        fn test_center_align() {
            let result = align_text("Hi", 10, &Alignment::Center);
            assert_eq!(result, "    Hi    ");
            assert_eq!(result.len(), 10);
        }

        #[test]
        fn test_none_defaults_to_left() {
            let result = align_text("Hi", 10, &Alignment::None);
            assert_eq!(result, " Hi       ");
        }

        #[test]
        fn test_truncation_when_too_long() {
            let result = align_text("This is a very long text", 10, &Alignment::Left);
            // Now uses single ellipsis character (…) instead of three dots
            assert!(result.contains("…"));
            // Should be truncated with ellipsis
        }

        #[test]
        fn test_exact_width() {
            let result = align_text("Test", 6, &Alignment::Left);
            assert_eq!(result, " Test ");
        }

        #[test]
        fn test_unicode_width() {
            // Japanese characters are typically 2 columns wide
            let result = align_text("日本", 10, &Alignment::Left);
            // "日本" is 4 columns wide (2 chars * 2 width each)
            // Result should be " 日本     " (1 space + 4 cols + 5 spaces = 10)
            assert_eq!(terminal_width(&result), 10);
        }

        #[test]
        fn test_align_halfwidth_katakana_sound_marks() {
            let result = align_text("ｶﾞ", 6, &Alignment::Left);
            assert_eq!(terminal_width(&result), 6);
        }

        #[test]
        fn test_center_odd_padding() {
            // When padding can't be split evenly, extra space goes to right
            let result = align_text("A", 10, &Alignment::Center);
            // "A" is 1 wide, 9 spaces to distribute: 4 left, 5 right
            assert_eq!(result, "    A     ");
        }
    }

    mod highlight_search_tests {
        use super::*;
        use ratatui::style::Color;

        #[test]
        fn test_no_match() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("Hello World", "xyz", base, highlight);
            assert_eq!(spans.len(), 1);
            assert_eq!(spans[0].content.as_ref(), "Hello World");
        }

        #[test]
        fn test_single_match() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("Hello World", "World", base, highlight);
            assert_eq!(spans.len(), 2);
            assert_eq!(spans[0].content.as_ref(), "Hello ");
            assert_eq!(spans[1].content.as_ref(), "World");
            assert_eq!(spans[1].style, highlight);
        }

        #[test]
        fn test_case_insensitive() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("Hello World", "world", base, highlight);
            assert_eq!(spans.len(), 2);
            assert_eq!(spans[1].content.as_ref(), "World"); // Preserves original case
        }

        #[test]
        fn test_multiple_matches() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("foo bar foo", "foo", base, highlight);
            assert_eq!(spans.len(), 3);
            assert_eq!(spans[0].content.as_ref(), "foo");
            assert_eq!(spans[1].content.as_ref(), " bar ");
            assert_eq!(spans[2].content.as_ref(), "foo");
        }

        #[test]
        fn test_empty_query() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("Hello", "", base, highlight);
            assert_eq!(spans.len(), 1);
            assert_eq!(spans[0].content.as_ref(), "Hello");
        }

        #[test]
        fn test_match_at_start() {
            let base = Style::default().fg(Color::White);
            let highlight = Style::default().fg(Color::Yellow);
            let spans = highlight_search_matches("Hello World", "Hello", base, highlight);
            assert_eq!(spans.len(), 2);
            assert_eq!(spans[0].content.as_ref(), "Hello");
            assert_eq!(spans[0].style, highlight);
            assert_eq!(spans[1].content.as_ref(), " World");
        }
    }
}