rumdl 0.1.51 - Docs.rs

//! Text reflow utilities for MD013
//!
//! This module implements text wrapping/reflow functionality that preserves
//! Markdown elements like links, emphasis, code spans, etc.

use crate::utils::calculate_indentation_width_default;
use crate::utils::is_definition_list_item;
use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
use crate::utils::regex_cache::{
    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
    LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
    SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
};
use crate::utils::sentence_utils::{
    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
    text_ends_with_abbreviation,
};
use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
use std::collections::HashSet;
use unicode_width::UnicodeWidthStr;

/// Length calculation mode for reflow
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode characters (grapheme clusters)
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes
    Bytes,
}

/// Calculate the display length of a string based on the length mode
fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
    match mode {
        ReflowLengthMode::Chars => s.chars().count(),
        ReflowLengthMode::Visual => s.width(),
        ReflowLengthMode::Bytes => s.len(),
    }
}

/// Options for reflowing text
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
    /// Whether to use semantic line breaks (cascading split strategy)
    pub semantic_line_breaks: bool,
    /// Custom abbreviations for sentence detection
    /// Periods are optional - both "Dr" and "Dr." work the same
    /// Custom abbreviations are always added to the built-in defaults
    pub abbreviations: Option<Vec<String>>,
    /// How to measure string length for line-length comparisons
    pub length_mode: ReflowLengthMode,
    /// Whether to treat {#id .class key="value"} as atomic (unsplittable) elements.
    /// Enabled for MkDocs and Kramdown flavors.
    pub attr_lists: bool,
    /// Whether to require uppercase after periods for sentence detection.
    /// When true (default), only "word. Capital" is a sentence boundary.
    /// When false, "word. lowercase" is also treated as a sentence boundary.
    /// Does not affect ! and ? which are always treated as sentence boundaries.
    pub require_sentence_capital: bool,
    /// Cap list continuation indent to this value when set.
    /// Used by mkdocs flavor where continuation is always 4 spaces
    /// regardless of checkbox markers.
    pub max_list_continuation_indent: Option<usize>,
}

impl Default for ReflowOptions {
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
            semantic_line_breaks: false,
            abbreviations: None,
            length_mode: ReflowLengthMode::default(),
            attr_lists: false,
            require_sentence_capital: true,
            max_list_continuation_indent: None,
        }
    }
}

/// Detect if a character position is a sentence boundary
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
/// Supports both ASCII punctuation (. ! ?) and CJK punctuation (。 ！ ？)
fn is_sentence_boundary(
    text: &str,
    pos: usize,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> bool {
    let chars: Vec<char> = text.chars().collect();

    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // Check for CJK sentence-ending punctuation (。, ！, ？)
    // CJK punctuation doesn't require space or uppercase after it
    if is_cjk_sentence_ending(c) {
        // Skip any trailing emphasis/strikethrough markers
        let mut after_punct_pos = pos + 1;
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip whitespace
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Check if we have more content (any non-whitespace)
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip leading emphasis/strikethrough markers
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        if after_punct_pos >= chars.len() {
            return false;
        }

        // For CJK, we accept any character as the start of the next sentence
        // (no uppercase requirement, since CJK doesn't have case)
        return true;
    }

    // Check for ASCII sentence-ending punctuation
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // Normal case: punctuation followed by space
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        // Sentence ends with quote - check what follows the quote
        if chars[pos + 2] == ' ' {
            // Just quote followed by space: 'sentence." '
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Quote followed by emphasis: 'sentence."* '
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Quote followed by bold: 'sentence."** '
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Sentence ends with bold: "sentence.** " or "sentence.__ "
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Sentence ends with strikethrough: "sentence.~~ "
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip all whitespace after the space to find the start of the next sentence
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Check if we reached the end of the string
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    // Check if we reached the end after skipping emphasis
    if first_letter_pos >= chars.len() {
        return false;
    }

    let first_char = chars[first_letter_pos];

    // For ! and ?, sentence boundaries are unambiguous — no uppercase requirement
    if c == '!' || c == '?' {
        return true;
    }

    // Period-specific checks: periods are ambiguous (abbreviations, decimals, initials)
    // so we apply additional guards before accepting a sentence boundary.

    if pos > 0 {
        // Check for common abbreviations
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // Check for decimal numbers (e.g., "3.14 is pi")
        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
            return false;
        }

        // Check for single-letter initials (e.g., "J. K. Rowling")
        // A single uppercase letter before the period preceded by whitespace or start
        // is likely an initial, not a sentence ending.
        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
            return false;
        }
    }

    // In strict mode, require uppercase or CJK to start the next sentence after a period.
    // In relaxed mode, accept any alphanumeric character.
    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    true
}

/// Split text into sentences
pub fn split_into_sentences(text: &str) -> Vec<String> {
    split_into_sentences_custom(text, &None)
}

/// Split text into sentences with custom abbreviations
pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
    let abbreviations = get_abbreviations(custom_abbreviations);
    split_into_sentences_with_set(text, &abbreviations, true)
}

/// Internal function to split text into sentences with a pre-computed abbreviations set
/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
fn split_into_sentences_with_set(
    text: &str,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
) -> Vec<String> {
    let mut sentences = Vec::new();
    let mut current_sentence = String::new();
    let mut chars = text.chars().peekable();
    let mut pos = 0;

    while let Some(c) = chars.next() {
        current_sentence.push(c);

        if is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
            while let Some(&next) = chars.peek() {
                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
                    current_sentence.push(chars.next().unwrap());
                    pos += 1;
                } else {
                    break;
                }
            }

            // Consume the space after the sentence
            if chars.peek() == Some(&' ') {
                chars.next();
                pos += 1;
            }

            sentences.push(current_sentence.trim().to_string());
            current_sentence.clear();
        }

        pos += 1;
    }

    // Add any remaining text as the last sentence
    if !current_sentence.trim().is_empty() {
        sentences.push(current_sentence.trim().to_string());
    }
    sentences
}

/// Check if a line is a horizontal rule (---, ___, ***)
fn is_horizontal_rule(line: &str) -> bool {
    if line.len() < 3 {
        return false;
    }

    // Check if line consists only of -, _, or * characters (at least 3)
    let chars: Vec<char> = line.chars().collect();
    if chars.is_empty() {
        return false;
    }

    let first_char = chars[0];
    if first_char != '-' && first_char != '_' && first_char != '*' {
        return false;
    }

    // All characters should be the same (allowing spaces between)
    for c in &chars {
        if *c != first_char && *c != ' ' {
            return false;
        }
    }

    // Count non-space characters
    let non_space_count = chars.iter().filter(|c| **c != ' ').count();
    non_space_count >= 3
}

/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
fn is_numbered_list_item(line: &str) -> bool {
    let mut chars = line.chars();

    // Must start with a digit
    if !chars.next().is_some_and(|c| c.is_numeric()) {
        return false;
    }

    // Can have more digits
    while let Some(c) = chars.next() {
        if c == '.' {
            // After period, must have a space (consistent with list marker extraction)
            // "2019." alone is NOT treated as a list item to avoid false positives
            return chars.next() == Some(' ');
        }
        if !c.is_numeric() {
            return false;
        }
    }

    false
}

/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
fn is_unordered_list_marker(s: &str) -> bool {
    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
        && !is_horizontal_rule(s)
        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
}

/// Shared structural checks for block boundary detection.
/// Checks elements that only depend on the trimmed line content.
fn is_block_boundary_core(trimmed: &str) -> bool {
    trimmed.is_empty()
        || trimmed.starts_with('#')
        || trimmed.starts_with("```")
        || trimmed.starts_with("~~~")
        || trimmed.starts_with('>')
        || (trimmed.starts_with('[') && trimmed.contains("]:"))
        || is_horizontal_rule(trimmed)
        || is_unordered_list_marker(trimmed)
        || is_numbered_list_item(trimmed)
        || is_definition_list_item(trimmed)
        || trimmed.starts_with(":::")
}

/// Check if a trimmed line starts a new structural block element.
/// Used for paragraph boundary detection in `reflow_markdown()`.
fn is_block_boundary(trimmed: &str) -> bool {
    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
}

/// Check if a line starts a new structural block for paragraph boundary detection
/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
/// (≥4 spaces) and table row detection via `is_potential_table_row`.
fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
    is_block_boundary_core(trimmed)
        || calculate_indentation_width_default(line) >= 4
        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
}

/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// CommonMark supports two formats for hard line breaks:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);
    line.ends_with("  ") || line.ends_with('\\')
}

/// Check if text ends with sentence-terminating punctuation (. ! ?)
fn ends_with_sentence_punct(text: &str) -> bool {
    text.ends_with('.') || text.ends_with('!') || text.ends_with('?')
}

/// Trim trailing whitespace while preserving hard breaks (two trailing spaces or backslash)
///
/// Hard breaks in Markdown can be indicated by:
/// 1. Two trailing spaces before a newline (traditional)
/// 2. A backslash at the end of the line (mdformat style)
fn trim_preserving_hard_break(s: &str) -> String {
    // Strip trailing \r from CRLF line endings first to handle Windows files
    let s = s.strip_suffix('\r').unwrap_or(s);

    // Check for backslash hard break (mdformat style)
    if s.ends_with('\\') {
        // Preserve the backslash exactly as-is
        return s.to_string();
    }

    // Check if there are at least 2 trailing spaces (traditional hard break)
    if s.ends_with("  ") {
        // Find the position where non-space content ends
        let content_end = s.trim_end().len();
        if content_end == 0 {
            // String is all whitespace
            return String::new();
        }
        // Preserve exactly 2 trailing spaces for hard break
        format!("{}  ", &s[..content_end])
    } else {
        // No hard break, just trim all trailing whitespace
        s.trim_end().to_string()
    }
}

/// Parse markdown elements using the appropriate parser based on options.
fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
    if options.attr_lists {
        parse_markdown_elements_with_attr_lists(text)
    } else {
        parse_markdown_elements(text)
    }
}

pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
    // For sentence-per-line mode, always process regardless of length
    if options.sentence_per_line {
        let elements = parse_elements(line, options);
        return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
    }

    // For semantic line breaks mode, use cascading split strategy
    if options.semantic_line_breaks {
        let elements = parse_elements(line, options);
        return reflow_elements_semantic(&elements, options);
    }

    // Quick check: if line is already short enough or no wrapping requested, return as-is
    // line_length = 0 means no wrapping (unlimited line length)
    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
        return vec![line.to_string()];
    }

    // Parse the markdown to identify elements
    let elements = parse_elements(line, options);

    // Reflow the elements into lines
    reflow_elements(&elements, options)
}

/// Image source in a linked image structure
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: ![alt](url)
    Inline(String),
    /// Reference image: ![alt][ref]
    Reference(String),
}

/// Link target in a linked image structure
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: ](url)
    Inline(String),
    /// Reference link: ][ref]
    Reference(String),
}

/// Represents a piece of content in the markdown
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// A complete markdown inline link [text](url)
    Link { text: String, url: String },
    /// A complete markdown reference link [text][ref]
    ReferenceLink { text: String, reference: String },
    /// A complete markdown empty reference link [text][]
    EmptyReferenceLink { text: String },
    /// A complete markdown shortcut reference link [ref]
    ShortcutReference { reference: String },
    /// A complete markdown inline image ![alt](url)
    InlineImage { alt: String, url: String },
    /// A complete markdown reference image ![alt][ref]
    ReferenceImage { alt: String, reference: String },
    /// A complete markdown empty reference image ![alt][]
    EmptyReferenceImage { alt: String },
    /// A clickable image badge in any of 4 forms:
    /// - [![alt](img-url)](link-url)
    /// - [![alt][img-ref]](link-url)
    /// - [![alt](img-url)][link-ref]
    /// - [![alt][img-ref]][link-ref]
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference [^note]
    FootnoteReference { note: String },
    /// Strikethrough text ~~text~~
    Strikethrough(String),
    /// Wiki-style link [[wiki]] or [[wiki|text]]
    WikiLink(String),
    /// Inline math $math$
    InlineMath(String),
    /// Display math $$math$$
    DisplayMath(String),
    /// Emoji shortcode :emoji:
    EmojiShortcode(String),
    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
    Autolink(String),
    /// HTML tag <tag> or </tag> or <tag/>
    HtmlTag(String),
    /// HTML entity &nbsp; or &#123;
    HtmlEntity(String),
    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
    HugoShortcode(String),
    /// MkDocs/kramdown attribute list {#id .class key="value"}
    AttrList(String),
    /// Inline code `code`
    Code(String),
    /// Bold text **text** or __text__
    Bold {
        content: String,
        /// True if underscore markers (__), false for asterisks (**)
        underscore: bool,
    },
    /// Italic text *text* or _text_
    Italic {
        content: String,
        /// True if underscore marker (_), false for asterisk (*)
        underscore: bool,
    },
}

impl std::fmt::Display for Element {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Element::Text(s) => write!(f, "{s}"),
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::LinkedImage {
                alt,
                img_source,
                link_target,
            } => {
                // Build the image part: ![alt](url) or ![alt][ref]
                let img_part = match img_source {
                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
                };
                // Build the link part: (url) or [ref]
                match link_target {
                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
                }
            }
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::Autolink(s) => write!(f, "{s}"),
            Element::HtmlTag(s) => write!(f, "{s}"),
            Element::HtmlEntity(s) => write!(f, "{s}"),
            Element::HugoShortcode(s) => write!(f, "{s}"),
            Element::AttrList(s) => write!(f, "{s}"),
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Bold { content, underscore } => {
                if *underscore {
                    write!(f, "__{content}__")
                } else {
                    write!(f, "**{content}**")
                }
            }
            Element::Italic { content, underscore } => {
                if *underscore {
                    write!(f, "_{content}_")
                } else {
                    write!(f, "*{content}*")
                }
            }
        }
    }
}

impl Element {
    /// Calculate the display width of this element using the given length mode.
    /// This formats the element and computes its width, correctly handling
    /// visual width for CJK characters and other wide glyphs.
    fn display_width(&self, mode: ReflowLengthMode) -> usize {
        let formatted = format!("{self}");
        display_len(&formatted, mode)
    }
}

/// An emphasis or formatting span parsed by pulldown-cmark
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the emphasis starts (including markers)
    start: usize,
    /// Byte offset where the emphasis ends (after closing markers)
    end: usize,
    /// The content inside the emphasis markers
    content: String,
    /// Whether this is strong (bold) emphasis
    is_strong: bool,
    /// Whether this is strikethrough (~~text~~)
    is_strikethrough: bool,
    /// Whether the original used underscore markers (for emphasis only)
    uses_underscore: bool,
}

/// Extract emphasis and strikethrough spans from text using pulldown-cmark
///
/// This provides CommonMark-compliant emphasis parsing, correctly handling:
/// - Nested emphasis like `*text **bold** more*`
/// - Left/right flanking delimiter rules
/// - Underscore vs asterisk markers
/// - GFM strikethrough (~~text~~)
///
/// Returns spans sorted by start position.
fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
    let mut spans = Vec::new();
    let mut options = Options::empty();
    options.insert(Options::ENABLE_STRIKETHROUGH);

    // Stacks to track nested formatting with their start positions
    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
    let mut strikethrough_stack: Vec<usize> = Vec::new();

    let parser = Parser::new_ext(text, options).into_offset_iter();

    for (event, range) in parser {
        match event {
            Event::Start(Tag::Emphasis) => {
                // Check if this uses underscore by looking at the original text
                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
                emphasis_stack.push((range.start, uses_underscore));
            }
            Event::End(TagEnd::Emphasis) => {
                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
                    // Extract content between the markers (1 char marker on each side)
                    let content_start = start_byte + 1;
                    let content_end = range.end - 1;
                    if content_end > content_start
                        && let Some(content) = text.get(content_start..content_end)
                    {
                        spans.push(EmphasisSpan {
                            start: start_byte,
                            end: range.end,
                            content: content.to_string(),
                            is_strong: false,
                            is_strikethrough: false,
                            uses_underscore,
                        });
                    }
                }
            }
            Event::Start(Tag::Strong) => {
                // Check if this uses underscore by looking at the original text
                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
                strong_stack.push((range.start, uses_underscore));
            }
            Event::End(TagEnd::Strong) => {
                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
                    // Extract content between the markers (2 char marker on each side)
                    let content_start = start_byte + 2;
                    let content_end = range.end - 2;
                    if content_end > content_start
                        && let Some(content) = text.get(content_start..content_end)
                    {
                        spans.push(EmphasisSpan {
                            start: start_byte,
                            end: range.end,
                            content: content.to_string(),
                            is_strong: true,
                            is_strikethrough: false,
                            uses_underscore,
                        });
                    }
                }
            }
            Event::Start(Tag::Strikethrough) => {
                strikethrough_stack.push(range.start);
            }
            Event::End(TagEnd::Strikethrough) => {
                if let Some(start_byte) = strikethrough_stack.pop() {
                    // Extract content between the ~~ markers (2 char marker on each side)
                    let content_start = start_byte + 2;
                    let content_end = range.end - 2;
                    if content_end > content_start
                        && let Some(content) = text.get(content_start..content_end)
                    {
                        spans.push(EmphasisSpan {
                            start: start_byte,
                            end: range.end,
                            content: content.to_string(),
                            is_strong: false,
                            is_strikethrough: true,
                            uses_underscore: false,
                        });
                    }
                }
            }
            _ => {}
        }
    }

    // Sort by start position
    spans.sort_by_key(|s| s.start);
    spans
}

/// Parse markdown elements from text preserving the raw syntax
///
/// Detection order is critical:
/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
/// 2. Inline images ![alt](url) - before links to handle ! prefix
/// 3. Reference images ![alt][ref] - before reference links
/// 4. Inline links [text](url) - before reference links
/// 5. Reference links [text][ref] - before shortcut references
/// 6. Shortcut reference links [ref] - detected last to avoid false positives
/// 7. Other elements (code, bold, italic, etc.) - processed normally
fn parse_markdown_elements(text: &str) -> Vec<Element> {
    parse_markdown_elements_inner(text, false)
}

fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
    parse_markdown_elements_inner(text, true)
}

fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
    let mut elements = Vec::new();
    let mut remaining = text;

    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
    let emphasis_spans = extract_emphasis_spans(text);

    while !remaining.is_empty() {
        // Calculate current byte offset in original text
        let current_offset = text.len() - remaining.len();
        // Find the earliest occurrence of any markdown pattern
        // Store (start, end, pattern_name) to unify standard Regex and FancyRegex match results
        let mut earliest_match: Option<(usize, usize, &str)> = None;

        // Check for linked images FIRST (all 4 variants)
        // Quick literal check: only run expensive regexes if we might have a linked image
        // Pattern starts with "[!" so check for that first
        if remaining.contains("[!") {
            // Pattern 1: [![alt](img)](link) - inline image in inline link
            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
            }

            // Pattern 2: [![alt][ref]](link) - reference image in inline link
            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
            }

            // Pattern 3: [![alt](img)][ref] - inline image in reference link
            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
            }

            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
            {
                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
            }
        }

        // Check for images (they start with ! so should be detected before links)
        // Inline images - ![alt](url)
        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_image"));
        }

        // Reference images - ![alt][ref]
        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_image"));
        }

        // Check for footnote references - [^note]
        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
        }

        // Check for inline links - [text](url)
        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_link"));
        }

        // Check for reference links - [text][ref]
        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "ref_link"));
        }

        // Check for shortcut reference links - [ref]
        // Only check if we haven't found an earlier pattern that would conflict
        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
        }

        // Check for wiki-style links - [[wiki]]
        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "wiki_link"));
        }

        // Check for display math first (before inline) - $$math$$
        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "display_math"));
        }

        // Check for inline math - $math$
        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "inline_math"));
        }

        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans

        // Check for emoji shortcodes - :emoji:
        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "emoji"));
        }

        // Check for HTML entities - &nbsp; etc
        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "html_entity"));
        }

        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
        // Must be checked before other patterns to avoid false sentence breaks
        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
        }

        // Check for HTML tags - <tag> </tag> <tag/>
        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
        {
            // Check if this is an autolink (starts with protocol or mailto:)
            let matched_text = &remaining[m.start()..m.end()];
            let is_url_autolink = matched_text.starts_with("<http://")
                || matched_text.starts_with("<https://")
                || matched_text.starts_with("<mailto:")
                || matched_text.starts_with("<ftp://")
                || matched_text.starts_with("<ftps://");

            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
            let is_email_autolink = {
                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
                EMAIL_PATTERN.is_match(content)
            };

            if is_url_autolink || is_email_autolink {
                earliest_match = Some((m.start(), m.end(), "autolink"));
            } else {
                earliest_match = Some((m.start(), m.end(), "html_tag"));
            }
        }

        // Find earliest non-link special characters
        let mut next_special = remaining.len();
        let mut special_type = "";
        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
        let mut attr_list_len: usize = 0;

        // Check for code spans (not handled by pulldown-cmark in this context)
        if let Some(pos) = remaining.find('`')
            && pos < next_special
        {
            next_special = pos;
            special_type = "code";
        }

        // Check for MkDocs/kramdown attr lists - {#id .class key="value"}
        if attr_lists
            && let Some(pos) = remaining.find('{')
            && pos < next_special
            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
            && m.start() == 0
        {
            next_special = pos;
            special_type = "attr_list";
            attr_list_len = m.end();
        }

        // Check for emphasis using pulldown-cmark's pre-extracted spans
        // Find the earliest emphasis span that starts within remaining text
        for span in &emphasis_spans {
            if span.start >= current_offset && span.start < current_offset + remaining.len() {
                let pos_in_remaining = span.start - current_offset;
                if pos_in_remaining < next_special {
                    next_special = pos_in_remaining;
                    special_type = "pulldown_emphasis";
                    pulldown_emphasis = Some(span);
                }
                break; // Spans are sorted by start position, so first match is earliest
            }
        }

        // Determine which pattern to process first
        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
            pos < next_special
        } else {
            false
        };

        if should_process_markdown_link {
            let (pos, match_end, pattern_type) = earliest_match.unwrap();

            // Add any text before the match
            if pos > 0 {
                elements.push(Element::Text(remaining[..pos].to_string()));
            }

            // Process the matched pattern
            match pattern_type {
                // Pattern 1: [![alt](img)](link) - inline image in inline link
                "linked_image_ii" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Pattern 2: [![alt][ref]](link) - reference image in inline link
                "linked_image_ri" => {
                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Pattern 3: [![alt](img)][ref] - inline image in reference link
                "linked_image_ir" => {
                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Inline(img_url.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
                "linked_image_rr" => {
                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::LinkedImage {
                            alt: alt.to_string(),
                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_image" => {
                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineImage {
                            alt: alt.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_image" => {
                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        if reference.is_empty() {
                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
                        } else {
                            elements.push(Element::ReferenceImage {
                                alt: alt.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("!".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "footnote_ref" => {
                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::FootnoteReference { note: note.to_string() });
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "inline_link" => {
                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::Link {
                            text: text.to_string(),
                            url: url.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        // Fallback - shouldn't happen
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "ref_link" => {
                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                        if reference.is_empty() {
                            // Empty reference link [text][]
                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
                        } else {
                            // Regular reference link [text][ref]
                            elements.push(Element::ReferenceLink {
                                text: text.to_string(),
                                reference: reference.to_string(),
                            });
                        }
                        remaining = &remaining[match_end..];
                    } else {
                        // Fallback - shouldn't happen
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "shortcut_ref" => {
                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::ShortcutReference {
                            reference: reference.to_string(),
                        });
                        remaining = &remaining[match_end..];
                    } else {
                        // Fallback - shouldn't happen
                        elements.push(Element::Text("[".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "wiki_link" => {
                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::WikiLink(content.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("[[".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "display_math" => {
                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::DisplayMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$$".to_string()));
                        remaining = &remaining[2..];
                    }
                }
                "inline_math" => {
                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::InlineMath(math.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text("$".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                // Note: "strikethrough" case removed - now handled by pulldown-cmark
                "emoji" => {
                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                        elements.push(Element::EmojiShortcode(emoji.to_string()));
                        remaining = &remaining[match_end..];
                    } else {
                        elements.push(Element::Text(":".to_string()));
                        remaining = &remaining[1..];
                    }
                }
                "html_entity" => {
                    // HTML entities are captured whole
                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "hugo_shortcode" => {
                    // Hugo shortcodes are atomic elements - preserve them exactly
                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "autolink" => {
                    // Autolinks are atomic elements - preserve them exactly
                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                "html_tag" => {
                    // HTML tags are captured whole
                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
                    remaining = &remaining[match_end..];
                }
                _ => {
                    // Unknown pattern, treat as text
                    elements.push(Element::Text("[".to_string()));
                    remaining = &remaining[1..];
                }
            }
        } else {
            // Process non-link special characters

            // Add any text before the special character
            if next_special > 0 && next_special < remaining.len() {
                elements.push(Element::Text(remaining[..next_special].to_string()));
                remaining = &remaining[next_special..];
            }

            // Process the special element
            match special_type {
                "code" => {
                    // Find end of code
                    if let Some(code_end) = remaining[1..].find('`') {
                        let code = &remaining[1..1 + code_end];
                        elements.push(Element::Code(code.to_string()));
                        remaining = &remaining[1 + code_end + 1..];
                    } else {
                        // No closing backtick, treat as text
                        elements.push(Element::Text(remaining.to_string()));
                        break;
                    }
                }
                "attr_list" => {
                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
                    remaining = &remaining[attr_list_len..];
                }
                "pulldown_emphasis" => {
                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
                    if let Some(span) = pulldown_emphasis {
                        let span_len = span.end - span.start;
                        if span.is_strikethrough {
                            elements.push(Element::Strikethrough(span.content.clone()));
                        } else if span.is_strong {
                            elements.push(Element::Bold {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        } else {
                            elements.push(Element::Italic {
                                content: span.content.clone(),
                                underscore: span.uses_underscore,
                            });
                        }
                        remaining = &remaining[span_len..];
                    } else {
                        // Fallback - shouldn't happen
                        elements.push(Element::Text(remaining[..1].to_string()));
                        remaining = &remaining[1..];
                    }
                }
                _ => {
                    // No special elements found, add all remaining text
                    elements.push(Element::Text(remaining.to_string()));
                    break;
                }
            }
        }
    }

    elements
}

/// Reflow elements for sentence-per-line mode
fn reflow_elements_sentence_per_line(
    elements: &[Element],
    custom_abbreviations: &Option<Vec<String>>,
    require_sentence_capital: bool,
) -> Vec<String> {
    let abbreviations = get_abbreviations(custom_abbreviations);
    let mut lines = Vec::new();
    let mut current_line = String::new();

    for (idx, element) in elements.iter().enumerate() {
        let element_str = format!("{element}");

        // For text elements, split into sentences
        if let Element::Text(text) = element {
            // Simply append text - it already has correct spacing from tokenization
            let combined = format!("{current_line}{text}");
            // Use the pre-computed abbreviations set to avoid redundant computation
            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);

            if sentences.len() > 1 {
                // We found sentence boundaries
                for (i, sentence) in sentences.iter().enumerate() {
                    if i == 0 {
                        // First sentence might continue from previous elements
                        // But check if it ends with an abbreviation
                        let trimmed = sentence.trim();

                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
                            current_line = sentence.to_string();
                        } else {
                            // Normal case - emit the first sentence
                            lines.push(sentence.to_string());
                            current_line.clear();
                        }
                    } else if i == sentences.len() - 1 {
                        // Last sentence: check if it's complete or incomplete
                        let trimmed = sentence.trim();
                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                            // Complete sentence - emit it immediately
                            lines.push(sentence.to_string());
                            current_line.clear();
                        } else {
                            // Incomplete sentence - save for next iteration
                            current_line = sentence.to_string();
                        }
                    } else {
                        // Complete sentences in the middle
                        lines.push(sentence.to_string());
                    }
                }
            } else {
                // Single sentence - check if it's complete
                let trimmed = combined.trim();

                // If the combined result is only whitespace, don't accumulate it.
                // This prevents leading spaces on subsequent elements when lines
                // are joined with spaces during reflow iteration.
                if trimmed.is_empty() {
                    continue;
                }

                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);

                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
                    // Complete single sentence - emit it
                    lines.push(trimmed.to_string());
                    current_line.clear();
                } else {
                    // Incomplete sentence - continue accumulating
                    current_line = combined;
                }
            }
        } else if let Element::Italic { content, underscore } = element {
            // Handle italic elements - may contain multiple sentences that need continuation
            let marker = if *underscore { "_" } else { "*" };
            handle_emphasis_sentence_split(
                content,
                marker,
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else if let Element::Bold { content, underscore } = element {
            // Handle bold elements - may contain multiple sentences that need continuation
            let marker = if *underscore { "__" } else { "**" };
            handle_emphasis_sentence_split(
                content,
                marker,
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else if let Element::Strikethrough(content) = element {
            // Handle strikethrough elements - may contain multiple sentences that need continuation
            handle_emphasis_sentence_split(
                content,
                "~~",
                &abbreviations,
                require_sentence_capital,
                &mut current_line,
                &mut lines,
            );
        } else {
            // Non-text, non-emphasis elements (Code, Links, etc.)
            // Check if this element is adjacent to the preceding text (no space between)
            let is_adjacent = if idx > 0 {
                match &elements[idx - 1] {
                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
                    _ => true,
                }
            } else {
                false
            };

            // Add space before element if needed, but not for adjacent elements
            if !is_adjacent
                && !current_line.is_empty()
                && !current_line.ends_with(' ')
                && !current_line.ends_with('(')
                && !current_line.ends_with('[')
            {
                current_line.push(' ');
            }
            current_line.push_str(&element_str);
        }
    }

    // Add any remaining content
    if !current_line.is_empty() {
        lines.push(current_line.trim().to_string());
    }
    lines
}

/// Handle splitting emphasis content at sentence boundaries while preserving markers
fn handle_emphasis_sentence_split(
    content: &str,
    marker: &str,
    abbreviations: &HashSet<String>,
    require_sentence_capital: bool,
    current_line: &mut String,
    lines: &mut Vec<String>,
) {
    // Split the emphasis content into sentences
    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);

    if sentences.len() <= 1 {
        // Single sentence or no boundaries - treat as atomic
        if !current_line.is_empty()
            && !current_line.ends_with(' ')
            && !current_line.ends_with('(')
            && !current_line.ends_with('[')
        {
            current_line.push(' ');
        }
        current_line.push_str(marker);
        current_line.push_str(content);
        current_line.push_str(marker);

        // Check if the emphasis content ends with sentence punctuation - if so, emit
        let trimmed = content.trim();
        let ends_with_punct = ends_with_sentence_punct(trimmed);
        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
            lines.push(current_line.clone());
            current_line.clear();
        }
    } else {
        // Multiple sentences - each gets its own emphasis markers
        for (i, sentence) in sentences.iter().enumerate() {
            let trimmed = sentence.trim();
            if trimmed.is_empty() {
                continue;
            }

            if i == 0 {
                // First sentence: combine with current_line and emit
                if !current_line.is_empty()
                    && !current_line.ends_with(' ')
                    && !current_line.ends_with('(')
                    && !current_line.ends_with('[')
                {
                    current_line.push(' ');
                }
                current_line.push_str(marker);
                current_line.push_str(trimmed);
                current_line.push_str(marker);

                // Check if this is a complete sentence
                let ends_with_punct = ends_with_sentence_punct(trimmed);
                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
                    lines.push(current_line.clone());
                    current_line.clear();
                }
            } else if i == sentences.len() - 1 {
                // Last sentence: check if complete
                let ends_with_punct = ends_with_sentence_punct(trimmed);

                let mut line = String::new();
                line.push_str(marker);
                line.push_str(trimmed);
                line.push_str(marker);

                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
                    lines.push(line);
                } else {
                    // Incomplete - keep in current_line for potential continuation
                    *current_line = line;
                }
            } else {
                // Middle sentences: emit with markers
                let mut line = String::new();
                line.push_str(marker);
                line.push_str(trimmed);
                line.push_str(marker);
                lines.push(line);
            }
        }
    }
}

/// English break-words used for semantic line break splitting.
/// These are conjunctions and relative pronouns where a line break
/// reads naturally.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];

/// Check if a character is clause punctuation for semantic line breaks
fn is_clause_punctuation(c: char) -> bool {
    matches!(c, ',' | ';' | ':' | '\u{2014}') // comma, semicolon, colon, em dash
}

/// Compute element spans for a flat text representation of elements.
/// Returns Vec of (start, end) byte offsets for non-Text elements,
/// so we can check that a split position doesn't fall inside them.
fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();
    let mut offset = 0;
    for element in elements {
        let rendered = format!("{element}");
        let len = rendered.len();
        if !matches!(element, Element::Text(_)) {
            spans.push((offset, offset + len));
        }
        offset += len;
    }
    spans
}

/// Check if a byte position falls inside any non-Text element span
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    spans.iter().any(|(start, end)| pos > *start && pos < *end)
}

/// Minimum fraction of line_length that the first part of a split must occupy.
/// Prevents awkwardly short first lines like "A," or "Note:" on their own.
const MIN_SPLIT_RATIO: f64 = 0.3;

/// Split a line at the latest clause punctuation that keeps the first part
/// within `line_length`. Returns None if no valid split point exists or if
/// the split would create an unreasonably short first line.
fn split_at_clause_punctuation(
    text: &str,
    line_length: usize,
    element_spans: &[(usize, usize)],
    length_mode: ReflowLengthMode,
) -> Option<(String, String)> {
    let chars: Vec<char> = text.chars().collect();
    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;

    // Find the char index where accumulated display width exceeds line_length
    let mut width_acc = 0;
    let mut search_end_char = 0;
    for (idx, &c) in chars.iter().enumerate() {
        let c_width = display_len(&c.to_string(), length_mode);
        if width_acc + c_width > line_length {
            break;
        }
        width_acc += c_width;
        search_end_char = idx + 1;
    }

    let mut best_pos = None;
    for i in (0..search_end_char).rev() {
        if is_clause_punctuation(chars[i]) {
            // Convert char position to byte position for element span check
            let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
            if !is_inside_element(byte_pos, element_spans) {
                best_pos = Some(i);
                break;
            }
        }
    }

    let pos = best_pos?;

    // Reject splits that create very short first lines
    let first: String = chars[..=pos].iter().collect();
    let first_display_len = display_len(&first, length_mode);
    if first_display_len < min_first_len {
        return None;
    }

    // Split after the punctuation character
    let rest: String = chars[pos + 1..].iter().collect();
    let rest = rest.trim_start().to_string();

    if rest.is_empty() {
        return None;
    }

    Some((first, rest))
}

/// Split a line before the latest break-word that keeps the first part
/// within `line_length`. Returns None if no valid split point exists or if
/// the split would create an unreasonably short first line.
fn split_at_break_word(
    text: &str,
    line_length: usize,
    element_spans: &[(usize, usize)],
    length_mode: ReflowLengthMode,
) -> Option<(String, String)> {
    let lower = text.to_lowercase();
    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)

    for &word in BREAK_WORDS {
        let mut search_start = 0;
        while let Some(pos) = lower[search_start..].find(word) {
            let abs_pos = search_start + pos;

            // Verify it's a word boundary: preceded by space, followed by space
            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');

            if preceded_by_space && followed_by_space {
                // The break goes BEFORE the word, so first part ends at abs_pos - 1
                let first_part = text[..abs_pos].trim_end();
                let first_part_len = display_len(first_part, length_mode);

                if first_part_len >= min_first_len
                    && first_part_len <= line_length
                    && !is_inside_element(abs_pos, element_spans)
                {
                    // Prefer the latest valid split point
                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
                        best_split = Some((abs_pos, word.len()));
                    }
                }
            }

            search_start = abs_pos + word.len();
        }
    }

    let (byte_start, _word_len) = best_split?;

    let first = text[..byte_start].trim_end().to_string();
    let rest = text[byte_start..].to_string();

    if first.is_empty() || rest.trim().is_empty() {
        return None;
    }

    Some((first, rest))
}

/// Recursively cascade-split a line that exceeds line_length.
/// Tries clause punctuation first, then break-words, then word wrap.
fn cascade_split_line(
    text: &str,
    line_length: usize,
    abbreviations: &Option<Vec<String>>,
    length_mode: ReflowLengthMode,
    attr_lists: bool,
) -> Vec<String> {
    if line_length == 0 || display_len(text, length_mode) <= line_length {
        return vec![text.to_string()];
    }

    let elements = parse_markdown_elements_inner(text, attr_lists);
    let element_spans = compute_element_spans(&elements);

    // Try clause punctuation split
    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
        let mut result = vec![first];
        result.extend(cascade_split_line(
            &rest,
            line_length,
            abbreviations,
            length_mode,
            attr_lists,
        ));
        return result;
    }

    // Try break-word split
    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
        let mut result = vec![first];
        result.extend(cascade_split_line(
            &rest,
            line_length,
            abbreviations,
            length_mode,
            attr_lists,
        ));
        return result;
    }

    // Fallback: word wrap using existing reflow_elements
    let options = ReflowOptions {
        line_length,
        break_on_sentences: false,
        preserve_breaks: false,
        sentence_per_line: false,
        semantic_line_breaks: false,
        abbreviations: abbreviations.clone(),
        length_mode,
        attr_lists,
        require_sentence_capital: true,
        max_list_continuation_indent: None,
    };
    reflow_elements(&elements, &options)
}

/// Reflow elements using semantic line breaks strategy:
/// 1. Split at sentence boundaries (always)
/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
    // Step 1: Split into sentences using existing sentence-per-line logic
    let sentence_lines =
        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);

    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
    // When line_length is 0 (unlimited), skip cascading — sentence splits only
    if options.line_length == 0 {
        return sentence_lines;
    }

    let length_mode = options.length_mode;
    let mut result = Vec::new();
    for line in sentence_lines {
        if display_len(&line, length_mode) <= options.line_length {
            result.push(line);
        } else {
            result.extend(cascade_split_line(
                &line,
                options.line_length,
                &options.abbreviations,
                length_mode,
                options.attr_lists,
            ));
        }
    }

    // Step 3: Merge very short trailing lines back into the previous line.
    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
    let mut merged: Vec<String> = Vec::with_capacity(result.len());
    for line in result {
        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
            // Don't merge across sentence boundaries — sentence splits are intentional
            let prev_ends_at_sentence = {
                let trimmed = merged.last().unwrap().trim_end();
                trimmed
                    .chars()
                    .rev()
                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
            };

            if !prev_ends_at_sentence {
                let prev = merged.last_mut().unwrap();
                let combined = format!("{prev} {line}");
                // Only merge if the combined line fits within the limit
                if display_len(&combined, length_mode) <= options.line_length {
                    *prev = combined;
                    continue;
                }
            }
        }
        merged.push(line);
    }
    merged
}

/// Find the last space in `line` that is safe to split at.
/// Safe spaces are those NOT inside rendered non-Text elements.
/// `element_spans` contains (start, end) byte ranges of non-Text elements in the line.
/// Find the last space in `line` that is not inside any element span.
/// Spans use exclusive bounds (pos > start && pos < end) because element
/// delimiters (e.g., `[`, `]`, `(`, `)`, `<`, `>`, `` ` ``) are never
/// spaces, so only interior positions need protection.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.char_indices()
        .rev()
        .map(|(pos, _)| pos)
        .find(|&pos| line.as_bytes()[pos] == b' ' && !element_spans.iter().any(|(s, e)| pos > *s && pos < *e))
}

/// Reflow elements into lines that fit within the line length
fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
    let mut lines = Vec::new();
    let mut current_line = String::new();
    let mut current_length = 0;
    // Track byte spans of non-Text elements in current_line for safe splitting
    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
    let length_mode = options.length_mode;

    for (idx, element) in elements.iter().enumerate() {
        let element_str = format!("{element}");
        let element_len = element.display_width(length_mode);

        // Determine adjacency from the original elements, not from current_line.
        // Elements are adjacent when there's no whitespace between them in the source:
        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
        let is_adjacent_to_prev = if idx > 0 {
            match (&elements[idx - 1], element) {
                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
                _ => true,
            }
        } else {
            false
        };

        // For text elements that might need breaking
        if let Element::Text(text) = element {
            // Check if original text had leading whitespace
            let has_leading_space = text.starts_with(char::is_whitespace);
            // If this is a text element, always process it word by word
            let words: Vec<&str> = text.split_whitespace().collect();

            for (i, word) in words.iter().enumerate() {
                let word_len = display_len(word, length_mode);
                // Check if this "word" is just punctuation that should stay attached
                let is_trailing_punct = word
                    .chars()
                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));

                // First word of text adjacent to preceding non-text element
                // must stay attached (e.g., shortcode followed by punctuation or text)
                let is_first_adjacent = i == 0 && is_adjacent_to_prev;

                if is_first_adjacent {
                    // Attach directly without space, preventing line break
                    if current_length + word_len > options.line_length && current_length > 0 {
                        // Would exceed — break before the adjacent group
                        // Use element-aware space search to avoid splitting inside links/code/etc.
                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
                            let before = current_line[..last_space].trim_end().to_string();
                            let after = current_line[last_space + 1..].to_string();
                            lines.push(before);
                            current_line = format!("{after}{word}");
                            current_length = display_len(&current_line, length_mode);
                            current_line_element_spans.clear();
                        } else {
                            current_line.push_str(word);
                            current_length += word_len;
                        }
                    } else {
                        current_line.push_str(word);
                        current_length += word_len;
                    }
                } else if current_length > 0
                    && current_length + 1 + word_len > options.line_length
                    && !is_trailing_punct
                {
                    // Start a new line (but never for trailing punctuation)
                    lines.push(current_line.trim().to_string());
                    current_line = word.to_string();
                    current_length = word_len;
                    current_line_element_spans.clear();
                } else {
                    // Add word to current line
                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
                    // AND this isn't trailing punctuation (which attaches directly)
                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
                        current_line.push(' ');
                        current_length += 1;
                    }
                    current_line.push_str(word);
                    current_length += word_len;
                }
            }
        } else if matches!(
            element,
            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
        ) && element_len > options.line_length
        {
            // Italic, bold, and strikethrough with content longer than line_length need word wrapping.
            // Split content word-by-word, attach the opening marker to the first word
            // and the closing marker to the last word.
            let (content, marker): (&str, &str) = match element {
                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
                Element::Strikethrough(content) => (content.as_str(), "~~"),
                _ => unreachable!(),
            };

            let words: Vec<&str> = content.split_whitespace().collect();
            let n = words.len();

            if n == 0 {
                // Empty span — treat as atomic
                let full = format!("{marker}{marker}");
                let full_len = display_len(&full, length_mode);
                if !is_adjacent_to_prev && current_length > 0 {
                    current_line.push(' ');
                    current_length += 1;
                }
                current_line.push_str(&full);
                current_length += full_len;
            } else {
                for (i, word) in words.iter().enumerate() {
                    let is_first = i == 0;
                    let is_last = i == n - 1;
                    let word_str: String = match (is_first, is_last) {
                        (true, true) => format!("{marker}{word}{marker}"),
                        (true, false) => format!("{marker}{word}"),
                        (false, true) => format!("{word}{marker}"),
                        (false, false) => word.to_string(),
                    };
                    let word_len = display_len(&word_str, length_mode);

                    let needs_space = if is_first {
                        !is_adjacent_to_prev && current_length > 0
                    } else {
                        current_length > 0
                    };

                    if needs_space && current_length + 1 + word_len > options.line_length {
                        lines.push(current_line.trim_end().to_string());
                        current_line = word_str;
                        current_length = word_len;
                        current_line_element_spans.clear();
                    } else {
                        if needs_space {
                            current_line.push(' ');
                            current_length += 1;
                        }
                        current_line.push_str(&word_str);
                        current_length += word_len;
                    }
                }
            }
        } else {
            // For non-text elements (code, links, references), treat as atomic units
            // These should never be broken across lines

            if is_adjacent_to_prev {
                // Adjacent to preceding text — attach directly without space
                if current_length + element_len > options.line_length {
                    // Would exceed limit — break before the adjacent word group
                    // Use element-aware space search to avoid splitting inside links/code/etc.
                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
                        let before = current_line[..last_space].trim_end().to_string();
                        let after = current_line[last_space + 1..].to_string();
                        lines.push(before);
                        current_line = format!("{after}{element_str}");
                        current_length = display_len(&current_line, length_mode);
                        current_line_element_spans.clear();
                        // Record the element span in the new current_line
                        let start = after.len();
                        current_line_element_spans.push((start, start + element_str.len()));
                    } else {
                        // No safe space to break at — accept the long line
                        let start = current_line.len();
                        current_line.push_str(&element_str);
                        current_length += element_len;
                        current_line_element_spans.push((start, current_line.len()));
                    }
                } else {
                    let start = current_line.len();
                    current_line.push_str(&element_str);
                    current_length += element_len;
                    current_line_element_spans.push((start, current_line.len()));
                }
            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
                // Not adjacent, would exceed — start new line
                lines.push(current_line.trim().to_string());
                current_line = element_str.clone();
                current_length = element_len;
                current_line_element_spans.clear();
                current_line_element_spans.push((0, element_str.len()));
            } else {
                // Not adjacent, fits — add with space
                let ends_with_opener =
                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
                if current_length > 0 && !ends_with_opener {
                    current_line.push(' ');
                    current_length += 1;
                }
                let start = current_line.len();
                current_line.push_str(&element_str);
                current_length += element_len;
                current_line_element_spans.push((start, current_line.len()));
            }
        }
    }

    // Don't forget the last line
    if !current_line.is_empty() {
        lines.push(current_line.trim_end().to_string());
    }

    lines
}

/// Reflow markdown content preserving structure
pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let mut result = Vec::new();
    let mut i = 0;

    while i < lines.len() {
        let line = lines[i];
        let trimmed = line.trim();

        // Preserve empty lines
        if trimmed.is_empty() {
            result.push(String::new());
            i += 1;
            continue;
        }

        // Preserve headings as-is
        if trimmed.starts_with('#') {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Preserve Quarto/Pandoc div markers (:::) as-is
        if trimmed.starts_with(":::") {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Preserve fenced code blocks
        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
            result.push(line.to_string());
            i += 1;
            // Copy lines until closing fence
            while i < lines.len() {
                result.push(lines[i].to_string());
                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
                    i += 1;
                    break;
                }
                i += 1;
            }
            continue;
        }

        // Preserve indented code blocks (4+ columns accounting for tab expansion)
        if calculate_indentation_width_default(line) >= 4 {
            // Collect all consecutive indented lines
            result.push(line.to_string());
            i += 1;
            while i < lines.len() {
                let next_line = lines[i];
                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
                    result.push(next_line.to_string());
                    i += 1;
                } else {
                    break;
                }
            }
            continue;
        }

        // Preserve block quotes (but reflow their content)
        if trimmed.starts_with('>') {
            // find() returns byte position which is correct for str slicing
            // The unwrap is safe because we already verified trimmed starts with '>'
            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
            let quote_prefix = line[0..gt_pos + 1].to_string();
            let quote_content = &line[quote_prefix.len()..].trim_start();

            let reflowed = reflow_line(quote_content, options);
            for reflowed_line in reflowed.iter() {
                result.push(format!("{quote_prefix} {reflowed_line}"));
            }
            i += 1;
            continue;
        }

        // Preserve horizontal rules first (before checking for lists)
        if is_horizontal_rule(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Preserve lists (but not horizontal rules)
        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
            // Find the list marker and preserve indentation
            let indent = line.len() - line.trim_start().len();
            let indent_str = " ".repeat(indent);

            // For numbered lists, find the period and the space after it
            // For bullet lists, find the marker and the space after it
            let mut marker_end = indent;
            let mut content_start = indent;

            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
                // Numbered list: find the period
                if let Some(period_pos) = line[indent..].find('.') {
                    marker_end = indent + period_pos + 1; // Include the period
                    content_start = marker_end;
                    // Skip any spaces after the period to find content start
                    // Use byte-based check since content_start is a byte index
                    // This is safe because space is ASCII (single byte)
                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                        content_start += 1;
                    }
                }
            } else {
                // Bullet list: marker is single character
                marker_end = indent + 1; // Just the marker character
                content_start = marker_end;
                // Skip any spaces after the marker
                // Use byte-based check since content_start is a byte index
                // This is safe because space is ASCII (single byte)
                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
                    content_start += 1;
                }
            }

            // Minimum indent for continuation lines (based on list marker, before checkbox)
            let min_continuation_indent = content_start;

            // Detect checkbox/task list markers: [ ], [x], [X]
            // GFM task lists work with both unordered and ordered lists
            let rest = &line[content_start..];
            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
                marker_end = content_start + 3; // Include the checkbox `[ ]`
                content_start += 4; // Skip past `[ ] `
            }

            let marker = &line[indent..marker_end];

            // Collect all content for this list item (including continuation lines)
            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
            i += 1;

            // Collect continuation lines (indented lines that are part of this list item)
            // Use the base marker indent (not checkbox-extended) for collection,
            // since users may indent continuations to the bullet level, not the checkbox level
            while i < lines.len() {
                let next_line = lines[i];
                let next_trimmed = next_line.trim();

                // Stop if we hit an empty line or another list item or special block
                if is_block_boundary(next_trimmed) {
                    break;
                }

                // Check if this line is indented (continuation of list item)
                let next_indent = next_line.len() - next_line.trim_start().len();
                if next_indent >= min_continuation_indent {
                    // This is a continuation line - add its content
                    // Preserve hard breaks while trimming excessive whitespace
                    let trimmed_start = next_line.trim_start();
                    list_content.push(trim_preserving_hard_break(trimmed_start));
                    i += 1;
                } else {
                    // Not indented enough, not part of this list item
                    break;
                }
            }

            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
            // Hard breaks should prevent joining with the next line
            let combined_content = if options.preserve_breaks {
                list_content[0].clone()
            } else {
                // Check if any lines have hard breaks - if so, preserve the structure
                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
                if has_hard_breaks {
                    // Don't join lines with hard breaks - keep them separate with newlines
                    list_content.join("\n")
                } else {
                    // No hard breaks, safe to join with spaces
                    list_content.join(" ")
                }
            };

            // Calculate the proper indentation for continuation lines
            let trimmed_marker = marker;
            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
                // Cap the relative indent (past the nesting level) to max_indent,
                // then add back the nesting indent so nested items stay correct
                indent + (content_start - indent).min(max_indent)
            } else {
                content_start
            };

            // Adjust line length to account for list marker and space
            let prefix_length = indent + trimmed_marker.len() + 1;

            // Create adjusted options with reduced line length
            let adjusted_options = ReflowOptions {
                line_length: options.line_length.saturating_sub(prefix_length),
                ..options.clone()
            };

            let reflowed = reflow_line(&combined_content, &adjusted_options);
            for (j, reflowed_line) in reflowed.iter().enumerate() {
                if j == 0 {
                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
                } else {
                    // Continuation lines aligned with text after marker
                    let continuation_indent = " ".repeat(continuation_spaces);
                    result.push(format!("{continuation_indent}{reflowed_line}"));
                }
            }
            continue;
        }

        // Preserve tables
        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Preserve reference definitions
        if trimmed.starts_with('[') && line.contains("]:") {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Preserve definition list items (extended markdown)
        if is_definition_list_item(trimmed) {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // Check if this is a single line that doesn't need processing
        let mut is_single_line_paragraph = true;
        if i + 1 < lines.len() {
            let next_trimmed = lines[i + 1].trim();
            // Check if next line continues this paragraph
            if !is_block_boundary(next_trimmed) {
                is_single_line_paragraph = false;
            }
        }

        // If it's a single line that fits, just add it as-is
        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
            result.push(line.to_string());
            i += 1;
            continue;
        }

        // For regular paragraphs, collect consecutive lines
        let mut paragraph_parts = Vec::new();
        let mut current_part = vec![line];
        i += 1;

        // If preserve_breaks is true, treat each line separately
        if options.preserve_breaks {
            // Don't collect consecutive lines - just reflow this single line
            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
                Some("\\")
            } else if line.ends_with("  ") {
                Some("  ")
            } else {
                None
            };
            let reflowed = reflow_line(line, options);

            // Preserve hard breaks (two trailing spaces or backslash)
            if let Some(break_marker) = hard_break_type {
                if !reflowed.is_empty() {
                    let mut reflowed_with_break = reflowed;
                    let last_idx = reflowed_with_break.len() - 1;
                    if !has_hard_break(&reflowed_with_break[last_idx]) {
                        reflowed_with_break[last_idx].push_str(break_marker);
                    }
                    result.extend(reflowed_with_break);
                }
            } else {
                result.extend(reflowed);
            }
        } else {
            // Original behavior: collect consecutive lines into a paragraph
            while i < lines.len() {
                let prev_line = if !current_part.is_empty() {
                    current_part.last().unwrap()
                } else {
                    ""
                };
                let next_line = lines[i];
                let next_trimmed = next_line.trim();

                // Stop at empty lines or special blocks
                if is_block_boundary(next_trimmed) {
                    break;
                }

                // Check if previous line ends with hard break (two spaces or backslash)
                // or is a complete sentence in sentence_per_line mode
                let prev_trimmed = prev_line.trim();
                let abbreviations = get_abbreviations(&options.abbreviations);
                let ends_with_sentence = (prev_trimmed.ends_with('.')
                    || prev_trimmed.ends_with('!')
                    || prev_trimmed.ends_with('?')
                    || prev_trimmed.ends_with(".*")
                    || prev_trimmed.ends_with("!*")
                    || prev_trimmed.ends_with("?*")
                    || prev_trimmed.ends_with("._")
                    || prev_trimmed.ends_with("!_")
                    || prev_trimmed.ends_with("?_")
                    // Quote-terminated sentences (straight and curly quotes)
                    || prev_trimmed.ends_with(".\"")
                    || prev_trimmed.ends_with("!\"")
                    || prev_trimmed.ends_with("?\"")
                    || prev_trimmed.ends_with(".'")
                    || prev_trimmed.ends_with("!'")
                    || prev_trimmed.ends_with("?'")
                    || prev_trimmed.ends_with(".\u{201D}")
                    || prev_trimmed.ends_with("!\u{201D}")
                    || prev_trimmed.ends_with("?\u{201D}")
                    || prev_trimmed.ends_with(".\u{2019}")
                    || prev_trimmed.ends_with("!\u{2019}")
                    || prev_trimmed.ends_with("?\u{2019}"))
                    && !text_ends_with_abbreviation(
                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
                        &abbreviations,
                    );

                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
                    // Start a new part after hard break or complete sentence
                    paragraph_parts.push(current_part.join(" "));
                    current_part = vec![next_line];
                } else {
                    current_part.push(next_line);
                }
                i += 1;
            }

            // Add the last part
            if !current_part.is_empty() {
                if current_part.len() == 1 {
                    // Single line, don't add trailing space
                    paragraph_parts.push(current_part[0].to_string());
                } else {
                    paragraph_parts.push(current_part.join(" "));
                }
            }

            // Reflow each part separately, preserving hard breaks
            for (j, part) in paragraph_parts.iter().enumerate() {
                let reflowed = reflow_line(part, options);
                result.extend(reflowed);

                // Preserve hard break by ensuring last line of part ends with hard break marker
                // Use two spaces as the default hard break format for reflows
                // But don't add hard breaks in sentence_per_line mode - lines are already separate
                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
                    let last_idx = result.len() - 1;
                    if !has_hard_break(&result[last_idx]) {
                        result[last_idx].push_str("  ");
                    }
                }
            }
        }
    }

    // Preserve trailing newline if the original content had one
    let result_text = result.join("\n");
    if content.ends_with('\n') && !result_text.ends_with('\n') {
        format!("{result_text}\n")
    } else {
        result_text
    }
}

/// Information about a reflowed paragraph
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Ending byte offset of the paragraph in the original content
    pub end_byte: usize,
    /// The reflowed text for this paragraph
    pub reflowed_text: String,
}

/// A collected blockquote line used for style-preserving reflow.
///
/// The invariant `is_explicit == true` iff `prefix.is_some()` is enforced by the
/// constructors. Use [`BlockquoteLineData::explicit`] or [`BlockquoteLineData::lazy`]
/// rather than constructing the struct directly.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Trimmed content without the `> ` prefix.
    pub(crate) content: String,
    /// Whether this line carries an explicit blockquote marker.
    pub(crate) is_explicit: bool,
    /// Full blockquote prefix (e.g. `"> "`, `"> > "`). `None` for lazy continuation lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Create an explicit (marker-bearing) blockquote line.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self {
            content,
            is_explicit: true,
            prefix: Some(prefix),
        }
    }

    /// Create a lazy continuation line (no blockquote marker).
    pub fn lazy(content: String) -> Self {
        Self {
            content,
            is_explicit: false,
            prefix: None,
        }
    }
}

/// Style for blockquote continuation lines after reflow.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    Explicit,
    Lazy,
}

/// Determine the continuation style for a blockquote paragraph from its collected lines.
///
/// The first line is always explicit (it carries the marker), so only continuation
/// lines (index 1+) are counted. Ties resolve to `Explicit`.
///
/// When the slice has only one element (no continuation lines to inspect), both
/// counts are zero and the tie-breaking rule returns `Explicit`.
pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
    let mut explicit_count = 0usize;
    let mut lazy_count = 0usize;

    for line in lines.iter().skip(1) {
        if line.is_explicit {
            explicit_count += 1;
        } else {
            lazy_count += 1;
        }
    }

    if explicit_count > 0 && lazy_count == 0 {
        BlockquoteContinuationStyle::Explicit
    } else if lazy_count > 0 && explicit_count == 0 {
        BlockquoteContinuationStyle::Lazy
    } else if explicit_count >= lazy_count {
        BlockquoteContinuationStyle::Explicit
    } else {
        BlockquoteContinuationStyle::Lazy
    }
}

/// Determine the dominant blockquote prefix for a paragraph.
///
/// The most frequently occurring explicit prefix wins. Ties are broken by earliest
/// first appearance. Falls back to `fallback` when no explicit lines are present.
pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
    let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();

    for (idx, line) in lines.iter().enumerate() {
        let Some(prefix) = line.prefix.as_ref() else {
            continue;
        };
        counts
            .entry(prefix.clone())
            .and_modify(|entry| entry.0 += 1)
            .or_insert((1, idx));
    }

    counts
        .into_iter()
        .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
            count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
        })
        .map(|(prefix, _)| prefix)
        .unwrap_or_else(|| fallback.to_string())
}

/// Whether a reflowed blockquote content line must carry an explicit prefix.
///
/// Lines that would start a new block structure (headings, fences, lists, etc.)
/// cannot safely use lazy continuation syntax.
pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
    let trimmed = content_line.trim_start();
    trimmed.starts_with('>')
        || trimmed.starts_with('#')
        || trimmed.starts_with("```")
        || trimmed.starts_with("~~~")
        || is_unordered_list_marker(trimmed)
        || is_numbered_list_item(trimmed)
        || is_horizontal_rule(trimmed)
        || is_definition_list_item(trimmed)
        || (trimmed.starts_with('[') && trimmed.contains("]:"))
        || trimmed.starts_with(":::")
        || (trimmed.starts_with('<')
            && !trimmed.starts_with("<http")
            && !trimmed.starts_with("<https")
            && !trimmed.starts_with("<mailto:"))
}

/// Reflow blockquote content lines and apply continuation style.
///
/// Segments separated by hard breaks are reflowed independently. The output lines
/// receive blockquote prefixes according to `continuation_style`: the first line and
/// any line that would start a new block structure always get an explicit prefix;
/// other lines follow the detected style.
///
/// Returns the styled, reflowed lines (without a trailing newline).
pub fn reflow_blockquote_content(
    lines: &[BlockquoteLineData],
    explicit_prefix: &str,
    continuation_style: BlockquoteContinuationStyle,
    options: &ReflowOptions,
) -> Vec<String> {
    let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
    let segments = split_into_segments_strs(&content_strs);
    let mut reflowed_content_lines: Vec<String> = Vec::new();

    for segment in segments {
        let hard_break_type = segment.last().and_then(|&line| {
            let line = line.strip_suffix('\r').unwrap_or(line);
            if line.ends_with('\\') {
                Some("\\")
            } else if line.ends_with("  ") {
                Some("  ")
            } else {
                None
            }
        });

        let pieces: Vec<&str> = segment
            .iter()
            .map(|&line| {
                if let Some(l) = line.strip_suffix('\\') {
                    l.trim_end()
                } else if let Some(l) = line.strip_suffix("  ") {
                    l.trim_end()
                } else {
                    line.trim_end()
                }
            })
            .collect();

        let segment_text = pieces.join(" ");
        let segment_text = segment_text.trim();
        if segment_text.is_empty() {
            continue;
        }

        let mut reflowed = reflow_line(segment_text, options);
        if let Some(break_marker) = hard_break_type
            && !reflowed.is_empty()
        {
            let last_idx = reflowed.len() - 1;
            if !has_hard_break(&reflowed[last_idx]) {
                reflowed[last_idx].push_str(break_marker);
            }
        }
        reflowed_content_lines.extend(reflowed);
    }

    let mut styled_lines: Vec<String> = Vec::new();
    for (idx, line) in reflowed_content_lines.iter().enumerate() {
        let force_explicit = idx == 0
            || continuation_style == BlockquoteContinuationStyle::Explicit
            || should_force_explicit_blockquote_line(line);
        if force_explicit {
            styled_lines.push(format!("{explicit_prefix}{line}"));
        } else {
            styled_lines.push(line.clone());
        }
    }

    styled_lines
}

fn is_blockquote_content_boundary(content: &str) -> bool {
    let trimmed = content.trim();
    trimmed.is_empty()
        || is_block_boundary(trimmed)
        || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
        || trimmed.starts_with(":::")
        || crate::utils::is_template_directive_only(content)
        || is_standalone_attr_list(content)
        || is_snippet_block_delimiter(content)
}

fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
    let mut segments = Vec::new();
    let mut current = Vec::new();

    for &line in lines {
        current.push(line);
        if has_hard_break(line) {
            segments.push(current);
            current = Vec::new();
        }
    }

    if !current.is_empty() {
        segments.push(current);
    }

    segments
}

fn reflow_blockquote_paragraph_at_line(
    content: &str,
    lines: &[&str],
    target_idx: usize,
    options: &ReflowOptions,
) -> Option<ParagraphReflow> {
    let mut anchor_idx = target_idx;
    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
        parsed.nesting_level
    } else {
        let mut found = None;
        let mut idx = target_idx;
        loop {
            if lines[idx].trim().is_empty() {
                break;
            }
            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
                found = Some((idx, parsed.nesting_level));
                break;
            }
            if idx == 0 {
                break;
            }
            idx -= 1;
        }
        let (idx, level) = found?;
        anchor_idx = idx;
        level
    };

    // Expand backward to capture prior quote content at the same nesting level.
    let mut para_start = anchor_idx;
    while para_start > 0 {
        let prev_idx = para_start - 1;
        let prev_line = lines[prev_idx];

        if prev_line.trim().is_empty() {
            break;
        }

        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            para_start = prev_idx;
            continue;
        }

        let prev_lazy = prev_line.trim_start();
        if is_blockquote_content_boundary(prev_lazy) {
            break;
        }
        para_start = prev_idx;
    }

    // Lazy continuation cannot precede the first explicit marker.
    while para_start < lines.len() {
        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
            para_start += 1;
            continue;
        };
        target_level = parsed.nesting_level;
        break;
    }

    if para_start >= lines.len() || para_start > target_idx {
        return None;
    }

    // Collect explicit lines at target level and lazy continuation lines.
    // Each entry is (original_line_idx, BlockquoteLineData).
    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
    let mut idx = para_start;
    while idx < lines.len() {
        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
            break;
        }

        let line = lines[idx];
        if line.trim().is_empty() {
            break;
        }

        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            collected.push((
                idx,
                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
            ));
            idx += 1;
            continue;
        }

        let lazy_content = line.trim_start();
        if is_blockquote_content_boundary(lazy_content) {
            break;
        }

        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
        idx += 1;
    }

    if collected.is_empty() {
        return None;
    }

    let para_end = collected[collected.len() - 1].0;
    if target_idx < para_start || target_idx > para_end {
        return None;
    }

    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();

    let fallback_prefix = line_data
        .iter()
        .find_map(|d| d.prefix.clone())
        .unwrap_or_else(|| "> ".to_string());
    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
    let continuation_style = blockquote_continuation_style(&line_data);

    let adjusted_line_length = options
        .line_length
        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
        .max(1);

    let adjusted_options = ReflowOptions {
        line_length: adjusted_line_length,
        ..options.clone()
    };

    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);

    if styled_lines.is_empty() {
        return None;
    }

    // Calculate byte offsets.
    let mut start_byte = 0;
    for line in lines.iter().take(para_start) {
        start_byte += line.len() + 1;
    }

    let mut end_byte = start_byte;
    for line in lines.iter().take(para_end + 1).skip(para_start) {
        end_byte += line.len() + 1;
    }

    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
    if !includes_trailing_newline {
        end_byte -= 1;
    }

    let reflowed_joined = styled_lines.join("\n");
    let reflowed_text = if includes_trailing_newline {
        if reflowed_joined.ends_with('\n') {
            reflowed_joined
        } else {
            format!("{reflowed_joined}\n")
        }
    } else if reflowed_joined.ends_with('\n') {
        reflowed_joined.trim_end_matches('\n').to_string()
    } else {
        reflowed_joined
    };

    Some(ParagraphReflow {
        start_byte,
        end_byte,
        reflowed_text,
    })
}

/// Reflow a single paragraph at the specified line number
///
/// This function finds the paragraph containing the given line number,
/// reflows it according to the specified line length, and returns
/// information about the paragraph location and its reflowed text.
///
/// # Arguments
///
/// * `content` - The full document content
/// * `line_number` - The 1-based line number within the paragraph to reflow
/// * `line_length` - The target line length for reflowing
///
/// # Returns
///
/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
/// or `None` if the line number is out of bounds or the content at that
/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
}

/// Reflow a paragraph at the given line with a specific length mode.
pub fn reflow_paragraph_at_line_with_mode(
    content: &str,
    line_number: usize,
    line_length: usize,
    length_mode: ReflowLengthMode,
) -> Option<ParagraphReflow> {
    let options = ReflowOptions {
        line_length,
        length_mode,
        ..Default::default()
    };
    reflow_paragraph_at_line_with_options(content, line_number, &options)
}

/// Reflow a paragraph at the given line using the provided options.
///
/// This is the canonical implementation used by both the rule's fix mode and the
/// LSP "Reflow paragraph" action. Passing a fully configured `ReflowOptions` allows
/// the LSP action to respect user-configured reflow mode, abbreviations, etc.
///
/// # Returns
///
/// Returns `Some(ParagraphReflow)` with byte offsets and reflowed text, or `None`
/// if the line is out of bounds or sits inside a non-reflow-able construct.
pub fn reflow_paragraph_at_line_with_options(
    content: &str,
    line_number: usize,
    options: &ReflowOptions,
) -> Option<ParagraphReflow> {
    if line_number == 0 {
        return None;
    }

    let lines: Vec<&str> = content.lines().collect();

    // Check if line number is valid (1-based)
    if line_number > lines.len() {
        return None;
    }

    let target_idx = line_number - 1; // Convert to 0-based
    let target_line = lines[target_idx];
    let trimmed = target_line.trim();

    // Handle blockquote paragraphs (including lazy continuation lines) with
    // style-preserving output.
    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
        return Some(blockquote_reflow);
    }

    // Don't reflow special blocks
    if is_paragraph_boundary(trimmed, target_line) {
        return None;
    }

    // Find paragraph start - scan backward until blank line or special block
    let mut para_start = target_idx;
    while para_start > 0 {
        let prev_idx = para_start - 1;
        let prev_line = lines[prev_idx];
        let prev_trimmed = prev_line.trim();

        // Stop at blank line or special blocks
        if is_paragraph_boundary(prev_trimmed, prev_line) {
            break;
        }

        para_start = prev_idx;
    }

    // Find paragraph end - scan forward until blank line or special block
    let mut para_end = target_idx;
    while para_end + 1 < lines.len() {
        let next_idx = para_end + 1;
        let next_line = lines[next_idx];
        let next_trimmed = next_line.trim();

        // Stop at blank line or special blocks
        if is_paragraph_boundary(next_trimmed, next_line) {
            break;
        }

        para_end = next_idx;
    }

    // Extract paragraph lines
    let paragraph_lines = &lines[para_start..=para_end];

    // Calculate byte offsets
    let mut start_byte = 0;
    for line in lines.iter().take(para_start) {
        start_byte += line.len() + 1; // +1 for newline
    }

    let mut end_byte = start_byte;
    for line in paragraph_lines.iter() {
        end_byte += line.len() + 1; // +1 for newline
    }

    // Track whether the byte range includes a trailing newline
    // (it doesn't if this is the last line and the file doesn't end with newline)
    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');

    // Adjust end_byte if the last line doesn't have a newline
    if !includes_trailing_newline {
        end_byte -= 1;
    }

    // Join paragraph lines and reflow
    let paragraph_text = paragraph_lines.join("\n");

    // Reflow the paragraph using reflow_markdown to handle it properly
    let reflowed = reflow_markdown(&paragraph_text, options);

    // Ensure reflowed text matches whether the byte range includes a trailing newline
    // This is critical: if the range includes a newline, the replacement must too,
    // otherwise the next line will get appended to the reflowed paragraph
    let reflowed_text = if includes_trailing_newline {
        // Range includes newline - ensure reflowed text has one
        if reflowed.ends_with('\n') {
            reflowed
        } else {
            format!("{reflowed}\n")
        }
    } else {
        // Range doesn't include newline - ensure reflowed text doesn't have one
        if reflowed.ends_with('\n') {
            reflowed.trim_end_matches('\n').to_string()
        } else {
            reflowed
        }
    };

    Some(ParagraphReflow {
        start_byte,
        end_byte,
        reflowed_text,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Unit test for private helper function text_ends_with_abbreviation()
    ///
    /// This test stays inline because it tests a private function.
    /// All other tests (public API, integration tests) are in tests/utils/text_reflow_test.rs
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        // Test the helper function directly
        let abbreviations = get_abbreviations(&None);

        // True cases - built-in abbreviations (titles and i.e./e.g.)
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // False cases - NOT in built-in list (etc doesn't always have period)
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations)); // question mark, not period
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations)); // exclamation, not period
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations)); // question mark
        assert!(!text_ends_with_abbreviation("word", &abbreviations)); // no punctuation
        assert!(!text_ends_with_abbreviation("", &abbreviations)); // empty string
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Valid unordered list markers
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));
        assert!(is_unordered_list_marker("-")); // lone marker
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Not list markers
        assert!(!is_unordered_list_marker("---")); // horizontal rule
        assert!(!is_unordered_list_marker("***")); // horizontal rule
        assert!(!is_unordered_list_marker("- - -")); // horizontal rule
        assert!(!is_unordered_list_marker("* * *")); // horizontal rule
        assert!(!is_unordered_list_marker("*emphasis*")); // emphasis, not list
        assert!(!is_unordered_list_marker("-word")); // no space after marker
        assert!(!is_unordered_list_marker("")); // empty
        assert!(!is_unordered_list_marker("text")); // plain text
        assert!(!is_unordered_list_marker("# heading")); // heading
    }

    #[test]
    fn test_is_block_boundary() {
        // Block boundaries
        assert!(is_block_boundary("")); // empty line
        assert!(is_block_boundary("# Heading")); // ATX heading
        assert!(is_block_boundary("## Level 2")); // ATX heading
        assert!(is_block_boundary("```rust")); // code fence
        assert!(is_block_boundary("~~~")); // tilde code fence
        assert!(is_block_boundary("> quote")); // blockquote
        assert!(is_block_boundary("| cell |")); // table
        assert!(is_block_boundary("[link]: http://example.com")); // reference def
        assert!(is_block_boundary("---")); // horizontal rule
        assert!(is_block_boundary("***")); // horizontal rule
        assert!(is_block_boundary("- item")); // unordered list
        assert!(is_block_boundary("* item")); // unordered list
        assert!(is_block_boundary("+ item")); // unordered list
        assert!(is_block_boundary("1. item")); // ordered list
        assert!(is_block_boundary("10. item")); // ordered list
        assert!(is_block_boundary(": definition")); // definition list
        assert!(is_block_boundary(":::")); // div marker
        assert!(is_block_boundary("::::: {.callout-note}")); // div marker with attrs

        // NOT block boundaries (paragraph continuation)
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*")); // emphasis, not list
        assert!(!is_block_boundary("[link](url)")); // inline link, not reference def
        assert!(!is_block_boundary("some words here"));
    }

    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        // Verifies that a definition list item after a single-line paragraph
        // is treated as a block boundary, not merged into the paragraph
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        // The definition list marker should remain on its own line
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Core block boundary checks are inherited
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code blocks (≥4 spaces or tab)
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows via is_potential_table_row
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b")); // pipe-delimited without leading pipe

        // Not paragraph boundaries
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", "  text")); // 2-space indent is not code
    }

    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        // Verifies that div markers (:::) are treated as paragraph boundaries
        // in reflow_paragraph_at_line, preventing reflow across div boundaries
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the div marker — should not be reflowed
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}