rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13    LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14    SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18    text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Length calculation mode for reflow.
///
/// Selects how a string is measured when it is compared against the
/// configured line length.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s). A grapheme cluster made of
    /// several code points counts once per code point.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes
    Bytes,
}
35
36/// Calculate the display length of a string based on the length mode
37fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38    match mode {
39        ReflowLengthMode::Chars => s.chars().count(),
40        ReflowLengthMode::Visual => s.width(),
41        ReflowLengthMode::Bytes => s.len(),
42    }
43}
44
45/// Options for reflowing text
46#[derive(Clone)]
47pub struct ReflowOptions {
48    /// Target line length
49    pub line_length: usize,
50    /// Whether to break on sentence boundaries when possible
51    pub break_on_sentences: bool,
52    /// Whether to preserve existing line breaks in paragraphs
53    pub preserve_breaks: bool,
54    /// Whether to enforce one sentence per line
55    pub sentence_per_line: bool,
56    /// Whether to use semantic line breaks (cascading split strategy)
57    pub semantic_line_breaks: bool,
58    /// Custom abbreviations for sentence detection
59    /// Periods are optional - both "Dr" and "Dr." work the same
60    /// Custom abbreviations are always added to the built-in defaults
61    pub abbreviations: Option<Vec<String>>,
62    /// How to measure string length for line-length comparisons
63    pub length_mode: ReflowLengthMode,
64    /// Whether to treat {#id .class key="value"} as atomic (unsplittable) elements.
65    /// Enabled for MkDocs and Kramdown flavors.
66    pub attr_lists: bool,
67    /// Whether to require uppercase after periods for sentence detection.
68    /// When true (default), only "word. Capital" is a sentence boundary.
69    /// When false, "word. lowercase" is also treated as a sentence boundary.
70    /// Does not affect ! and ? which are always treated as sentence boundaries.
71    pub require_sentence_capital: bool,
72    /// Cap list continuation indent to this value when set.
73    /// Used by mkdocs flavor where continuation is always 4 spaces
74    /// regardless of checkbox markers.
75    pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79    fn default() -> Self {
80        Self {
81            line_length: 80,
82            break_on_sentences: true,
83            preserve_breaks: false,
84            sentence_per_line: false,
85            semantic_line_breaks: false,
86            abbreviations: None,
87            length_mode: ReflowLengthMode::default(),
88            attr_lists: false,
89            require_sentence_capital: true,
90            max_list_continuation_indent: None,
91        }
92    }
93}
94
/// Build a per-character mask that is `true` for every position belonging to
/// an inline code span, delimiters included.
///
/// The mask is indexed by `char` position (not byte offset). A span opens
/// with a run of backticks and closes at the next run of exactly the same
/// length; an opening run with no matching closer is treated as literal text.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run starting at `from` (0 when not a backtick).
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        let opener_len = run_len(cursor);
        if opener_len == 0 {
            cursor += 1;
            continue;
        }

        // Scan forward for a closing run of the same length.
        let span_start = cursor;
        let mut probe = span_start + opener_len;
        let mut span_end = None;
        while probe < total {
            let closer_len = run_len(probe);
            if closer_len == 0 {
                probe += 1;
                continue;
            }
            probe += closer_len;
            if closer_len == opener_len {
                span_end = Some(probe);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening run, content and closing run are all part of the span.
                for flag in &mut mask[span_start..end] {
                    *flag = true;
                }
                cursor = end;
            }
            // No matching close: the backticks are literal; resume right after them.
            None => cursor = span_start + opener_len,
        }
    }

    mask
}
155
156/// Detect if a character position is a sentence boundary
157/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
158/// Supports both ASCII punctuation (. ! ?) and CJK punctuation (。 ！ ？)
159fn is_sentence_boundary(
160    text: &str,
161    pos: usize,
162    abbreviations: &HashSet<String>,
163    require_sentence_capital: bool,
164) -> bool {
165    let chars: Vec<char> = text.chars().collect();
166
167    if pos + 1 >= chars.len() {
168        return false;
169    }
170
171    let c = chars[pos];
172    let next_char = chars[pos + 1];
173
174    // Check for CJK sentence-ending punctuation (。, ！, ？)
175    // CJK punctuation doesn't require space or uppercase after it
176    if is_cjk_sentence_ending(c) {
177        // Skip any trailing emphasis/strikethrough markers
178        let mut after_punct_pos = pos + 1;
179        while after_punct_pos < chars.len()
180            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181        {
182            after_punct_pos += 1;
183        }
184
185        // Skip whitespace
186        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187            after_punct_pos += 1;
188        }
189
190        // Check if we have more content (any non-whitespace)
191        if after_punct_pos >= chars.len() {
192            return false;
193        }
194
195        // Skip leading emphasis/strikethrough markers
196        while after_punct_pos < chars.len()
197            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198        {
199            after_punct_pos += 1;
200        }
201
202        if after_punct_pos >= chars.len() {
203            return false;
204        }
205
206        // For CJK, we accept any character as the start of the next sentence
207        // (no uppercase requirement, since CJK doesn't have case)
208        return true;
209    }
210
211    // Check for ASCII sentence-ending punctuation
212    if c != '.' && c != '!' && c != '?' {
213        return false;
214    }
215
216    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
217    let (_space_pos, after_space_pos) = if next_char == ' ' {
218        // Normal case: punctuation followed by space
219        (pos + 1, pos + 2)
220    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221        // Sentence ends with quote - check what follows the quote
222        if chars[pos + 2] == ' ' {
223            // Just quote followed by space: 'sentence." '
224            (pos + 2, pos + 3)
225        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226            // Quote followed by emphasis: 'sentence."* '
227            (pos + 3, pos + 4)
228        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229            && pos + 4 < chars.len()
230            && chars[pos + 3] == chars[pos + 2]
231            && chars[pos + 4] == ' '
232        {
233            // Quote followed by bold: 'sentence."** '
234            (pos + 4, pos + 5)
235        } else {
236            return false;
237        }
238    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
240        (pos + 2, pos + 3)
241    } else if (next_char == '*' || next_char == '_')
242        && pos + 3 < chars.len()
243        && chars[pos + 2] == next_char
244        && chars[pos + 3] == ' '
245    {
246        // Sentence ends with bold: "sentence.** " or "sentence.__ "
247        (pos + 3, pos + 4)
248    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249        // Sentence ends with strikethrough: "sentence.~~ "
250        (pos + 3, pos + 4)
251    } else {
252        return false;
253    };
254
255    // Skip all whitespace after the space to find the start of the next sentence
256    let mut next_char_pos = after_space_pos;
257    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258        next_char_pos += 1;
259    }
260
261    // Check if we reached the end of the string
262    if next_char_pos >= chars.len() {
263        return false;
264    }
265
266    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
267    let mut first_letter_pos = next_char_pos;
268    while first_letter_pos < chars.len()
269        && (chars[first_letter_pos] == '*'
270            || chars[first_letter_pos] == '_'
271            || chars[first_letter_pos] == '~'
272            || is_opening_quote(chars[first_letter_pos]))
273    {
274        first_letter_pos += 1;
275    }
276
277    // Check if we reached the end after skipping emphasis
278    if first_letter_pos >= chars.len() {
279        return false;
280    }
281
282    let first_char = chars[first_letter_pos];
283
284    // For ! and ?, sentence boundaries are unambiguous — no uppercase requirement
285    if c == '!' || c == '?' {
286        return true;
287    }
288
289    // Period-specific checks: periods are ambiguous (abbreviations, decimals, initials)
290    // so we apply additional guards before accepting a sentence boundary.
291
292    if pos > 0 {
293        // Check for common abbreviations
294        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296            return false;
297        }
298
299        // Check for decimal numbers (e.g., "3.14 is pi")
300        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301            return false;
302        }
303
304        // Check for single-letter initials (e.g., "J. K. Rowling")
305        // A single uppercase letter before the period preceded by whitespace or start
306        // is likely an initial, not a sentence ending.
307        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308            return false;
309        }
310    }
311
312    // In strict mode, require uppercase or CJK to start the next sentence after a period.
313    // In relaxed mode, accept any alphanumeric character.
314    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315        return false;
316    }
317
318    true
319}
320
321/// Split text into sentences
322pub fn split_into_sentences(text: &str) -> Vec<String> {
323    split_into_sentences_custom(text, &None)
324}
325
326/// Split text into sentences with custom abbreviations
327pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328    let abbreviations = get_abbreviations(custom_abbreviations);
329    split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332/// Internal function to split text into sentences with a pre-computed abbreviations set
333/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
334fn split_into_sentences_with_set(
335    text: &str,
336    abbreviations: &HashSet<String>,
337    require_sentence_capital: bool,
338) -> Vec<String> {
339    // Pre-compute which character positions are inside inline code spans
340    let in_code = compute_inline_code_mask(text);
341
342    let mut sentences = Vec::new();
343    let mut current_sentence = String::new();
344    let mut chars = text.chars().peekable();
345    let mut pos = 0;
346
347    while let Some(c) = chars.next() {
348        current_sentence.push(c);
349
350        if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
352            while let Some(&next) = chars.peek() {
353                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354                    current_sentence.push(chars.next().unwrap());
355                    pos += 1;
356                } else {
357                    break;
358                }
359            }
360
361            // Consume the space after the sentence
362            if chars.peek() == Some(&' ') {
363                chars.next();
364                pos += 1;
365            }
366
367            sentences.push(current_sentence.trim().to_string());
368            current_sentence.clear();
369        }
370
371        pos += 1;
372    }
373
374    // Add any remaining text as the last sentence
375    if !current_sentence.trim().is_empty() {
376        sentences.push(current_sentence.trim().to_string());
377    }
378    sentences
379}
380
/// Report whether `line` is a horizontal rule (`---`, `___`, `***`).
///
/// The line must start with `-`, `_` or `*`, contain nothing but that same
/// character and spaces, and use the character at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Check if a line is a numbered list item (e.g., "1. ", "10. ").
///
/// The line must begin with one or more ASCII digits followed by `.` and a
/// space. Only ASCII digits count: other Unicode numerics such as `½` or
/// Arabic-Indic digits are not ordered-list markers in CommonMark and must
/// not make a paragraph line look like a list item.
///
/// The space after the period is required (consistent with list marker
/// extraction), so "2019." alone is NOT treated as a list item.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    // ASCII digits are one byte each, so `digit_count` is also a valid byte offset.
    digit_count > 0 && line.as_bytes()[digit_count..].starts_with(b". ")
}
433
434/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
435fn is_unordered_list_marker(s: &str) -> bool {
436    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437        && !is_horizontal_rule(s)
438        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441/// Shared structural checks for block boundary detection.
442/// Checks elements that only depend on the trimmed line content.
443fn is_block_boundary_core(trimmed: &str) -> bool {
444    trimmed.is_empty()
445        || trimmed.starts_with('#')
446        || trimmed.starts_with("```")
447        || trimmed.starts_with("~~~")
448        || trimmed.starts_with('>')
449        || (trimmed.starts_with('[') && trimmed.contains("]:"))
450        || is_horizontal_rule(trimmed)
451        || is_unordered_list_marker(trimmed)
452        || is_numbered_list_item(trimmed)
453        || is_definition_list_item(trimmed)
454        || trimmed.starts_with(":::")
455}
456
457/// Check if a trimmed line starts a new structural block element.
458/// Used for paragraph boundary detection in `reflow_markdown()`.
459fn is_block_boundary(trimmed: &str) -> bool {
460    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463/// Check if a line starts a new structural block for paragraph boundary detection
464/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
465/// (≥4 spaces) and table row detection via `is_potential_table_row`.
466fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467    is_block_boundary_core(trimmed)
468        || calculate_indentation_width_default(line) >= 4
469        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Report whether `line` ends with a hard line break.
///
/// CommonMark has two hard-break spellings: two or more trailing spaces, or
/// a trailing backslash. A single trailing `\r` (from CRLF input) is ignored
/// before checking.
fn has_hard_break(line: &str) -> bool {
    let content = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    ["  ", "\\"].iter().any(|&suffix| content.ends_with(suffix))
}
481
/// Report whether the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Strip trailing whitespace from `s` without destroying a hard line break.
///
/// Markdown hard breaks are written either as two trailing spaces
/// (traditional) or as a trailing backslash (mdformat style):
/// - a line ending in a backslash is returned unchanged;
/// - a line ending in two or more spaces is trimmed and given back exactly
///   two trailing spaces, unless it is all whitespace, which yields `""`;
/// - any other line simply loses its trailing whitespace.
///
/// A single trailing `\r` (CRLF input) is removed before any of this.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as written.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let body = line.trim_end();
    let mut trimmed = String::from(body);
    // Re-attach a normalized two-space hard break when there was one and
    // there is actual content for it to follow.
    if line.ends_with("  ") && !body.is_empty() {
        trimmed.push_str("  ");
    }
    trimmed
}
517
518/// Parse markdown elements using the appropriate parser based on options.
519fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520    if options.attr_lists {
521        parse_markdown_elements_with_attr_lists(text)
522    } else {
523        parse_markdown_elements(text)
524    }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528    // For sentence-per-line mode, always process regardless of length
529    if options.sentence_per_line {
530        let elements = parse_elements(line, options);
531        return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532    }
533
534    // For semantic line breaks mode, use cascading split strategy
535    if options.semantic_line_breaks {
536        let elements = parse_elements(line, options);
537        return reflow_elements_semantic(&elements, options);
538    }
539
540    // Quick check: if line is already short enough or no wrapping requested, return as-is
541    // line_length = 0 means no wrapping (unlimited line length)
542    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543        return vec![line.to_string()];
544    }
545
546    // Parse the markdown to identify elements
547    let elements = parse_elements(line, options);
548
549    // Reflow the elements into lines
550    reflow_elements(&elements, options)
551}
552
/// Where the image half of a linked image gets its source from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// URL written inline, as in `![alt](url)`
    Inline(String),
    /// Reference label, as in `![alt][ref]`
    Reference(String),
}
561
/// Where the link half of a linked image points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// URL written inline, as in `](url)`
    Inline(String),
    /// Reference label, as in `][ref]`
    Reference(String),
}
570
571/// Represents a piece of content in the markdown
572#[derive(Debug, Clone)]
573enum Element {
574    /// Plain text that can be wrapped
575    Text(String),
576    /// A complete markdown inline link [text](url)
577    Link { text: String, url: String },
578    /// A complete markdown reference link [text][ref]
579    ReferenceLink { text: String, reference: String },
580    /// A complete markdown empty reference link [text][]
581    EmptyReferenceLink { text: String },
582    /// A complete markdown shortcut reference link [ref]
583    ShortcutReference { reference: String },
584    /// A complete markdown inline image ![alt](url)
585    InlineImage { alt: String, url: String },
586    /// A complete markdown reference image ![alt][ref]
587    ReferenceImage { alt: String, reference: String },
588    /// A complete markdown empty reference image ![alt][]
589    EmptyReferenceImage { alt: String },
590    /// A clickable image badge in any of 4 forms:
591    /// - [![alt](img-url)](link-url)
592    /// - [![alt][img-ref]](link-url)
593    /// - [![alt](img-url)][link-ref]
594    /// - [![alt][img-ref]][link-ref]
595    LinkedImage {
596        alt: String,
597        img_source: LinkedImageSource,
598        link_target: LinkedImageTarget,
599    },
600    /// Footnote reference [^note]
601    FootnoteReference { note: String },
602    /// Strikethrough text ~~text~~
603    Strikethrough(String),
604    /// Wiki-style link [[wiki]] or [[wiki|text]]
605    WikiLink(String),
606    /// Inline math $math$
607    InlineMath(String),
608    /// Display math $$math$$
609    DisplayMath(String),
610    /// Emoji shortcode :emoji:
611    EmojiShortcode(String),
612    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
613    Autolink(String),
614    /// HTML tag <tag> or </tag> or <tag/>
615    HtmlTag(String),
616    /// HTML entity &nbsp; or &#123;
617    HtmlEntity(String),
618    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
619    HugoShortcode(String),
620    /// MkDocs/kramdown attribute list {#id .class key="value"}
621    AttrList(String),
622    /// Inline code `code`
623    Code(String),
624    /// Bold text **text** or __text__
625    Bold {
626        content: String,
627        /// True if underscore markers (__), false for asterisks (**)
628        underscore: bool,
629    },
630    /// Italic text *text* or _text_
631    Italic {
632        content: String,
633        /// True if underscore marker (_), false for asterisk (*)
634        underscore: bool,
635    },
636}
637
638impl std::fmt::Display for Element {
639    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
640        match self {
641            Element::Text(s) => write!(f, "{s}"),
642            Element::Link { text, url } => write!(f, "[{text}]({url})"),
643            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
644            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
645            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
646            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
647            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
648            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
649            Element::LinkedImage {
650                alt,
651                img_source,
652                link_target,
653            } => {
654                // Build the image part: ![alt](url) or ![alt][ref]
655                let img_part = match img_source {
656                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
657                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
658                };
659                // Build the link part: (url) or [ref]
660                match link_target {
661                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
662                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
663                }
664            }
665            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
666            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
667            Element::WikiLink(s) => write!(f, "[[{s}]]"),
668            Element::InlineMath(s) => write!(f, "${s}$"),
669            Element::DisplayMath(s) => write!(f, "$${s}$$"),
670            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
671            Element::Autolink(s) => write!(f, "{s}"),
672            Element::HtmlTag(s) => write!(f, "{s}"),
673            Element::HtmlEntity(s) => write!(f, "{s}"),
674            Element::HugoShortcode(s) => write!(f, "{s}"),
675            Element::AttrList(s) => write!(f, "{s}"),
676            Element::Code(s) => write!(f, "`{s}`"),
677            Element::Bold { content, underscore } => {
678                if *underscore {
679                    write!(f, "__{content}__")
680                } else {
681                    write!(f, "**{content}**")
682                }
683            }
684            Element::Italic { content, underscore } => {
685                if *underscore {
686                    write!(f, "_{content}_")
687                } else {
688                    write!(f, "*{content}*")
689                }
690            }
691        }
692    }
693}
694
695impl Element {
696    /// Calculate the display width of this element using the given length mode.
697    /// This formats the element and computes its width, correctly handling
698    /// visual width for CJK characters and other wide glyphs.
699    fn display_width(&self, mode: ReflowLengthMode) -> usize {
700        let formatted = format!("{self}");
701        display_len(&formatted, mode)
702    }
703}
704
/// A formatting span (emphasis, strong or strikethrough) located by
/// pulldown-cmark.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset of the span's first byte, opening markers included
    start: usize,
    /// Byte offset just past the closing markers
    end: usize,
    /// Text between the opening and closing markers
    content: String,
    /// `true` for strong (bold) emphasis
    is_strong: bool,
    /// `true` for strikethrough (`~~text~~`)
    is_strikethrough: bool,
    /// `true` when the source used underscore markers (emphasis/strong only)
    uses_underscore: bool,
}
721
722/// Extract emphasis and strikethrough spans from text using pulldown-cmark
723///
724/// This provides CommonMark-compliant emphasis parsing, correctly handling:
725/// - Nested emphasis like `*text **bold** more*`
726/// - Left/right flanking delimiter rules
727/// - Underscore vs asterisk markers
728/// - GFM strikethrough (~~text~~)
729///
730/// Returns spans sorted by start position.
731fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732    let mut spans = Vec::new();
733    let mut options = Options::empty();
734    options.insert(Options::ENABLE_STRIKETHROUGH);
735
736    // Stacks to track nested formatting with their start positions
737    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
738    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739    let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741    let parser = Parser::new_ext(text, options).into_offset_iter();
742
743    for (event, range) in parser {
744        match event {
745            Event::Start(Tag::Emphasis) => {
746                // Check if this uses underscore by looking at the original text
747                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748                emphasis_stack.push((range.start, uses_underscore));
749            }
750            Event::End(TagEnd::Emphasis) => {
751                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752                    // Extract content between the markers (1 char marker on each side)
753                    let content_start = start_byte + 1;
754                    let content_end = range.end - 1;
755                    if content_end > content_start
756                        && let Some(content) = text.get(content_start..content_end)
757                    {
758                        spans.push(EmphasisSpan {
759                            start: start_byte,
760                            end: range.end,
761                            content: content.to_string(),
762                            is_strong: false,
763                            is_strikethrough: false,
764                            uses_underscore,
765                        });
766                    }
767                }
768            }
769            Event::Start(Tag::Strong) => {
770                // Check if this uses underscore by looking at the original text
771                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772                strong_stack.push((range.start, uses_underscore));
773            }
774            Event::End(TagEnd::Strong) => {
775                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776                    // Extract content between the markers (2 char marker on each side)
777                    let content_start = start_byte + 2;
778                    let content_end = range.end - 2;
779                    if content_end > content_start
780                        && let Some(content) = text.get(content_start..content_end)
781                    {
782                        spans.push(EmphasisSpan {
783                            start: start_byte,
784                            end: range.end,
785                            content: content.to_string(),
786                            is_strong: true,
787                            is_strikethrough: false,
788                            uses_underscore,
789                        });
790                    }
791                }
792            }
793            Event::Start(Tag::Strikethrough) => {
794                strikethrough_stack.push(range.start);
795            }
796            Event::End(TagEnd::Strikethrough) => {
797                if let Some(start_byte) = strikethrough_stack.pop() {
798                    // Extract content between the ~~ markers (2 char marker on each side)
799                    let content_start = start_byte + 2;
800                    let content_end = range.end - 2;
801                    if content_end > content_start
802                        && let Some(content) = text.get(content_start..content_end)
803                    {
804                        spans.push(EmphasisSpan {
805                            start: start_byte,
806                            end: range.end,
807                            content: content.to_string(),
808                            is_strong: false,
809                            is_strikethrough: true,
810                            uses_underscore: false,
811                        });
812                    }
813                }
814            }
815            _ => {}
816        }
817    }
818
819    // Sort by start position
820    spans.sort_by_key(|s| s.start);
821    spans
822}
823
824/// Parse markdown elements from text preserving the raw syntax
825///
826/// Detection order is critical:
827/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
828/// 2. Inline images ![alt](url) - before links to handle ! prefix
829/// 3. Reference images ![alt][ref] - before reference links
830/// 4. Inline links [text](url) - before reference links
831/// 5. Reference links [text][ref] - before shortcut references
832/// 6. Shortcut reference links [ref] - detected last to avoid false positives
833/// 7. Other elements (code, bold, italic, etc.) - processed normally
834fn parse_markdown_elements(text: &str) -> Vec<Element> {
835    parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839    parse_markdown_elements_inner(text, true)
840}
841
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843    let mut elements = Vec::new();
844    let mut remaining = text;
845
846    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
847    let emphasis_spans = extract_emphasis_spans(text);
848
849    while !remaining.is_empty() {
850        // Calculate current byte offset in original text
851        let current_offset = text.len() - remaining.len();
852        // Find the earliest occurrence of any markdown pattern
853        // Store (start, end, pattern_name) to unify standard Regex and FancyRegex match results
854        let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856        // Check for linked images FIRST (all 4 variants)
857        // Quick literal check: only run expensive regexes if we might have a linked image
858        // Pattern starts with "[!" so check for that first
859        if remaining.contains("[!") {
860            // Pattern 1: [![alt](img)](link) - inline image in inline link
861            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863            {
864                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865            }
866
867            // Pattern 2: [![alt][ref]](link) - reference image in inline link
868            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870            {
871                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872            }
873
874            // Pattern 3: [![alt](img)][ref] - inline image in reference link
875            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877            {
878                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879            }
880
881            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
882            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884            {
885                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886            }
887        }
888
889        // Check for images (they start with ! so should be detected before links)
890        // Inline images - ![alt](url)
891        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893        {
894            earliest_match = Some((m.start(), m.end(), "inline_image"));
895        }
896
897        // Reference images - ![alt][ref]
898        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900        {
901            earliest_match = Some((m.start(), m.end(), "ref_image"));
902        }
903
904        // Check for footnote references - [^note]
905        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907        {
908            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909        }
910
911        // Check for inline links - [text](url)
912        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914        {
915            earliest_match = Some((m.start(), m.end(), "inline_link"));
916        }
917
918        // Check for reference links - [text][ref]
919        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921        {
922            earliest_match = Some((m.start(), m.end(), "ref_link"));
923        }
924
925        // Check for shortcut reference links - [ref]
926        // Only check if we haven't found an earlier pattern that would conflict
927        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929        {
930            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931        }
932
933        // Check for wiki-style links - [[wiki]]
934        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936        {
937            earliest_match = Some((m.start(), m.end(), "wiki_link"));
938        }
939
940        // Check for display math first (before inline) - $$math$$
941        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943        {
944            earliest_match = Some((m.start(), m.end(), "display_math"));
945        }
946
947        // Check for inline math - $math$
948        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950        {
951            earliest_match = Some((m.start(), m.end(), "inline_math"));
952        }
953
954        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
955
956        // Check for emoji shortcodes - :emoji:
957        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959        {
960            earliest_match = Some((m.start(), m.end(), "emoji"));
961        }
962
963        // Check for HTML entities - &nbsp; etc
964        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966        {
967            earliest_match = Some((m.start(), m.end(), "html_entity"));
968        }
969
970        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
971        // Must be checked before other patterns to avoid false sentence breaks
972        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974        {
975            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976        }
977
978        // Check for HTML tags - <tag> </tag> <tag/>
979        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
980        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982        {
983            // Check if this is an autolink (starts with protocol or mailto:)
984            let matched_text = &remaining[m.start()..m.end()];
985            let is_url_autolink = matched_text.starts_with("<http://")
986                || matched_text.starts_with("<https://")
987                || matched_text.starts_with("<mailto:")
988                || matched_text.starts_with("<ftp://")
989                || matched_text.starts_with("<ftps://");
990
991            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
992            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
993            let is_email_autolink = {
994                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995                EMAIL_PATTERN.is_match(content)
996            };
997
998            if is_url_autolink || is_email_autolink {
999                earliest_match = Some((m.start(), m.end(), "autolink"));
1000            } else {
1001                earliest_match = Some((m.start(), m.end(), "html_tag"));
1002            }
1003        }
1004
1005        // Find earliest non-link special characters
1006        let mut next_special = remaining.len();
1007        let mut special_type = "";
1008        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009        let mut attr_list_len: usize = 0;
1010
1011        // Check for code spans (not handled by pulldown-cmark in this context)
1012        if let Some(pos) = remaining.find('`')
1013            && pos < next_special
1014        {
1015            next_special = pos;
1016            special_type = "code";
1017        }
1018
1019        // Check for MkDocs/kramdown attr lists - {#id .class key="value"}
1020        if attr_lists
1021            && let Some(pos) = remaining.find('{')
1022            && pos < next_special
1023            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024            && m.start() == 0
1025        {
1026            next_special = pos;
1027            special_type = "attr_list";
1028            attr_list_len = m.end();
1029        }
1030
1031        // Check for emphasis using pulldown-cmark's pre-extracted spans
1032        // Find the earliest emphasis span that starts within remaining text
1033        for span in &emphasis_spans {
1034            if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035                let pos_in_remaining = span.start - current_offset;
1036                if pos_in_remaining < next_special {
1037                    next_special = pos_in_remaining;
1038                    special_type = "pulldown_emphasis";
1039                    pulldown_emphasis = Some(span);
1040                }
1041                break; // Spans are sorted by start position, so first match is earliest
1042            }
1043        }
1044
1045        // Determine which pattern to process first
1046        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047            pos < next_special
1048        } else {
1049            false
1050        };
1051
1052        if should_process_markdown_link {
1053            let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055            // Add any text before the match
1056            if pos > 0 {
1057                elements.push(Element::Text(remaining[..pos].to_string()));
1058            }
1059
1060            // Process the matched pattern
1061            match pattern_type {
1062                // Pattern 1: [![alt](img)](link) - inline image in inline link
1063                "linked_image_ii" => {
1064                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065                        let alt = caps.get(1).map_or("", |m| m.as_str());
1066                        let img_url = caps.get(2).map_or("", |m| m.as_str());
1067                        let link_url = caps.get(3).map_or("", |m| m.as_str());
1068                        elements.push(Element::LinkedImage {
1069                            alt: alt.to_string(),
1070                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1071                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072                        });
1073                        remaining = &remaining[match_end..];
1074                    } else {
1075                        elements.push(Element::Text("[".to_string()));
1076                        remaining = &remaining[1..];
1077                    }
1078                }
1079                // Pattern 2: [![alt][ref]](link) - reference image in inline link
1080                "linked_image_ri" => {
1081                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082                        let alt = caps.get(1).map_or("", |m| m.as_str());
1083                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
1084                        let link_url = caps.get(3).map_or("", |m| m.as_str());
1085                        elements.push(Element::LinkedImage {
1086                            alt: alt.to_string(),
1087                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089                        });
1090                        remaining = &remaining[match_end..];
1091                    } else {
1092                        elements.push(Element::Text("[".to_string()));
1093                        remaining = &remaining[1..];
1094                    }
1095                }
1096                // Pattern 3: [![alt](img)][ref] - inline image in reference link
1097                "linked_image_ir" => {
1098                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099                        let alt = caps.get(1).map_or("", |m| m.as_str());
1100                        let img_url = caps.get(2).map_or("", |m| m.as_str());
1101                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
1102                        elements.push(Element::LinkedImage {
1103                            alt: alt.to_string(),
1104                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1105                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106                        });
1107                        remaining = &remaining[match_end..];
1108                    } else {
1109                        elements.push(Element::Text("[".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
1114                "linked_image_rr" => {
1115                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116                        let alt = caps.get(1).map_or("", |m| m.as_str());
1117                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
1118                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
1119                        elements.push(Element::LinkedImage {
1120                            alt: alt.to_string(),
1121                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123                        });
1124                        remaining = &remaining[match_end..];
1125                    } else {
1126                        elements.push(Element::Text("[".to_string()));
1127                        remaining = &remaining[1..];
1128                    }
1129                }
1130                "inline_image" => {
1131                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132                        let alt = caps.get(1).map_or("", |m| m.as_str());
1133                        let url = caps.get(2).map_or("", |m| m.as_str());
1134                        elements.push(Element::InlineImage {
1135                            alt: alt.to_string(),
1136                            url: url.to_string(),
1137                        });
1138                        remaining = &remaining[match_end..];
1139                    } else {
1140                        elements.push(Element::Text("!".to_string()));
1141                        remaining = &remaining[1..];
1142                    }
1143                }
1144                "ref_image" => {
1145                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146                        let alt = caps.get(1).map_or("", |m| m.as_str());
1147                        let reference = caps.get(2).map_or("", |m| m.as_str());
1148
1149                        if reference.is_empty() {
1150                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151                        } else {
1152                            elements.push(Element::ReferenceImage {
1153                                alt: alt.to_string(),
1154                                reference: reference.to_string(),
1155                            });
1156                        }
1157                        remaining = &remaining[match_end..];
1158                    } else {
1159                        elements.push(Element::Text("!".to_string()));
1160                        remaining = &remaining[1..];
1161                    }
1162                }
1163                "footnote_ref" => {
1164                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165                        let note = caps.get(1).map_or("", |m| m.as_str());
1166                        elements.push(Element::FootnoteReference { note: note.to_string() });
1167                        remaining = &remaining[match_end..];
1168                    } else {
1169                        elements.push(Element::Text("[".to_string()));
1170                        remaining = &remaining[1..];
1171                    }
1172                }
1173                "inline_link" => {
1174                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175                        let text = caps.get(1).map_or("", |m| m.as_str());
1176                        let url = caps.get(2).map_or("", |m| m.as_str());
1177                        elements.push(Element::Link {
1178                            text: text.to_string(),
1179                            url: url.to_string(),
1180                        });
1181                        remaining = &remaining[match_end..];
1182                    } else {
1183                        // Fallback - shouldn't happen
1184                        elements.push(Element::Text("[".to_string()));
1185                        remaining = &remaining[1..];
1186                    }
1187                }
1188                "ref_link" => {
1189                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190                        let text = caps.get(1).map_or("", |m| m.as_str());
1191                        let reference = caps.get(2).map_or("", |m| m.as_str());
1192
1193                        if reference.is_empty() {
1194                            // Empty reference link [text][]
1195                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196                        } else {
1197                            // Regular reference link [text][ref]
1198                            elements.push(Element::ReferenceLink {
1199                                text: text.to_string(),
1200                                reference: reference.to_string(),
1201                            });
1202                        }
1203                        remaining = &remaining[match_end..];
1204                    } else {
1205                        // Fallback - shouldn't happen
1206                        elements.push(Element::Text("[".to_string()));
1207                        remaining = &remaining[1..];
1208                    }
1209                }
1210                "shortcut_ref" => {
1211                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212                        let reference = caps.get(1).map_or("", |m| m.as_str());
1213                        elements.push(Element::ShortcutReference {
1214                            reference: reference.to_string(),
1215                        });
1216                        remaining = &remaining[match_end..];
1217                    } else {
1218                        // Fallback - shouldn't happen
1219                        elements.push(Element::Text("[".to_string()));
1220                        remaining = &remaining[1..];
1221                    }
1222                }
1223                "wiki_link" => {
1224                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225                        let content = caps.get(1).map_or("", |m| m.as_str());
1226                        elements.push(Element::WikiLink(content.to_string()));
1227                        remaining = &remaining[match_end..];
1228                    } else {
1229                        elements.push(Element::Text("[[".to_string()));
1230                        remaining = &remaining[2..];
1231                    }
1232                }
1233                "display_math" => {
1234                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235                        let math = caps.get(1).map_or("", |m| m.as_str());
1236                        elements.push(Element::DisplayMath(math.to_string()));
1237                        remaining = &remaining[match_end..];
1238                    } else {
1239                        elements.push(Element::Text("$$".to_string()));
1240                        remaining = &remaining[2..];
1241                    }
1242                }
1243                "inline_math" => {
1244                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245                        let math = caps.get(1).map_or("", |m| m.as_str());
1246                        elements.push(Element::InlineMath(math.to_string()));
1247                        remaining = &remaining[match_end..];
1248                    } else {
1249                        elements.push(Element::Text("$".to_string()));
1250                        remaining = &remaining[1..];
1251                    }
1252                }
1253                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1254                "emoji" => {
1255                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256                        let emoji = caps.get(1).map_or("", |m| m.as_str());
1257                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1258                        remaining = &remaining[match_end..];
1259                    } else {
1260                        elements.push(Element::Text(":".to_string()));
1261                        remaining = &remaining[1..];
1262                    }
1263                }
1264                "html_entity" => {
1265                    // HTML entities are captured whole
1266                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267                    remaining = &remaining[match_end..];
1268                }
1269                "hugo_shortcode" => {
1270                    // Hugo shortcodes are atomic elements - preserve them exactly
1271                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272                    remaining = &remaining[match_end..];
1273                }
1274                "autolink" => {
1275                    // Autolinks are atomic elements - preserve them exactly
1276                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277                    remaining = &remaining[match_end..];
1278                }
1279                "html_tag" => {
1280                    // HTML tags are captured whole
1281                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282                    remaining = &remaining[match_end..];
1283                }
1284                _ => {
1285                    // Unknown pattern, treat as text
1286                    elements.push(Element::Text("[".to_string()));
1287                    remaining = &remaining[1..];
1288                }
1289            }
1290        } else {
1291            // Process non-link special characters
1292
1293            // Add any text before the special character
1294            if next_special > 0 && next_special < remaining.len() {
1295                elements.push(Element::Text(remaining[..next_special].to_string()));
1296                remaining = &remaining[next_special..];
1297            }
1298
1299            // Process the special element
1300            match special_type {
1301                "code" => {
1302                    // Find end of code
1303                    if let Some(code_end) = remaining[1..].find('`') {
1304                        let code = &remaining[1..=code_end];
1305                        elements.push(Element::Code(code.to_string()));
1306                        remaining = &remaining[1 + code_end + 1..];
1307                    } else {
1308                        // No closing backtick, treat as text
1309                        elements.push(Element::Text(remaining.to_string()));
1310                        break;
1311                    }
1312                }
1313                "attr_list" => {
1314                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315                    remaining = &remaining[attr_list_len..];
1316                }
1317                "pulldown_emphasis" => {
1318                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1319                    if let Some(span) = pulldown_emphasis {
1320                        let span_len = span.end - span.start;
1321                        if span.is_strikethrough {
1322                            elements.push(Element::Strikethrough(span.content.clone()));
1323                        } else if span.is_strong {
1324                            elements.push(Element::Bold {
1325                                content: span.content.clone(),
1326                                underscore: span.uses_underscore,
1327                            });
1328                        } else {
1329                            elements.push(Element::Italic {
1330                                content: span.content.clone(),
1331                                underscore: span.uses_underscore,
1332                            });
1333                        }
1334                        remaining = &remaining[span_len..];
1335                    } else {
1336                        // Fallback - shouldn't happen
1337                        elements.push(Element::Text(remaining[..1].to_string()));
1338                        remaining = &remaining[1..];
1339                    }
1340                }
1341                _ => {
1342                    // No special elements found, add all remaining text
1343                    elements.push(Element::Text(remaining.to_string()));
1344                    break;
1345                }
1346            }
1347        }
1348    }
1349
1350    elements
1351}
1352
1353/// Reflow elements for sentence-per-line mode
1354fn reflow_elements_sentence_per_line(
1355    elements: &[Element],
1356    custom_abbreviations: &Option<Vec<String>>,
1357    require_sentence_capital: bool,
1358) -> Vec<String> {
1359    let abbreviations = get_abbreviations(custom_abbreviations);
1360    let mut lines = Vec::new();
1361    let mut current_line = String::new();
1362
1363    for (idx, element) in elements.iter().enumerate() {
1364        let element_str = format!("{element}");
1365
1366        // For text elements, split into sentences
1367        if let Element::Text(text) = element {
1368            // Simply append text - it already has correct spacing from tokenization
1369            let combined = format!("{current_line}{text}");
1370            // Use the pre-computed abbreviations set to avoid redundant computation
1371            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373            if sentences.len() > 1 {
1374                // We found sentence boundaries
1375                for (i, sentence) in sentences.iter().enumerate() {
1376                    if i == 0 {
1377                        // First sentence might continue from previous elements
1378                        // But check if it ends with an abbreviation
1379                        let trimmed = sentence.trim();
1380
1381                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1383                            current_line.clone_from(sentence);
1384                        } else {
1385                            // Normal case - emit the first sentence
1386                            lines.push(sentence.clone());
1387                            current_line.clear();
1388                        }
1389                    } else if i == sentences.len() - 1 {
1390                        // Last sentence: check if it's complete or incomplete
1391                        let trimmed = sentence.trim();
1392                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395                            // Complete sentence - emit it immediately
1396                            lines.push(sentence.clone());
1397                            current_line.clear();
1398                        } else {
1399                            // Incomplete sentence - save for next iteration
1400                            current_line.clone_from(sentence);
1401                        }
1402                    } else {
1403                        // Complete sentences in the middle
1404                        lines.push(sentence.clone());
1405                    }
1406                }
1407            } else {
1408                // Single sentence - check if it's complete
1409                let trimmed = combined.trim();
1410
1411                // If the combined result is only whitespace, don't accumulate it.
1412                // This prevents leading spaces on subsequent elements when lines
1413                // are joined with spaces during reflow iteration.
1414                if trimmed.is_empty() {
1415                    continue;
1416                }
1417
1418                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421                    // Complete single sentence - emit it
1422                    lines.push(trimmed.to_string());
1423                    current_line.clear();
1424                } else {
1425                    // Incomplete sentence - continue accumulating
1426                    current_line = combined;
1427                }
1428            }
1429        } else if let Element::Italic { content, underscore } = element {
1430            // Handle italic elements - may contain multiple sentences that need continuation
1431            let marker = if *underscore { "_" } else { "*" };
1432            handle_emphasis_sentence_split(
1433                content,
1434                marker,
1435                &abbreviations,
1436                require_sentence_capital,
1437                &mut current_line,
1438                &mut lines,
1439            );
1440        } else if let Element::Bold { content, underscore } = element {
1441            // Handle bold elements - may contain multiple sentences that need continuation
1442            let marker = if *underscore { "__" } else { "**" };
1443            handle_emphasis_sentence_split(
1444                content,
1445                marker,
1446                &abbreviations,
1447                require_sentence_capital,
1448                &mut current_line,
1449                &mut lines,
1450            );
1451        } else if let Element::Strikethrough(content) = element {
1452            // Handle strikethrough elements - may contain multiple sentences that need continuation
1453            handle_emphasis_sentence_split(
1454                content,
1455                "~~",
1456                &abbreviations,
1457                require_sentence_capital,
1458                &mut current_line,
1459                &mut lines,
1460            );
1461        } else {
1462            // Non-text, non-emphasis elements (Code, Links, etc.)
1463            // Check if this element is adjacent to the preceding text (no space between)
1464            let is_adjacent = if idx > 0 {
1465                match &elements[idx - 1] {
1466                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467                    _ => true,
1468                }
1469            } else {
1470                false
1471            };
1472
1473            // Add space before element if needed, but not for adjacent elements
1474            if !is_adjacent
1475                && !current_line.is_empty()
1476                && !current_line.ends_with(' ')
1477                && !current_line.ends_with('(')
1478                && !current_line.ends_with('[')
1479            {
1480                current_line.push(' ');
1481            }
1482            current_line.push_str(&element_str);
1483        }
1484    }
1485
1486    // Add any remaining content
1487    if !current_line.is_empty() {
1488        lines.push(current_line.trim().to_string());
1489    }
1490    lines
1491}
1492
1493/// Handle splitting emphasis content at sentence boundaries while preserving markers
1494fn handle_emphasis_sentence_split(
1495    content: &str,
1496    marker: &str,
1497    abbreviations: &HashSet<String>,
1498    require_sentence_capital: bool,
1499    current_line: &mut String,
1500    lines: &mut Vec<String>,
1501) {
1502    // Split the emphasis content into sentences
1503    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505    if sentences.len() <= 1 {
1506        // Single sentence or no boundaries - treat as atomic
1507        if !current_line.is_empty()
1508            && !current_line.ends_with(' ')
1509            && !current_line.ends_with('(')
1510            && !current_line.ends_with('[')
1511        {
1512            current_line.push(' ');
1513        }
1514        current_line.push_str(marker);
1515        current_line.push_str(content);
1516        current_line.push_str(marker);
1517
1518        // Check if the emphasis content ends with sentence punctuation - if so, emit
1519        let trimmed = content.trim();
1520        let ends_with_punct = ends_with_sentence_punct(trimmed);
1521        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522            lines.push(current_line.clone());
1523            current_line.clear();
1524        }
1525    } else {
1526        // Multiple sentences - each gets its own emphasis markers
1527        for (i, sentence) in sentences.iter().enumerate() {
1528            let trimmed = sentence.trim();
1529            if trimmed.is_empty() {
1530                continue;
1531            }
1532
1533            if i == 0 {
1534                // First sentence: combine with current_line and emit
1535                if !current_line.is_empty()
1536                    && !current_line.ends_with(' ')
1537                    && !current_line.ends_with('(')
1538                    && !current_line.ends_with('[')
1539                {
1540                    current_line.push(' ');
1541                }
1542                current_line.push_str(marker);
1543                current_line.push_str(trimmed);
1544                current_line.push_str(marker);
1545
1546                // Check if this is a complete sentence
1547                let ends_with_punct = ends_with_sentence_punct(trimmed);
1548                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549                    lines.push(current_line.clone());
1550                    current_line.clear();
1551                }
1552            } else if i == sentences.len() - 1 {
1553                // Last sentence: check if complete
1554                let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556                let mut line = String::new();
1557                line.push_str(marker);
1558                line.push_str(trimmed);
1559                line.push_str(marker);
1560
1561                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562                    lines.push(line);
1563                } else {
1564                    // Incomplete - keep in current_line for potential continuation
1565                    *current_line = line;
1566                }
1567            } else {
1568                // Middle sentences: emit with markers
1569                let mut line = String::new();
1570                line.push_str(marker);
1571                line.push_str(trimmed);
1572                line.push_str(marker);
1573                lines.push(line);
1574            }
1575        }
1576    }
1577}
1578
/// English words before which a semantic line break reads naturally.
///
/// Entries are lowercase because they are searched for in a lowercased copy
/// of the line; a hit only counts when the word has a space on both sides
/// (or starts the line and is followed by a space).
#[rustfmt::skip]
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns
    "which", "that",
    // Subordinating conjunctions
    "because", "when", "if", "while", "where",
    "although", "though", "unless", "since", "after",
    "before", "until", "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless",
    // Contrastive subordinator
    "whereas",
];
1614
/// Report whether `c` separates clauses for semantic line breaking:
/// comma, semicolon, colon, or em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620/// Find the closing `)` that balances the `(` at the start of `slice`.
1621///
1622/// `offset` is the byte position of the `(` in the original full-line string;
1623/// it is used to translate local byte positions into global positions for
1624/// element-span lookups.  Parens inside markdown element spans are skipped so
1625/// that, e.g., the closing `)` of an inline link does not prematurely end the
1626/// scan.  The char's *start* byte (not byte-after) is used for the span check
1627/// so that closing element delimiters — which sit exactly at the span's
1628/// exclusive-end boundary — are correctly excluded.
1629///
1630/// Returns `(end_local, inner)` where `end_local` is the byte offset within
1631/// `slice` just past the closing `)`, and `inner` is the content between the
1632/// outermost `(` and `)`.
1633fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1634    debug_assert!(slice.starts_with('('));
1635    let mut depth: i32 = 0;
1636    for (local_byte, c) in slice.char_indices() {
1637        let global_byte = offset + local_byte;
1638        // When depth > 0, skip parens that belong to a markdown element.
1639        // Use the char's start byte so that a closing element delimiter
1640        // (whose byte_after equals the span's exclusive end) is treated as
1641        // inside the element rather than outside it.
1642        if depth > 0 && is_inside_element(global_byte, element_spans) {
1643            continue;
1644        }
1645        match c {
1646            '(' => depth += 1,
1647            ')' => {
1648                depth -= 1;
1649                if depth == 0 {
1650                    let end = local_byte + 1;
1651                    let inner = &slice[1..local_byte];
1652                    return Some((end, inner));
1653                }
1654            }
1655            _ => {}
1656        }
1657    }
1658    None
1659}
1660
1661/// Split a line at a parenthetical boundary for semantic line breaks.
1662///
1663/// Two strategies are tried in order:
1664///
1665/// 1. **Leading parenthetical** — if the line begins with `(`, isolate the
1666///    entire balanced group on this line and start the rest on the next.
1667///    This handles lines produced by a prior split that placed a `(` at the
1668///    very beginning.
1669///
1670/// 2. **Mid-line parenthetical** — find the rightmost balanced `(…)` whose
1671///    content spans multiple words and whose preceding text fits within
1672///    `[min_first_len, line_length]`.  Split just before the `(` so the
1673///    parenthetical begins the following line.
1674///
1675/// Parentheses that fall inside markdown element spans (links, code, etc.)
1676/// are ignored in both strategies.
1677fn split_at_parenthetical(
1678    text: &str,
1679    line_length: usize,
1680    element_spans: &[(usize, usize)],
1681    length_mode: ReflowLengthMode,
1682) -> Option<(String, String)> {
1683    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1684
1685    // Strategy 1: text starts with '(' — isolate the parenthetical as its own line.
1686    if text.starts_with('(')
1687        && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1688        && inner.contains(' ')
1689    {
1690        // If closing quotes or clause punctuation immediately follow the closing
1691        // ')', attach them to the parenthetical so the continuation line does
1692        // not start with a bare quote, comma, or semicolon.
1693        let tail = &text[end_local..];
1694        let attached_len = tail
1695            .char_indices()
1696            .take_while(|(_, c)| is_closing_quote(*c) || is_clause_punctuation(*c))
1697            .last()
1698            .map_or(0, |(idx, c)| idx + c.len_utf8());
1699        let first_end = end_local + attached_len;
1700        let rest_start = first_end;
1701        let first = &text[..first_end];
1702        let first_len = display_len(first, length_mode);
1703        // No MIN_SPLIT_RATIO check: a parenthetical unit is always a valid
1704        // semantic line regardless of its length.
1705        if first_len <= line_length {
1706            let rest = text[rest_start..].trim_start();
1707            if !rest.is_empty() {
1708                return Some((first.to_string(), rest.to_string()));
1709            }
1710        }
1711    }
1712
1713    // Strategy 2: find the rightmost multi-word '(' whose preceding text fits.
1714    let mut best_open_byte: Option<usize> = None;
1715    let mut pos = 0usize;
1716    while pos < text.len() {
1717        // '(' is ASCII so a single-byte comparison is safe in UTF-8.
1718        if text.as_bytes()[pos] != b'(' {
1719            let c = text[pos..].chars().next().unwrap();
1720            pos += c.len_utf8();
1721            continue;
1722        }
1723        // Skip '(' that are part of a markdown element (use start byte).
1724        if is_inside_element(pos, element_spans) {
1725            pos += 1;
1726            continue;
1727        }
1728        if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1729            let first = text[..pos].trim_end();
1730            let first_len = display_len(first, length_mode);
1731            if !first.is_empty()
1732                && first_len >= min_first_len
1733                && first_len <= line_length
1734                && inner.contains(' ')
1735                && best_open_byte.is_none_or(|prev| pos > prev)
1736            {
1737                best_open_byte = Some(pos);
1738            }
1739            pos += end_local;
1740        } else {
1741            pos += 1;
1742        }
1743    }
1744
1745    let open_byte = best_open_byte?;
1746    let first = text[..open_byte].trim_end().to_string();
1747    let rest = text[open_byte..].to_string();
1748    if first.is_empty() || rest.trim().is_empty() {
1749        return None;
1750    }
1751    Some((first, rest))
1752}
1753
1754/// Compute element spans for a flat text representation of elements.
1755/// Returns Vec of (start, end) byte offsets for non-Text elements,
1756/// so we can check that a split position doesn't fall inside them.
1757fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1758    let mut spans = Vec::new();
1759    let mut offset = 0;
1760    for element in elements {
1761        let rendered = format!("{element}");
1762        let len = rendered.len();
1763        if !matches!(element, Element::Text(_)) {
1764            spans.push((offset, offset + len));
1765        }
1766        offset += len;
1767    }
1768    spans
1769}
1770
/// Report whether byte position `pos` lies strictly inside any span.
///
/// Both bounds are exclusive: a position equal to a span's start or to its
/// end is considered outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    spans
        .iter()
        .copied()
        .any(|(span_start, span_end)| span_start < pos && pos < span_end)
}
1775
/// Minimum fraction of line_length that the first part of a split must occupy.
/// Prevents awkwardly short first lines like "A," or "Note:" on their own.
///
/// Callers turn this into an absolute threshold with
/// `((line_length as f64) * MIN_SPLIT_RATIO) as usize`, so the product is
/// truncated toward zero.
const MIN_SPLIT_RATIO: f64 = 0.3;
1779
1780/// Split a line at the latest clause punctuation that keeps the first part
1781/// within `line_length`. Returns None if no valid split point exists or if
1782/// the split would create an unreasonably short first line.
1783fn split_at_clause_punctuation(
1784    text: &str,
1785    line_length: usize,
1786    element_spans: &[(usize, usize)],
1787    length_mode: ReflowLengthMode,
1788) -> Option<(String, String)> {
1789    let chars: Vec<char> = text.chars().collect();
1790    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1791
1792    // Find the char index where accumulated display width exceeds line_length
1793    let mut width_acc = 0;
1794    let mut search_end_char = 0;
1795    for (idx, &c) in chars.iter().enumerate() {
1796        let c_width = display_len(&c.to_string(), length_mode);
1797        if width_acc + c_width > line_length {
1798            break;
1799        }
1800        width_acc += c_width;
1801        search_end_char = idx + 1;
1802    }
1803
1804    // Scan backwards tracking parenthesis depth to skip clause punctuation
1805    // inside plain-text parenthetical groups.  Scanning right-to-left means
1806    // ')' opens a depth level and '(' closes it.  Parens that belong to a
1807    // markdown element are excluded using the char's start byte (not byte-after)
1808    // so that closing element delimiters at the span boundary are correctly
1809    // treated as part of the element.
1810    let mut paren_depth: i32 = 0;
1811    let mut best_pos = None;
1812    for i in (0..search_end_char).rev() {
1813        // Start byte of char i (for paren element check)
1814        let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1815        // Byte just after char i (for clause punctuation element check — existing convention)
1816        let byte_after: usize = byte_start + chars[i].len_utf8();
1817
1818        if !is_inside_element(byte_start, element_spans) {
1819            match chars[i] {
1820                ')' => paren_depth += 1,
1821                '(' => paren_depth = paren_depth.saturating_sub(1),
1822                _ => {}
1823            }
1824        }
1825
1826        if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1827            best_pos = Some(i);
1828            break;
1829        }
1830    }
1831
1832    let pos = best_pos?;
1833
1834    // Reject splits that create very short first lines
1835    let first: String = chars[..=pos].iter().collect();
1836    let first_display_len = display_len(&first, length_mode);
1837    if first_display_len < min_first_len {
1838        return None;
1839    }
1840
1841    // Split after the punctuation character
1842    let rest: String = chars[pos + 1..].iter().collect();
1843    let rest = rest.trim_start().to_string();
1844
1845    if rest.is_empty() {
1846        return None;
1847    }
1848
1849    Some((first, rest))
1850}
1851
1852/// Compute plain-text paren-depth at each byte offset in `text`.
1853///
1854/// Returns a `Vec<i32>` of length `text.len()` where entry `i` is the
1855/// nesting depth at byte `i` — counting only `(` and `)` that fall
1856/// outside markdown element spans.  This lets callers quickly check
1857/// whether a byte position lies inside a plain-text parenthetical group.
1858fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1859    let mut map = vec![0i32; text.len()];
1860    let mut depth = 0i32;
1861    for (byte, c) in text.char_indices() {
1862        if !is_inside_element(byte, element_spans) {
1863            match c {
1864                '(' => depth += 1,
1865                ')' => depth = depth.saturating_sub(1),
1866                _ => {}
1867            }
1868        }
1869        // Fill the depth value for every byte of this (possibly multi-byte) char.
1870        let end = (byte + c.len_utf8()).min(map.len());
1871        for slot in &mut map[byte..end] {
1872            *slot = depth;
1873        }
1874    }
1875    map
1876}
1877
1878/// Return `true` if `line` is a complete, balanced, multi-word parenthetical
1879/// group — i.e. it starts with `(`, ends with `)` (possibly followed by
1880/// clause punctuation), has balanced parens throughout, and the inner content
1881/// contains at least one space (matching the ≥2-word threshold used by
1882/// `split_at_parenthetical` when deciding to split).
1883///
1884/// Used to prevent the short-line merge step from collapsing intentional
1885/// parenthetical splits back into the previous line.
1886fn is_standalone_parenthetical(line: &str) -> bool {
1887    let trimmed = line.trim();
1888    if !trimmed.starts_with('(') {
1889        return false;
1890    }
1891    // Strip optional trailing clause punctuation to find the real end.
1892    let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1893    if !core.ends_with(')') {
1894        return false;
1895    }
1896    // Inner content must span multiple words (same threshold as split_at_parenthetical).
1897    let inner = &core[1..core.len() - 1];
1898    if !inner.contains(' ') {
1899        return false;
1900    }
1901    // Verify the parens are balanced (depth returns to 0 at the last ')').
1902    let mut depth = 0i32;
1903    for c in core.chars() {
1904        match c {
1905            '(' => depth += 1,
1906            ')' => depth -= 1,
1907            _ => {}
1908        }
1909        if depth < 0 {
1910            return false;
1911        }
1912    }
1913    depth == 0
1914}
1915
1916/// Split a line before the latest break-word that keeps the first part
1917/// within `line_length`. Returns None if no valid split point exists or if
1918/// the split would create an unreasonably short first line.
1919fn split_at_break_word(
1920    text: &str,
1921    line_length: usize,
1922    element_spans: &[(usize, usize)],
1923    length_mode: ReflowLengthMode,
1924) -> Option<(String, String)> {
1925    let lower = text.to_lowercase();
1926    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1927    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1928
1929    // Build a paren-depth map so we can skip break-words inside plain-text
1930    // parenthetical groups (matching the protection added to split_at_clause_punctuation).
1931    let depth_map = paren_depth_map(text, element_spans);
1932
1933    for &word in BREAK_WORDS {
1934        let mut search_start = 0;
1935        while let Some(pos) = lower[search_start..].find(word) {
1936            let abs_pos = search_start + pos;
1937
1938            // Verify it's a word boundary: preceded by space, followed by space
1939            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1940            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1941
1942            if preceded_by_space && followed_by_space {
1943                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1944                let first_part = text[..abs_pos].trim_end();
1945                let first_part_len = display_len(first_part, length_mode);
1946
1947                // Skip break-words inside plain-text parenthetical groups.
1948                let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1949
1950                if first_part_len >= min_first_len
1951                    && first_part_len <= line_length
1952                    && !is_inside_element(abs_pos, element_spans)
1953                    && !inside_paren
1954                {
1955                    // Prefer the latest valid split point
1956                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1957                        best_split = Some((abs_pos, word.len()));
1958                    }
1959                }
1960            }
1961
1962            search_start = abs_pos + word.len();
1963        }
1964    }
1965
1966    let (byte_start, _word_len) = best_split?;
1967
1968    let first = text[..byte_start].trim_end().to_string();
1969    let rest = text[byte_start..].to_string();
1970
1971    if first.is_empty() || rest.trim().is_empty() {
1972        return None;
1973    }
1974
1975    Some((first, rest))
1976}
1977
1978/// Recursively cascade-split a line that exceeds line_length.
1979/// Tries clause punctuation first, then break-words, then word wrap.
1980fn cascade_split_line(
1981    text: &str,
1982    line_length: usize,
1983    abbreviations: &Option<Vec<String>>,
1984    length_mode: ReflowLengthMode,
1985    attr_lists: bool,
1986) -> Vec<String> {
1987    if line_length == 0 || display_len(text, length_mode) <= line_length {
1988        return vec![text.to_string()];
1989    }
1990
1991    let elements = parse_markdown_elements_inner(text, attr_lists);
1992    let element_spans = compute_element_spans(&elements);
1993
1994    // Try parenthetical boundary split (before clause punctuation so that
1995    // multi-word parentheticals are kept intact as semantic units)
1996    if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1997        let mut result = vec![first];
1998        result.extend(cascade_split_line(
1999            &rest,
2000            line_length,
2001            abbreviations,
2002            length_mode,
2003            attr_lists,
2004        ));
2005        return result;
2006    }
2007
2008    // Try clause punctuation split
2009    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2010        let mut result = vec![first];
2011        result.extend(cascade_split_line(
2012            &rest,
2013            line_length,
2014            abbreviations,
2015            length_mode,
2016            attr_lists,
2017        ));
2018        return result;
2019    }
2020
2021    // Try break-word split
2022    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2023        let mut result = vec![first];
2024        result.extend(cascade_split_line(
2025            &rest,
2026            line_length,
2027            abbreviations,
2028            length_mode,
2029            attr_lists,
2030        ));
2031        return result;
2032    }
2033
2034    // Fallback: word wrap using existing reflow_elements
2035    let options = ReflowOptions {
2036        line_length,
2037        break_on_sentences: false,
2038        preserve_breaks: false,
2039        sentence_per_line: false,
2040        semantic_line_breaks: false,
2041        abbreviations: abbreviations.clone(),
2042        length_mode,
2043        attr_lists,
2044        require_sentence_capital: true,
2045        max_list_continuation_indent: None,
2046    };
2047    reflow_elements(&elements, &options)
2048}
2049
2050/// Reflow elements using semantic line breaks strategy:
2051/// 1. Split at sentence boundaries (always)
2052/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
2053fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2054    // Step 1: Split into sentences using existing sentence-per-line logic
2055    let sentence_lines =
2056        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2057
2058    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
2059    // When line_length is 0 (unlimited), skip cascading — sentence splits only
2060    if options.line_length == 0 {
2061        return sentence_lines;
2062    }
2063
2064    let length_mode = options.length_mode;
2065    let mut result = Vec::new();
2066    for line in sentence_lines {
2067        if display_len(&line, length_mode) <= options.line_length {
2068            result.push(line);
2069        } else {
2070            result.extend(cascade_split_line(
2071                &line,
2072                options.line_length,
2073                &options.abbreviations,
2074                length_mode,
2075                options.attr_lists,
2076            ));
2077        }
2078    }
2079
2080    // Step 3: Merge very short trailing lines back into the previous line.
2081    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
2082    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2083    let mut merged: Vec<String> = Vec::with_capacity(result.len());
2084    for line in result {
2085        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2086            // Don't merge a line that is itself a standalone parenthetical group —
2087            // it was placed on its own line intentionally by split_at_parenthetical.
2088            if is_standalone_parenthetical(&line) {
2089                merged.push(line);
2090                continue;
2091            }
2092
2093            // Don't merge across sentence boundaries — sentence splits are intentional
2094            let prev_ends_at_sentence = {
2095                let trimmed = merged.last().unwrap().trim_end();
2096                trimmed
2097                    .chars()
2098                    .rev()
2099                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2100                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2101            };
2102
2103            if !prev_ends_at_sentence {
2104                let prev = merged.last_mut().unwrap();
2105                let combined = format!("{prev} {line}");
2106                // Only merge if the combined line fits within the limit
2107                if display_len(&combined, length_mode) <= options.line_length {
2108                    *prev = combined;
2109                    continue;
2110                }
2111            }
2112        }
2113        merged.push(line);
2114    }
2115    merged
2116}
2117
2118/// Find the last space in `line` that is safe to split at.
2119/// Safe spaces are those NOT inside rendered non-Text elements.
2120/// `element_spans` contains (start, end) byte ranges of non-Text elements in the line.
2121/// Find the last space in `line` that is not inside any element span.
2122/// Spans use exclusive bounds (pos > start && pos < end) because element
2123/// delimiters (e.g., `[`, `]`, `(`, `)`, `<`, `>`, `` ` ``) are never
2124/// spaces, so only interior positions need protection.
2125fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
2126    line.char_indices()
2127        .rev()
2128        .map(|(pos, _)| pos)
2129        .find(|&pos| line.as_bytes()[pos] == b' ' && !element_spans.iter().any(|(s, e)| pos > *s && pos < *e))
2130}
2131
2132/// Reflow elements into lines that fit within the line length
2133fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2134    let mut lines = Vec::new();
2135    let mut current_line = String::new();
2136    let mut current_length = 0;
2137    // Track byte spans of non-Text elements in current_line for safe splitting
2138    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2139    let length_mode = options.length_mode;
2140
2141    for (idx, element) in elements.iter().enumerate() {
2142        let element_str = format!("{element}");
2143        let element_len = element.display_width(length_mode);
2144
2145        // Determine adjacency from the original elements, not from current_line.
2146        // Elements are adjacent when there's no whitespace between them in the source:
2147        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
2148        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
2149        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
2150        let is_adjacent_to_prev = if idx > 0 {
2151            match (&elements[idx - 1], element) {
2152                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2153                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2154                _ => true,
2155            }
2156        } else {
2157            false
2158        };
2159
2160        // For text elements that might need breaking
2161        if let Element::Text(text) = element {
2162            // Check if original text had leading whitespace
2163            let has_leading_space = text.starts_with(char::is_whitespace);
2164            // If this is a text element, always process it word by word
2165            let words: Vec<&str> = text.split_whitespace().collect();
2166
2167            for (i, word) in words.iter().enumerate() {
2168                let word_len = display_len(word, length_mode);
2169                // Check if this "word" is just punctuation that should stay attached
2170                let is_trailing_punct = word
2171                    .chars()
2172                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2173
2174                // First word of text adjacent to preceding non-text element
2175                // must stay attached (e.g., shortcode followed by punctuation or text)
2176                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2177
2178                if is_first_adjacent {
2179                    // Attach directly without space, preventing line break
2180                    if current_length + word_len > options.line_length && current_length > 0 {
2181                        // Would exceed — break before the adjacent group
2182                        // Use element-aware space search to avoid splitting inside links/code/etc.
2183                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2184                            let before = current_line[..last_space].trim_end().to_string();
2185                            let after = current_line[last_space + 1..].to_string();
2186                            lines.push(before);
2187                            current_line = format!("{after}{word}");
2188                            current_length = display_len(&current_line, length_mode);
2189                            current_line_element_spans.clear();
2190                        } else {
2191                            current_line.push_str(word);
2192                            current_length += word_len;
2193                        }
2194                    } else {
2195                        current_line.push_str(word);
2196                        current_length += word_len;
2197                    }
2198                } else if current_length > 0
2199                    && current_length + 1 + word_len > options.line_length
2200                    && !is_trailing_punct
2201                {
2202                    // Start a new line (but never for trailing punctuation)
2203                    lines.push(current_line.trim().to_string());
2204                    current_line = word.to_string();
2205                    current_length = word_len;
2206                    current_line_element_spans.clear();
2207                } else {
2208                    // Add word to current line
2209                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
2210                    // AND this isn't trailing punctuation (which attaches directly)
2211                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2212                        current_line.push(' ');
2213                        current_length += 1;
2214                    }
2215                    current_line.push_str(word);
2216                    current_length += word_len;
2217                }
2218            }
2219        } else if matches!(
2220            element,
2221            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2222        ) && element_len > options.line_length
2223        {
2224            // Italic, bold, and strikethrough with content longer than line_length need word wrapping.
2225            // Split content word-by-word, attach the opening marker to the first word
2226            // and the closing marker to the last word.
2227            let (content, marker): (&str, &str) = match element {
2228                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2229                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2230                Element::Strikethrough(content) => (content.as_str(), "~~"),
2231                _ => unreachable!(),
2232            };
2233
2234            let words: Vec<&str> = content.split_whitespace().collect();
2235            let n = words.len();
2236
2237            if n == 0 {
2238                // Empty span — treat as atomic
2239                let full = format!("{marker}{marker}");
2240                let full_len = display_len(&full, length_mode);
2241                if !is_adjacent_to_prev && current_length > 0 {
2242                    current_line.push(' ');
2243                    current_length += 1;
2244                }
2245                current_line.push_str(&full);
2246                current_length += full_len;
2247            } else {
2248                for (i, word) in words.iter().enumerate() {
2249                    let is_first = i == 0;
2250                    let is_last = i == n - 1;
2251                    let word_str: String = match (is_first, is_last) {
2252                        (true, true) => format!("{marker}{word}{marker}"),
2253                        (true, false) => format!("{marker}{word}"),
2254                        (false, true) => format!("{word}{marker}"),
2255                        (false, false) => word.to_string(),
2256                    };
2257                    let word_len = display_len(&word_str, length_mode);
2258
2259                    let needs_space = if is_first {
2260                        !is_adjacent_to_prev && current_length > 0
2261                    } else {
2262                        current_length > 0
2263                    };
2264
2265                    if needs_space && current_length + 1 + word_len > options.line_length {
2266                        lines.push(current_line.trim_end().to_string());
2267                        current_line = word_str;
2268                        current_length = word_len;
2269                        current_line_element_spans.clear();
2270                    } else {
2271                        if needs_space {
2272                            current_line.push(' ');
2273                            current_length += 1;
2274                        }
2275                        current_line.push_str(&word_str);
2276                        current_length += word_len;
2277                    }
2278                }
2279            }
2280        } else {
2281            // For non-text elements (code, links, references), treat as atomic units
2282            // These should never be broken across lines
2283
2284            if is_adjacent_to_prev {
2285                // Adjacent to preceding text — attach directly without space
2286                if current_length + element_len > options.line_length {
2287                    // Would exceed limit — break before the adjacent word group
2288                    // Use element-aware space search to avoid splitting inside links/code/etc.
2289                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2290                        let before = current_line[..last_space].trim_end().to_string();
2291                        let after = current_line[last_space + 1..].to_string();
2292                        lines.push(before);
2293                        current_line = format!("{after}{element_str}");
2294                        current_length = display_len(&current_line, length_mode);
2295                        current_line_element_spans.clear();
2296                        // Record the element span in the new current_line
2297                        let start = after.len();
2298                        current_line_element_spans.push((start, start + element_str.len()));
2299                    } else {
2300                        // No safe space to break at — accept the long line
2301                        let start = current_line.len();
2302                        current_line.push_str(&element_str);
2303                        current_length += element_len;
2304                        current_line_element_spans.push((start, current_line.len()));
2305                    }
2306                } else {
2307                    let start = current_line.len();
2308                    current_line.push_str(&element_str);
2309                    current_length += element_len;
2310                    current_line_element_spans.push((start, current_line.len()));
2311                }
2312            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2313                // Not adjacent, would exceed — start new line
2314                lines.push(current_line.trim().to_string());
2315                current_line.clone_from(&element_str);
2316                current_length = element_len;
2317                current_line_element_spans.clear();
2318                current_line_element_spans.push((0, element_str.len()));
2319            } else {
2320                // Not adjacent, fits — add with space
2321                let ends_with_opener =
2322                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2323                if current_length > 0 && !ends_with_opener {
2324                    current_line.push(' ');
2325                    current_length += 1;
2326                }
2327                let start = current_line.len();
2328                current_line.push_str(&element_str);
2329                current_length += element_len;
2330                current_line_element_spans.push((start, current_line.len()));
2331            }
2332        }
2333    }
2334
2335    // Don't forget the last line
2336    if !current_line.is_empty() {
2337        lines.push(current_line.trim_end().to_string());
2338    }
2339
2340    lines
2341}
2342
2343/// Reflow markdown content preserving structure
2344pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2345    let lines: Vec<&str> = content.lines().collect();
2346    let mut result = Vec::new();
2347    let mut i = 0;
2348
2349    while i < lines.len() {
2350        let line = lines[i];
2351        let trimmed = line.trim();
2352
2353        // Preserve empty lines
2354        if trimmed.is_empty() {
2355            result.push(String::new());
2356            i += 1;
2357            continue;
2358        }
2359
2360        // Preserve headings as-is
2361        if trimmed.starts_with('#') {
2362            result.push(line.to_string());
2363            i += 1;
2364            continue;
2365        }
2366
2367        // Preserve Quarto/Pandoc div markers (:::) as-is
2368        if trimmed.starts_with(":::") {
2369            result.push(line.to_string());
2370            i += 1;
2371            continue;
2372        }
2373
2374        // Preserve fenced code blocks
2375        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2376            result.push(line.to_string());
2377            i += 1;
2378            // Copy lines until closing fence
2379            while i < lines.len() {
2380                result.push(lines[i].to_string());
2381                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2382                    i += 1;
2383                    break;
2384                }
2385                i += 1;
2386            }
2387            continue;
2388        }
2389
2390        // Preserve indented code blocks (4+ columns accounting for tab expansion)
2391        if calculate_indentation_width_default(line) >= 4 {
2392            // Collect all consecutive indented lines
2393            result.push(line.to_string());
2394            i += 1;
2395            while i < lines.len() {
2396                let next_line = lines[i];
2397                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
2398                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2399                    result.push(next_line.to_string());
2400                    i += 1;
2401                } else {
2402                    break;
2403                }
2404            }
2405            continue;
2406        }
2407
2408        // Preserve block quotes (but reflow their content)
2409        if trimmed.starts_with('>') {
2410            // find() returns byte position which is correct for str slicing
2411            // The unwrap is safe because we already verified trimmed starts with '>'
2412            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2413            let quote_prefix = line[0..=gt_pos].to_string();
2414            let quote_content = &line[quote_prefix.len()..].trim_start();
2415
2416            let reflowed = reflow_line(quote_content, options);
2417            for reflowed_line in &reflowed {
2418                result.push(format!("{quote_prefix} {reflowed_line}"));
2419            }
2420            i += 1;
2421            continue;
2422        }
2423
2424        // Preserve horizontal rules first (before checking for lists)
2425        if is_horizontal_rule(trimmed) {
2426            result.push(line.to_string());
2427            i += 1;
2428            continue;
2429        }
2430
2431        // Preserve lists (but not horizontal rules)
2432        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2433            // Find the list marker and preserve indentation
2434            let indent = line.len() - line.trim_start().len();
2435            let indent_str = " ".repeat(indent);
2436
2437            // For numbered lists, find the period and the space after it
2438            // For bullet lists, find the marker and the space after it
2439            let mut marker_end = indent;
2440            let mut content_start = indent;
2441
2442            if trimmed.chars().next().is_some_and(char::is_numeric) {
2443                // Numbered list: find the period
2444                if let Some(period_pos) = line[indent..].find('.') {
2445                    marker_end = indent + period_pos + 1; // Include the period
2446                    content_start = marker_end;
2447                    // Skip any spaces after the period to find content start
2448                    // Use byte-based check since content_start is a byte index
2449                    // This is safe because space is ASCII (single byte)
2450                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2451                        content_start += 1;
2452                    }
2453                }
2454            } else {
2455                // Bullet list: marker is single character
2456                marker_end = indent + 1; // Just the marker character
2457                content_start = marker_end;
2458                // Skip any spaces after the marker
2459                // Use byte-based check since content_start is a byte index
2460                // This is safe because space is ASCII (single byte)
2461                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2462                    content_start += 1;
2463                }
2464            }
2465
2466            // Minimum indent for continuation lines (based on list marker, before checkbox)
2467            let min_continuation_indent = content_start;
2468
2469            // Detect checkbox/task list markers: [ ], [x], [X]
2470            // GFM task lists work with both unordered and ordered lists
2471            let rest = &line[content_start..];
2472            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2473                marker_end = content_start + 3; // Include the checkbox `[ ]`
2474                content_start += 4; // Skip past `[ ] `
2475            }
2476
2477            let marker = &line[indent..marker_end];
2478
2479            // Collect all content for this list item (including continuation lines)
2480            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
2481            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2482            i += 1;
2483
2484            // Collect continuation lines (indented lines that are part of this list item)
2485            // Use the base marker indent (not checkbox-extended) for collection,
2486            // since users may indent continuations to the bullet level, not the checkbox level
2487            while i < lines.len() {
2488                let next_line = lines[i];
2489                let next_trimmed = next_line.trim();
2490
2491                // Stop if we hit an empty line or another list item or special block
2492                if is_block_boundary(next_trimmed) {
2493                    break;
2494                }
2495
2496                // Check if this line is indented (continuation of list item)
2497                let next_indent = next_line.len() - next_line.trim_start().len();
2498                if next_indent >= min_continuation_indent {
2499                    // This is a continuation line - add its content
2500                    // Preserve hard breaks while trimming excessive whitespace
2501                    let trimmed_start = next_line.trim_start();
2502                    list_content.push(trim_preserving_hard_break(trimmed_start));
2503                    i += 1;
2504                } else {
2505                    // Not indented enough, not part of this list item
2506                    break;
2507                }
2508            }
2509
2510            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2511            // Hard breaks should prevent joining with the next line
2512            let combined_content = if options.preserve_breaks {
2513                list_content[0].clone()
2514            } else {
2515                // Check if any lines have hard breaks - if so, preserve the structure
2516                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2517                if has_hard_breaks {
2518                    // Don't join lines with hard breaks - keep them separate with newlines
2519                    list_content.join("\n")
2520                } else {
2521                    // No hard breaks, safe to join with spaces
2522                    list_content.join(" ")
2523                }
2524            };
2525
2526            // Calculate the proper indentation for continuation lines
2527            let trimmed_marker = marker;
2528            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2529                // Cap the relative indent (past the nesting level) to max_indent,
2530                // then add back the nesting indent so nested items stay correct
2531                indent + (content_start - indent).min(max_indent)
2532            } else {
2533                content_start
2534            };
2535
2536            // Adjust line length to account for list marker and space
2537            let prefix_length = indent + trimmed_marker.len() + 1;
2538
2539            // Create adjusted options with reduced line length
2540            let adjusted_options = ReflowOptions {
2541                line_length: options.line_length.saturating_sub(prefix_length),
2542                ..options.clone()
2543            };
2544
2545            let reflowed = reflow_line(&combined_content, &adjusted_options);
2546            for (j, reflowed_line) in reflowed.iter().enumerate() {
2547                if j == 0 {
2548                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2549                } else {
2550                    // Continuation lines aligned with text after marker
2551                    let continuation_indent = " ".repeat(continuation_spaces);
2552                    result.push(format!("{continuation_indent}{reflowed_line}"));
2553                }
2554            }
2555            continue;
2556        }
2557
2558        // Preserve tables
2559        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2560            result.push(line.to_string());
2561            i += 1;
2562            continue;
2563        }
2564
2565        // Preserve reference definitions
2566        if trimmed.starts_with('[') && line.contains("]:") {
2567            result.push(line.to_string());
2568            i += 1;
2569            continue;
2570        }
2571
2572        // Preserve definition list items (extended markdown)
2573        if is_definition_list_item(trimmed) {
2574            result.push(line.to_string());
2575            i += 1;
2576            continue;
2577        }
2578
2579        // Check if this is a single line that doesn't need processing
2580        let mut is_single_line_paragraph = true;
2581        if i + 1 < lines.len() {
2582            let next_trimmed = lines[i + 1].trim();
2583            // Check if next line continues this paragraph
2584            if !is_block_boundary(next_trimmed) {
2585                is_single_line_paragraph = false;
2586            }
2587        }
2588
2589        // If it's a single line that fits, just add it as-is
2590        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2591            result.push(line.to_string());
2592            i += 1;
2593            continue;
2594        }
2595
2596        // For regular paragraphs, collect consecutive lines
2597        let mut paragraph_parts = Vec::new();
2598        let mut current_part = vec![line];
2599        i += 1;
2600
2601        // If preserve_breaks is true, treat each line separately
2602        if options.preserve_breaks {
2603            // Don't collect consecutive lines - just reflow this single line
2604            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2605                Some("\\")
2606            } else if line.ends_with("  ") {
2607                Some("  ")
2608            } else {
2609                None
2610            };
2611            let reflowed = reflow_line(line, options);
2612
2613            // Preserve hard breaks (two trailing spaces or backslash)
2614            if let Some(break_marker) = hard_break_type {
2615                if !reflowed.is_empty() {
2616                    let mut reflowed_with_break = reflowed;
2617                    let last_idx = reflowed_with_break.len() - 1;
2618                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2619                        reflowed_with_break[last_idx].push_str(break_marker);
2620                    }
2621                    result.extend(reflowed_with_break);
2622                }
2623            } else {
2624                result.extend(reflowed);
2625            }
2626        } else {
2627            // Original behavior: collect consecutive lines into a paragraph
2628            while i < lines.len() {
2629                let prev_line = if !current_part.is_empty() {
2630                    current_part.last().unwrap()
2631                } else {
2632                    ""
2633                };
2634                let next_line = lines[i];
2635                let next_trimmed = next_line.trim();
2636
2637                // Stop at empty lines or special blocks
2638                if is_block_boundary(next_trimmed) {
2639                    break;
2640                }
2641
2642                // Check if previous line ends with hard break (two spaces or backslash)
2643                // or is a complete sentence in sentence_per_line mode
2644                let prev_trimmed = prev_line.trim();
2645                let abbreviations = get_abbreviations(&options.abbreviations);
2646                let ends_with_sentence = (prev_trimmed.ends_with('.')
2647                    || prev_trimmed.ends_with('!')
2648                    || prev_trimmed.ends_with('?')
2649                    || prev_trimmed.ends_with(".*")
2650                    || prev_trimmed.ends_with("!*")
2651                    || prev_trimmed.ends_with("?*")
2652                    || prev_trimmed.ends_with("._")
2653                    || prev_trimmed.ends_with("!_")
2654                    || prev_trimmed.ends_with("?_")
2655                    // Quote-terminated sentences (straight and curly quotes)
2656                    || prev_trimmed.ends_with(".\"")
2657                    || prev_trimmed.ends_with("!\"")
2658                    || prev_trimmed.ends_with("?\"")
2659                    || prev_trimmed.ends_with(".'")
2660                    || prev_trimmed.ends_with("!'")
2661                    || prev_trimmed.ends_with("?'")
2662                    || prev_trimmed.ends_with(".\u{201D}")
2663                    || prev_trimmed.ends_with("!\u{201D}")
2664                    || prev_trimmed.ends_with("?\u{201D}")
2665                    || prev_trimmed.ends_with(".\u{2019}")
2666                    || prev_trimmed.ends_with("!\u{2019}")
2667                    || prev_trimmed.ends_with("?\u{2019}"))
2668                    && !text_ends_with_abbreviation(
2669                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2670                        &abbreviations,
2671                    );
2672
2673                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2674                    // Start a new part after hard break or complete sentence
2675                    paragraph_parts.push(current_part.join(" "));
2676                    current_part = vec![next_line];
2677                } else {
2678                    current_part.push(next_line);
2679                }
2680                i += 1;
2681            }
2682
2683            // Add the last part
2684            if !current_part.is_empty() {
2685                if current_part.len() == 1 {
2686                    // Single line, don't add trailing space
2687                    paragraph_parts.push(current_part[0].to_string());
2688                } else {
2689                    paragraph_parts.push(current_part.join(" "));
2690                }
2691            }
2692
2693            // Reflow each part separately, preserving hard breaks
2694            for (j, part) in paragraph_parts.iter().enumerate() {
2695                let reflowed = reflow_line(part, options);
2696                result.extend(reflowed);
2697
2698                // Preserve hard break by ensuring last line of part ends with hard break marker
2699                // Use two spaces as the default hard break format for reflows
2700                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2701                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2702                    let last_idx = result.len() - 1;
2703                    if !has_hard_break(&result[last_idx]) {
2704                        result[last_idx].push_str("  ");
2705                    }
2706                }
2707            }
2708        }
2709    }
2710
2711    // Preserve trailing newline if the original content had one
2712    let result_text = result.join("\n");
2713    if content.ends_with('\n') && !result_text.ends_with('\n') {
2714        format!("{result_text}\n")
2715    } else {
2716        result_text
2717    }
2718}
2719
/// Information about a reflowed paragraph
///
/// Ties the byte range a paragraph occupies in the original content to the reflowed
/// text produced for it (presumably so a caller can substitute one for the other —
/// confirm against the code that consumes this struct).
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Ending byte offset of the paragraph in the original content
    // NOTE(review): whether this offset is inclusive or exclusive is not visible from
    // the definition — check the code that fills it in.
    pub end_byte: usize,
    /// The reflowed text for this paragraph
    pub reflowed_text: String,
}
2730
/// A collected blockquote line used for style-preserving reflow.
///
/// The invariant `is_explicit == true` iff `prefix.is_some()` is enforced by the
/// constructors. Use [`BlockquoteLineData::explicit`] or [`BlockquoteLineData::lazy`]
/// rather than constructing the struct directly.
///
/// The fields are `pub(crate)`, so code inside the crate can still build or mutate a
/// value field by field; the invariant is only guaranteed for values that come from
/// the two constructors and are left untouched afterwards.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Trimmed content without the `> ` prefix.
    pub(crate) content: String,
    /// Whether this line carries an explicit blockquote marker.
    /// `false` marks a lazy continuation line.
    pub(crate) is_explicit: bool,
    /// Full blockquote prefix (e.g. `"> "`, `"> > "`). `None` for lazy continuation lines.
    pub(crate) prefix: Option<String>,
}
2745
2746impl BlockquoteLineData {
2747    /// Create an explicit (marker-bearing) blockquote line.
2748    pub fn explicit(content: String, prefix: String) -> Self {
2749        Self {
2750            content,
2751            is_explicit: true,
2752            prefix: Some(prefix),
2753        }
2754    }
2755
2756    /// Create a lazy continuation line (no blockquote marker).
2757    pub fn lazy(content: String) -> Self {
2758        Self {
2759            content,
2760            is_explicit: false,
2761            prefix: None,
2762        }
2763    }
2764}
2765
/// Style for blockquote continuation lines after reflow.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines carry an explicit blockquote marker.
    Explicit,
    /// Continuation lines are lazy, i.e. written without a blockquote marker.
    Lazy,
}
2772
2773/// Determine the continuation style for a blockquote paragraph from its collected lines.
2774///
2775/// The first line is always explicit (it carries the marker), so only continuation
2776/// lines (index 1+) are counted. Ties resolve to `Explicit`.
2777///
2778/// When the slice has only one element (no continuation lines to inspect), both
2779/// counts are zero and the tie-breaking rule returns `Explicit`.
2780pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2781    let mut explicit_count = 0usize;
2782    let mut lazy_count = 0usize;
2783
2784    for line in lines.iter().skip(1) {
2785        if line.is_explicit {
2786            explicit_count += 1;
2787        } else {
2788            lazy_count += 1;
2789        }
2790    }
2791
2792    if explicit_count > 0 && lazy_count == 0 {
2793        BlockquoteContinuationStyle::Explicit
2794    } else if lazy_count > 0 && explicit_count == 0 {
2795        BlockquoteContinuationStyle::Lazy
2796    } else if explicit_count >= lazy_count {
2797        BlockquoteContinuationStyle::Explicit
2798    } else {
2799        BlockquoteContinuationStyle::Lazy
2800    }
2801}
2802
2803/// Determine the dominant blockquote prefix for a paragraph.
2804///
2805/// The most frequently occurring explicit prefix wins. Ties are broken by earliest
2806/// first appearance. Falls back to `fallback` when no explicit lines are present.
2807pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2808    let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2809
2810    for (idx, line) in lines.iter().enumerate() {
2811        let Some(prefix) = line.prefix.as_ref() else {
2812            continue;
2813        };
2814        counts
2815            .entry(prefix.clone())
2816            .and_modify(|entry| entry.0 += 1)
2817            .or_insert((1, idx));
2818    }
2819
2820    counts
2821        .into_iter()
2822        .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2823            count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2824        })
2825        .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix)
2826}
2827
2828/// Whether a reflowed blockquote content line must carry an explicit prefix.
2829///
2830/// Lines that would start a new block structure (headings, fences, lists, etc.)
2831/// cannot safely use lazy continuation syntax.
2832pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2833    let trimmed = content_line.trim_start();
2834    trimmed.starts_with('>')
2835        || trimmed.starts_with('#')
2836        || trimmed.starts_with("```")
2837        || trimmed.starts_with("~~~")
2838        || is_unordered_list_marker(trimmed)
2839        || is_numbered_list_item(trimmed)
2840        || is_horizontal_rule(trimmed)
2841        || is_definition_list_item(trimmed)
2842        || (trimmed.starts_with('[') && trimmed.contains("]:"))
2843        || trimmed.starts_with(":::")
2844        || (trimmed.starts_with('<')
2845            && !trimmed.starts_with("<http")
2846            && !trimmed.starts_with("<https")
2847            && !trimmed.starts_with("<mailto:"))
2848}
2849
2850/// Reflow blockquote content lines and apply continuation style.
2851///
2852/// Segments separated by hard breaks are reflowed independently. The output lines
2853/// receive blockquote prefixes according to `continuation_style`: the first line and
2854/// any line that would start a new block structure always get an explicit prefix;
2855/// other lines follow the detected style.
2856///
2857/// Returns the styled, reflowed lines (without a trailing newline).
2858pub fn reflow_blockquote_content(
2859    lines: &[BlockquoteLineData],
2860    explicit_prefix: &str,
2861    continuation_style: BlockquoteContinuationStyle,
2862    options: &ReflowOptions,
2863) -> Vec<String> {
2864    let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2865    let segments = split_into_segments_strs(&content_strs);
2866    let mut reflowed_content_lines: Vec<String> = Vec::new();
2867
2868    for segment in segments {
2869        let hard_break_type = segment.last().and_then(|&line| {
2870            let line = line.strip_suffix('\r').unwrap_or(line);
2871            if line.ends_with('\\') {
2872                Some("\\")
2873            } else if line.ends_with("  ") {
2874                Some("  ")
2875            } else {
2876                None
2877            }
2878        });
2879
2880        let pieces: Vec<&str> = segment
2881            .iter()
2882            .map(|&line| {
2883                if let Some(l) = line.strip_suffix('\\') {
2884                    l.trim_end()
2885                } else if let Some(l) = line.strip_suffix("  ") {
2886                    l.trim_end()
2887                } else {
2888                    line.trim_end()
2889                }
2890            })
2891            .collect();
2892
2893        let segment_text = pieces.join(" ");
2894        let segment_text = segment_text.trim();
2895        if segment_text.is_empty() {
2896            continue;
2897        }
2898
2899        let mut reflowed = reflow_line(segment_text, options);
2900        if let Some(break_marker) = hard_break_type
2901            && !reflowed.is_empty()
2902        {
2903            let last_idx = reflowed.len() - 1;
2904            if !has_hard_break(&reflowed[last_idx]) {
2905                reflowed[last_idx].push_str(break_marker);
2906            }
2907        }
2908        reflowed_content_lines.extend(reflowed);
2909    }
2910
2911    let mut styled_lines: Vec<String> = Vec::new();
2912    for (idx, line) in reflowed_content_lines.iter().enumerate() {
2913        let force_explicit = idx == 0
2914            || continuation_style == BlockquoteContinuationStyle::Explicit
2915            || should_force_explicit_blockquote_line(line);
2916        if force_explicit {
2917            styled_lines.push(format!("{explicit_prefix}{line}"));
2918        } else {
2919            styled_lines.push(line.clone());
2920        }
2921    }
2922
2923    styled_lines
2924}
2925
2926fn is_blockquote_content_boundary(content: &str) -> bool {
2927    let trimmed = content.trim();
2928    trimmed.is_empty()
2929        || is_block_boundary(trimmed)
2930        || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2931        || trimmed.starts_with(":::")
2932        || crate::utils::is_template_directive_only(content)
2933        || is_standalone_attr_list(content)
2934        || is_snippet_block_delimiter(content)
2935}
2936
2937fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2938    let mut segments = Vec::new();
2939    let mut current = Vec::new();
2940
2941    for &line in lines {
2942        current.push(line);
2943        if has_hard_break(line) {
2944            segments.push(current);
2945            current = Vec::new();
2946        }
2947    }
2948
2949    if !current.is_empty() {
2950        segments.push(current);
2951    }
2952
2953    segments
2954}
2955
2956fn reflow_blockquote_paragraph_at_line(
2957    content: &str,
2958    lines: &[&str],
2959    target_idx: usize,
2960    options: &ReflowOptions,
2961) -> Option<ParagraphReflow> {
2962    let mut anchor_idx = target_idx;
2963    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2964        parsed.nesting_level
2965    } else {
2966        let mut found = None;
2967        let mut idx = target_idx;
2968        loop {
2969            if lines[idx].trim().is_empty() {
2970                break;
2971            }
2972            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2973                found = Some((idx, parsed.nesting_level));
2974                break;
2975            }
2976            if idx == 0 {
2977                break;
2978            }
2979            idx -= 1;
2980        }
2981        let (idx, level) = found?;
2982        anchor_idx = idx;
2983        level
2984    };
2985
2986    // Expand backward to capture prior quote content at the same nesting level.
2987    let mut para_start = anchor_idx;
2988    while para_start > 0 {
2989        let prev_idx = para_start - 1;
2990        let prev_line = lines[prev_idx];
2991
2992        if prev_line.trim().is_empty() {
2993            break;
2994        }
2995
2996        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2997            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2998                break;
2999            }
3000            para_start = prev_idx;
3001            continue;
3002        }
3003
3004        let prev_lazy = prev_line.trim_start();
3005        if is_blockquote_content_boundary(prev_lazy) {
3006            break;
3007        }
3008        para_start = prev_idx;
3009    }
3010
3011    // Lazy continuation cannot precede the first explicit marker.
3012    while para_start < lines.len() {
3013        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3014            para_start += 1;
3015            continue;
3016        };
3017        target_level = parsed.nesting_level;
3018        break;
3019    }
3020
3021    if para_start >= lines.len() || para_start > target_idx {
3022        return None;
3023    }
3024
3025    // Collect explicit lines at target level and lazy continuation lines.
3026    // Each entry is (original_line_idx, BlockquoteLineData).
3027    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3028    let mut idx = para_start;
3029    while idx < lines.len() {
3030        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3031            break;
3032        }
3033
3034        let line = lines[idx];
3035        if line.trim().is_empty() {
3036            break;
3037        }
3038
3039        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3040            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3041                break;
3042            }
3043            collected.push((
3044                idx,
3045                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3046            ));
3047            idx += 1;
3048            continue;
3049        }
3050
3051        let lazy_content = line.trim_start();
3052        if is_blockquote_content_boundary(lazy_content) {
3053            break;
3054        }
3055
3056        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3057        idx += 1;
3058    }
3059
3060    if collected.is_empty() {
3061        return None;
3062    }
3063
3064    let para_end = collected[collected.len() - 1].0;
3065    if target_idx < para_start || target_idx > para_end {
3066        return None;
3067    }
3068
3069    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3070
3071    let fallback_prefix = line_data
3072        .iter()
3073        .find_map(|d| d.prefix.clone())
3074        .unwrap_or_else(|| "> ".to_string());
3075    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3076    let continuation_style = blockquote_continuation_style(&line_data);
3077
3078    let adjusted_line_length = options
3079        .line_length
3080        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3081        .max(1);
3082
3083    let adjusted_options = ReflowOptions {
3084        line_length: adjusted_line_length,
3085        ..options.clone()
3086    };
3087
3088    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3089
3090    if styled_lines.is_empty() {
3091        return None;
3092    }
3093
3094    // Calculate byte offsets.
3095    let mut start_byte = 0;
3096    for line in lines.iter().take(para_start) {
3097        start_byte += line.len() + 1;
3098    }
3099
3100    let mut end_byte = start_byte;
3101    for line in lines.iter().take(para_end + 1).skip(para_start) {
3102        end_byte += line.len() + 1;
3103    }
3104
3105    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3106    if !includes_trailing_newline {
3107        end_byte -= 1;
3108    }
3109
3110    let reflowed_joined = styled_lines.join("\n");
3111    let reflowed_text = if includes_trailing_newline {
3112        if reflowed_joined.ends_with('\n') {
3113            reflowed_joined
3114        } else {
3115            format!("{reflowed_joined}\n")
3116        }
3117    } else if reflowed_joined.ends_with('\n') {
3118        reflowed_joined.trim_end_matches('\n').to_string()
3119    } else {
3120        reflowed_joined
3121    };
3122
3123    Some(ParagraphReflow {
3124        start_byte,
3125        end_byte,
3126        reflowed_text,
3127    })
3128}
3129
3130/// Reflow a single paragraph at the specified line number
3131///
3132/// This function finds the paragraph containing the given line number,
3133/// reflows it according to the specified line length, and returns
3134/// information about the paragraph location and its reflowed text.
3135///
3136/// # Arguments
3137///
3138/// * `content` - The full document content
3139/// * `line_number` - The 1-based line number within the paragraph to reflow
3140/// * `line_length` - The target line length for reflowing
3141///
3142/// # Returns
3143///
3144/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
3145/// or `None` if the line number is out of bounds or the content at that
3146/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
3147pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3148    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3149}
3150
3151/// Reflow a paragraph at the given line with a specific length mode.
3152pub fn reflow_paragraph_at_line_with_mode(
3153    content: &str,
3154    line_number: usize,
3155    line_length: usize,
3156    length_mode: ReflowLengthMode,
3157) -> Option<ParagraphReflow> {
3158    let options = ReflowOptions {
3159        line_length,
3160        length_mode,
3161        ..Default::default()
3162    };
3163    reflow_paragraph_at_line_with_options(content, line_number, &options)
3164}
3165
3166/// Reflow a paragraph at the given line using the provided options.
3167///
3168/// This is the canonical implementation used by both the rule's fix mode and the
3169/// LSP "Reflow paragraph" action. Passing a fully configured `ReflowOptions` allows
3170/// the LSP action to respect user-configured reflow mode, abbreviations, etc.
3171///
3172/// # Returns
3173///
3174/// Returns `Some(ParagraphReflow)` with byte offsets and reflowed text, or `None`
3175/// if the line is out of bounds or sits inside a non-reflow-able construct.
3176pub fn reflow_paragraph_at_line_with_options(
3177    content: &str,
3178    line_number: usize,
3179    options: &ReflowOptions,
3180) -> Option<ParagraphReflow> {
3181    if line_number == 0 {
3182        return None;
3183    }
3184
3185    let lines: Vec<&str> = content.lines().collect();
3186
3187    // Check if line number is valid (1-based)
3188    if line_number > lines.len() {
3189        return None;
3190    }
3191
3192    let target_idx = line_number - 1; // Convert to 0-based
3193    let target_line = lines[target_idx];
3194    let trimmed = target_line.trim();
3195
3196    // Handle blockquote paragraphs (including lazy continuation lines) with
3197    // style-preserving output.
3198    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3199        return Some(blockquote_reflow);
3200    }
3201
3202    // Don't reflow special blocks
3203    if is_paragraph_boundary(trimmed, target_line) {
3204        return None;
3205    }
3206
3207    // Find paragraph start - scan backward until blank line or special block
3208    let mut para_start = target_idx;
3209    while para_start > 0 {
3210        let prev_idx = para_start - 1;
3211        let prev_line = lines[prev_idx];
3212        let prev_trimmed = prev_line.trim();
3213
3214        // Stop at blank line or special blocks
3215        if is_paragraph_boundary(prev_trimmed, prev_line) {
3216            break;
3217        }
3218
3219        para_start = prev_idx;
3220    }
3221
3222    // Find paragraph end - scan forward until blank line or special block
3223    let mut para_end = target_idx;
3224    while para_end + 1 < lines.len() {
3225        let next_idx = para_end + 1;
3226        let next_line = lines[next_idx];
3227        let next_trimmed = next_line.trim();
3228
3229        // Stop at blank line or special blocks
3230        if is_paragraph_boundary(next_trimmed, next_line) {
3231            break;
3232        }
3233
3234        para_end = next_idx;
3235    }
3236
3237    // Extract paragraph lines
3238    let paragraph_lines = &lines[para_start..=para_end];
3239
3240    // Calculate byte offsets
3241    let mut start_byte = 0;
3242    for line in lines.iter().take(para_start) {
3243        start_byte += line.len() + 1; // +1 for newline
3244    }
3245
3246    let mut end_byte = start_byte;
3247    for line in paragraph_lines {
3248        end_byte += line.len() + 1; // +1 for newline
3249    }
3250
3251    // Track whether the byte range includes a trailing newline
3252    // (it doesn't if this is the last line and the file doesn't end with newline)
3253    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3254
3255    // Adjust end_byte if the last line doesn't have a newline
3256    if !includes_trailing_newline {
3257        end_byte -= 1;
3258    }
3259
3260    // Join paragraph lines and reflow
3261    let paragraph_text = paragraph_lines.join("\n");
3262
3263    // Reflow the paragraph using reflow_markdown to handle it properly
3264    let reflowed = reflow_markdown(&paragraph_text, options);
3265
3266    // Ensure reflowed text matches whether the byte range includes a trailing newline
3267    // This is critical: if the range includes a newline, the replacement must too,
3268    // otherwise the next line will get appended to the reflowed paragraph
3269    let reflowed_text = if includes_trailing_newline {
3270        // Range includes newline - ensure reflowed text has one
3271        if reflowed.ends_with('\n') {
3272            reflowed
3273        } else {
3274            format!("{reflowed}\n")
3275        }
3276    } else {
3277        // Range doesn't include newline - ensure reflowed text doesn't have one
3278        if reflowed.ends_with('\n') {
3279            reflowed.trim_end_matches('\n').to_string()
3280        } else {
3281            reflowed
3282        }
3283    };
3284
3285    Some(ParagraphReflow {
3286        start_byte,
3287        end_byte,
3288        reflowed_text,
3289    })
3290}
3291
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the helper `text_ends_with_abbreviation()` directly.
    ///
    /// This test stays inline because it tests a private function.
    /// All other tests (public API, integration tests) are in tests/utils/text_reflow_test.rs
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Built-in abbreviations (titles and i.e./e.g.) are recognised.
        let recognised = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognised {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Everything else is rejected.
        let rejected = [
            "etc.", // not in the built-in list (etc doesn't always have period)
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",        // question mark, not period
            "Mr!",        // exclamation, not period
            "paradigms?", // question mark
            "word",       // no punctuation
            "",           // empty string
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Valid unordered list markers, including lone markers.
        for marker in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(marker), "expected {marker:?} to be a list marker");
        }

        // Not list markers.
        let non_markers = [
            "---",        // horizontal rule
            "***",        // horizontal rule
            "- - -",      // horizontal rule
            "* * *",      // horizontal rule
            "*emphasis*", // emphasis, not list
            "-word",      // no space after marker
            "",           // empty
            "text",       // plain text
            "# heading",  // heading
        ];
        for text in non_markers {
            assert!(!is_unordered_list_marker(text), "expected {text:?} not to be a list marker");
        }
    }

    #[test]
    fn test_is_block_boundary() {
        // Block boundaries.
        let boundaries = [
            "",                           // empty line
            "# Heading",                  // ATX heading
            "## Level 2",                 // ATX heading
            "```rust",                    // code fence
            "~~~",                        // tilde code fence
            "> quote",                    // blockquote
            "| cell |",                   // table
            "[link]: http://example.com", // reference def
            "---",                        // horizontal rule
            "***",                        // horizontal rule
            "- item",                     // unordered list
            "* item",                     // unordered list
            "+ item",                     // unordered list
            "1. item",                    // ordered list
            "10. item",                   // ordered list
            ": definition",               // definition list
            ":::",                        // div marker
            "::::: {.callout-note}",      // div marker with attrs
        ];
        for text in boundaries {
            assert!(is_block_boundary(text), "expected {text:?} to be a block boundary");
        }

        // NOT block boundaries (paragraph continuation).
        let continuations = [
            "regular text",
            "*emphasis*",  // emphasis, not list
            "[link](url)", // inline link, not reference def
            "some words here",
        ];
        for text in continuations {
            assert!(!is_block_boundary(text), "expected {text:?} not to be a block boundary");
        }
    }

    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        // A definition list item following a single-line paragraph must be
        // treated as a block boundary instead of being merged into the paragraph.
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let result = reflow_markdown("Term\n: Definition of the term", &options);

        // The definition list marker should remain on its own line.
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Each case is (trimmed text, original line).
        let boundaries = [
            // Core block boundary checks are inherited
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code blocks (≥4 spaces or tab)
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows via is_potential_table_row
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"), // pipe-delimited without leading pipe
        ];
        for (trimmed, line) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, line),
                "expected {line:?} to be a paragraph boundary"
            );
        }

        // Not paragraph boundaries
        let continuations = [
            ("regular text", "regular text"),
            ("text", "  text"), // 2-space indent is not code
        ];
        for (trimmed, line) in continuations {
            assert!(
                !is_paragraph_boundary(trimmed, line),
                "expected {line:?} not to be a paragraph boundary"
            );
        }
    }

    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        // Div markers (:::) are paragraph boundaries for reflow_paragraph_at_line,
        // which prevents reflow across div boundaries.
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";

        // Line 3 is the div marker — should not be reflowed
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}
rumdl_lib/utils/text_reflow.rs

rumdl_lib/utils/
text_reflow.rs