rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13    LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14    SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18    text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Length calculation mode for reflow
///
/// Selects how string length is measured when comparing against the
/// configured line length. The default is [`ReflowLengthMode::Visual`].
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (Rust `char`s).
    /// Note: this is not a grapheme-cluster count; a multi-codepoint
    /// sequence counts once per `char`.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes of the UTF-8 encoding
    Bytes,
}
35
36/// Calculate the display length of a string based on the length mode
37fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38    match mode {
39        ReflowLengthMode::Chars => s.chars().count(),
40        ReflowLengthMode::Visual => s.width(),
41        ReflowLengthMode::Bytes => s.len(),
42    }
43}
44
/// Options for reflowing text
///
/// See the `Default` implementation for the baseline configuration
/// (80 columns, sentence-aware breaking, visual-width measurement).
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length.
    /// `reflow_line` treats `0` as "no wrapping" in its default
    /// (non-sentence-per-line, non-semantic) mode.
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line.
    /// In `reflow_line` this mode takes precedence over `semantic_line_breaks`
    /// and is applied regardless of the current line length.
    pub sentence_per_line: bool,
    /// Whether to use semantic line breaks (cascading split strategy)
    pub semantic_line_breaks: bool,
    /// Custom abbreviations for sentence detection
    /// Periods are optional - both "Dr" and "Dr." work the same
    /// Custom abbreviations are always added to the built-in defaults
    pub abbreviations: Option<Vec<String>>,
    /// How to measure string length for line-length comparisons
    pub length_mode: ReflowLengthMode,
    /// Whether to treat {#id .class key="value"} as atomic (unsplittable) elements.
    /// Enabled for MkDocs and Kramdown flavors.
    pub attr_lists: bool,
    /// Whether to require uppercase after periods for sentence detection.
    /// When true (default), only "word. Capital" is a sentence boundary.
    /// When false, "word. lowercase" is also treated as a sentence boundary.
    /// Does not affect ! and ? which are always treated as sentence boundaries.
    pub require_sentence_capital: bool,
    /// Cap list continuation indent to this value when set.
    /// Used by mkdocs flavor where continuation is always 4 spaces
    /// regardless of checkbox markers.
    pub max_list_continuation_indent: Option<usize>,
}
77
78impl Default for ReflowOptions {
79    fn default() -> Self {
80        Self {
81            line_length: 80,
82            break_on_sentences: true,
83            preserve_breaks: false,
84            sentence_per_line: false,
85            semantic_line_breaks: false,
86            abbreviations: None,
87            length_mode: ReflowLengthMode::default(),
88            attr_lists: false,
89            require_sentence_capital: true,
90            max_list_continuation_indent: None,
91        }
92    }
93}
94
/// Build a per-character mask flagging positions that belong to an inline code span.
///
/// The returned vector has one entry per `char` of `text`. An entry is `true` when the
/// character is part of a code span, delimiters included. A span is opened by a run of
/// backticks and closed by the next run of exactly the same length; runs of any other
/// length in between are treated as span content. An opening run with no matching
/// closer is left unmarked (literal backticks).
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run starting at `from` (0 when it is not a backtick).
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        let opener_len = run_len(cursor);
        if opener_len == 0 {
            cursor += 1;
            continue;
        }

        let span_start = cursor;
        let after_opener = span_start + opener_len;

        // Look for a closing run with the same number of backticks.
        let mut scan = after_opener;
        let mut span_end = None;
        while scan < total {
            let candidate_len = run_len(scan);
            if candidate_len == 0 {
                scan += 1;
                continue;
            }
            scan += candidate_len;
            if candidate_len == opener_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening delimiter, content, and closing delimiter are all "in code".
                mask[span_start..end].fill(true);
                cursor = end;
            }
            // No matching closer: the opener is literal text; resume right after it.
            None => cursor = after_opener,
        }
    }

    mask
}
155
156/// Detect if a character position is a sentence boundary
157/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
158/// Supports both ASCII punctuation (. ! ?) and CJK punctuation (。 ！ ？)
159fn is_sentence_boundary(
160    text: &str,
161    pos: usize,
162    abbreviations: &HashSet<String>,
163    require_sentence_capital: bool,
164) -> bool {
165    let chars: Vec<char> = text.chars().collect();
166
167    if pos + 1 >= chars.len() {
168        return false;
169    }
170
171    let c = chars[pos];
172    let next_char = chars[pos + 1];
173
174    // Check for CJK sentence-ending punctuation (。, ！, ？)
175    // CJK punctuation doesn't require space or uppercase after it
176    if is_cjk_sentence_ending(c) {
177        // Skip any trailing emphasis/strikethrough markers
178        let mut after_punct_pos = pos + 1;
179        while after_punct_pos < chars.len()
180            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181        {
182            after_punct_pos += 1;
183        }
184
185        // Skip whitespace
186        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187            after_punct_pos += 1;
188        }
189
190        // Check if we have more content (any non-whitespace)
191        if after_punct_pos >= chars.len() {
192            return false;
193        }
194
195        // Skip leading emphasis/strikethrough markers
196        while after_punct_pos < chars.len()
197            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198        {
199            after_punct_pos += 1;
200        }
201
202        if after_punct_pos >= chars.len() {
203            return false;
204        }
205
206        // For CJK, we accept any character as the start of the next sentence
207        // (no uppercase requirement, since CJK doesn't have case)
208        return true;
209    }
210
211    // Check for ASCII sentence-ending punctuation
212    if c != '.' && c != '!' && c != '?' {
213        return false;
214    }
215
216    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
217    let (_space_pos, after_space_pos) = if next_char == ' ' {
218        // Normal case: punctuation followed by space
219        (pos + 1, pos + 2)
220    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221        // Sentence ends with quote - check what follows the quote
222        if chars[pos + 2] == ' ' {
223            // Just quote followed by space: 'sentence." '
224            (pos + 2, pos + 3)
225        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226            // Quote followed by emphasis: 'sentence."* '
227            (pos + 3, pos + 4)
228        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229            && pos + 4 < chars.len()
230            && chars[pos + 3] == chars[pos + 2]
231            && chars[pos + 4] == ' '
232        {
233            // Quote followed by bold: 'sentence."** '
234            (pos + 4, pos + 5)
235        } else {
236            return false;
237        }
238    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
240        (pos + 2, pos + 3)
241    } else if (next_char == '*' || next_char == '_')
242        && pos + 3 < chars.len()
243        && chars[pos + 2] == next_char
244        && chars[pos + 3] == ' '
245    {
246        // Sentence ends with bold: "sentence.** " or "sentence.__ "
247        (pos + 3, pos + 4)
248    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249        // Sentence ends with strikethrough: "sentence.~~ "
250        (pos + 3, pos + 4)
251    } else {
252        return false;
253    };
254
255    // Skip all whitespace after the space to find the start of the next sentence
256    let mut next_char_pos = after_space_pos;
257    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258        next_char_pos += 1;
259    }
260
261    // Check if we reached the end of the string
262    if next_char_pos >= chars.len() {
263        return false;
264    }
265
266    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
267    let mut first_letter_pos = next_char_pos;
268    while first_letter_pos < chars.len()
269        && (chars[first_letter_pos] == '*'
270            || chars[first_letter_pos] == '_'
271            || chars[first_letter_pos] == '~'
272            || is_opening_quote(chars[first_letter_pos]))
273    {
274        first_letter_pos += 1;
275    }
276
277    // Check if we reached the end after skipping emphasis
278    if first_letter_pos >= chars.len() {
279        return false;
280    }
281
282    let first_char = chars[first_letter_pos];
283
284    // For ! and ?, sentence boundaries are unambiguous — no uppercase requirement
285    if c == '!' || c == '?' {
286        return true;
287    }
288
289    // Period-specific checks: periods are ambiguous (abbreviations, decimals, initials)
290    // so we apply additional guards before accepting a sentence boundary.
291
292    if pos > 0 {
293        // Check for common abbreviations
294        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296            return false;
297        }
298
299        // Check for decimal numbers (e.g., "3.14 is pi")
300        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301            return false;
302        }
303
304        // Check for single-letter initials (e.g., "J. K. Rowling")
305        // A single uppercase letter before the period preceded by whitespace or start
306        // is likely an initial, not a sentence ending.
307        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308            return false;
309        }
310    }
311
312    // In strict mode, require uppercase or CJK to start the next sentence after a period.
313    // In relaxed mode, accept any alphanumeric character.
314    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315        return false;
316    }
317
318    true
319}
320
321/// Split text into sentences
322pub fn split_into_sentences(text: &str) -> Vec<String> {
323    split_into_sentences_custom(text, &None)
324}
325
326/// Split text into sentences with custom abbreviations
327pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328    let abbreviations = get_abbreviations(custom_abbreviations);
329    split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332/// Internal function to split text into sentences with a pre-computed abbreviations set
333/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
334fn split_into_sentences_with_set(
335    text: &str,
336    abbreviations: &HashSet<String>,
337    require_sentence_capital: bool,
338) -> Vec<String> {
339    // Pre-compute which character positions are inside inline code spans
340    let in_code = compute_inline_code_mask(text);
341
342    let mut sentences = Vec::new();
343    let mut current_sentence = String::new();
344    let mut chars = text.chars().peekable();
345    let mut pos = 0;
346
347    while let Some(c) = chars.next() {
348        current_sentence.push(c);
349
350        if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
352            while let Some(&next) = chars.peek() {
353                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354                    current_sentence.push(chars.next().unwrap());
355                    pos += 1;
356                } else {
357                    break;
358                }
359            }
360
361            // Consume the space after the sentence
362            if chars.peek() == Some(&' ') {
363                chars.next();
364                pos += 1;
365            }
366
367            sentences.push(current_sentence.trim().to_string());
368            current_sentence.clear();
369        }
370
371        pos += 1;
372    }
373
374    // Add any remaining text as the last sentence
375    if !current_sentence.trim().is_empty() {
376        sentences.push(current_sentence.trim().to_string());
377    }
378    sentences
379}
380
/// Check if a line is a horizontal rule (---, ___, ***)
///
/// The line must start with `-`, `_`, or `*`, consist solely of that same marker
/// character and spaces, and contain at least three markers. A leading space
/// therefore disqualifies the line; callers pass already-trimmed text.
fn is_horizontal_rule(line: &str) -> bool {
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space breaks the rule.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// The line must start with one or more ASCII digits (`0`-`9`), immediately followed
/// by a period and a space. Only ASCII digits count: other Unicode numeric characters
/// such as `½`, `²` or `٣` are not valid ordered-list markers in CommonMark.
///
/// After the period a space is required (consistent with list marker extraction);
/// "2019." alone is NOT treated as a list item to avoid false positives.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are one byte each, so the count doubles as a valid byte offset.
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    digit_count > 0 && line[digit_count..].starts_with(". ")
}
433
434/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
435fn is_unordered_list_marker(s: &str) -> bool {
436    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437        && !is_horizontal_rule(s)
438        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441/// Shared structural checks for block boundary detection.
442/// Checks elements that only depend on the trimmed line content.
443fn is_block_boundary_core(trimmed: &str) -> bool {
444    trimmed.is_empty()
445        || trimmed.starts_with('#')
446        || trimmed.starts_with("```")
447        || trimmed.starts_with("~~~")
448        || trimmed.starts_with('>')
449        || (trimmed.starts_with('[') && trimmed.contains("]:"))
450        || is_horizontal_rule(trimmed)
451        || is_unordered_list_marker(trimmed)
452        || is_numbered_list_item(trimmed)
453        || is_definition_list_item(trimmed)
454        || trimmed.starts_with(":::")
455}
456
457/// Check if a trimmed line starts a new structural block element.
458/// Used for paragraph boundary detection in `reflow_markdown()`.
459fn is_block_boundary(trimmed: &str) -> bool {
460    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463/// Check if a line starts a new structural block for paragraph boundary detection
464/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
465/// (≥4 spaces) and table row detection via `is_potential_table_row`.
466fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467    is_block_boundary_core(trimmed)
468        || calculate_indentation_width_default(line) >= 4
469        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// A single trailing `\r` (from a CRLF line ending) is ignored before checking.
/// Recognised hard-break endings:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
fn has_hard_break(line: &str) -> bool {
    let body = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    ["  ", "\\"].iter().any(|ending| body.ends_with(*ending))
}
481
/// Check if text ends with sentence-terminating punctuation (. ! ?)
///
/// Only the very last character is inspected; trailing whitespace is not skipped.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trim trailing whitespace while preserving hard breaks (two trailing spaces or backslash)
///
/// Behaviour, after dropping a single trailing `\r` (CRLF input):
/// - a line ending in a backslash is returned unchanged (mdformat-style hard break);
/// - a line ending in two or more spaces is trimmed and given back exactly two
///   trailing spaces (traditional hard break), unless it is whitespace only, in
///   which case the result is empty;
/// - any other line simply has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as written.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let has_space_break = line.ends_with("  ");
    if content.is_empty() || !has_space_break {
        return content.to_owned();
    }

    // Normalise the hard break to exactly two trailing spaces.
    let mut rebuilt = String::with_capacity(content.len() + 2);
    rebuilt.push_str(content);
    rebuilt.push_str("  ");
    rebuilt
}
517
518/// Parse markdown elements using the appropriate parser based on options.
519fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520    if options.attr_lists {
521        parse_markdown_elements_with_attr_lists(text)
522    } else {
523        parse_markdown_elements(text)
524    }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528    // For sentence-per-line mode, always process regardless of length
529    if options.sentence_per_line {
530        let elements = parse_elements(line, options);
531        return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532    }
533
534    // For semantic line breaks mode, use cascading split strategy
535    if options.semantic_line_breaks {
536        let elements = parse_elements(line, options);
537        return reflow_elements_semantic(&elements, options);
538    }
539
540    // Quick check: if line is already short enough or no wrapping requested, return as-is
541    // line_length = 0 means no wrapping (unlimited line length)
542    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543        return vec![line.to_string()];
544    }
545
546    // Parse the markdown to identify elements
547    let elements = parse_elements(line, options);
548
549    // Reflow the elements into lines
550    reflow_elements(&elements, options)
551}
552
/// Image source in a linked image structure
///
/// This is the inner image part of a clickable image; the payload string is the
/// URL or reference label that is written back inside `(...)` or `[...]`.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: ![alt](url)
    Inline(String),
    /// Reference image: ![alt][ref]
    Reference(String),
}
561
/// Link target in a linked image structure
///
/// This is the outer link part wrapped around the image; the payload string is the
/// URL or reference label that is written back inside `(...)` or `[...]`.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: ](url)
    Inline(String),
    /// Reference link: ][ref]
    Reference(String),
}
570
/// Represents a piece of content in the markdown
///
/// Variants other than `Text` store the inner content without the surrounding
/// markdown delimiters where the `Display` implementation re-adds them
/// (for example `Code`, `Bold`, `Link`); variants such as `Autolink`, `HtmlTag`,
/// `HtmlEntity`, `HugoShortcode` and `AttrList` are written back verbatim.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// A complete markdown inline link [text](url)
    Link { text: String, url: String },
    /// A complete markdown reference link [text][ref]
    ReferenceLink { text: String, reference: String },
    /// A complete markdown empty reference link [text][]
    EmptyReferenceLink { text: String },
    /// A complete markdown shortcut reference link [ref]
    ShortcutReference { reference: String },
    /// A complete markdown inline image ![alt](url)
    InlineImage { alt: String, url: String },
    /// A complete markdown reference image ![alt][ref]
    ReferenceImage { alt: String, reference: String },
    /// A complete markdown empty reference image ![alt][]
    EmptyReferenceImage { alt: String },
    /// A clickable image badge in any of 4 forms:
    /// - [![alt](img-url)](link-url)
    /// - [![alt][img-ref]](link-url)
    /// - [![alt](img-url)][link-ref]
    /// - [![alt][img-ref]][link-ref]
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference [^note]
    FootnoteReference { note: String },
    /// Strikethrough text ~~text~~
    Strikethrough(String),
    /// Wiki-style link [[wiki]] or [[wiki|text]]
    WikiLink(String),
    /// Inline math $math$
    InlineMath(String),
    /// Display math $$math$$
    DisplayMath(String),
    /// Emoji shortcode :emoji:
    EmojiShortcode(String),
    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
    Autolink(String),
    /// HTML tag <tag> or </tag> or <tag/>
    HtmlTag(String),
    /// HTML entity &nbsp; or &#123;
    HtmlEntity(String),
    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
    HugoShortcode(String),
    /// MkDocs/kramdown attribute list {#id .class key="value"}
    AttrList(String),
    /// Inline code `code` (written back with single backtick delimiters)
    Code(String),
    /// Bold text **text** or __text__
    Bold {
        content: String,
        /// True if underscore markers (__), false for asterisks (**)
        underscore: bool,
    },
    /// Italic text *text* or _text_
    Italic {
        content: String,
        /// True if underscore marker (_), false for asterisk (*)
        underscore: bool,
    },
}
637
638impl std::fmt::Display for Element {
639    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
640        match self {
641            Element::Text(s) => write!(f, "{s}"),
642            Element::Link { text, url } => write!(f, "[{text}]({url})"),
643            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
644            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
645            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
646            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
647            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
648            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
649            Element::LinkedImage {
650                alt,
651                img_source,
652                link_target,
653            } => {
654                // Build the image part: ![alt](url) or ![alt][ref]
655                let img_part = match img_source {
656                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
657                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
658                };
659                // Build the link part: (url) or [ref]
660                match link_target {
661                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
662                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
663                }
664            }
665            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
666            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
667            Element::WikiLink(s) => write!(f, "[[{s}]]"),
668            Element::InlineMath(s) => write!(f, "${s}$"),
669            Element::DisplayMath(s) => write!(f, "$${s}$$"),
670            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
671            Element::Autolink(s) => write!(f, "{s}"),
672            Element::HtmlTag(s) => write!(f, "{s}"),
673            Element::HtmlEntity(s) => write!(f, "{s}"),
674            Element::HugoShortcode(s) => write!(f, "{s}"),
675            Element::AttrList(s) => write!(f, "{s}"),
676            Element::Code(s) => write!(f, "`{s}`"),
677            Element::Bold { content, underscore } => {
678                if *underscore {
679                    write!(f, "__{content}__")
680                } else {
681                    write!(f, "**{content}**")
682                }
683            }
684            Element::Italic { content, underscore } => {
685                if *underscore {
686                    write!(f, "_{content}_")
687                } else {
688                    write!(f, "*{content}*")
689                }
690            }
691        }
692    }
693}
694
695impl Element {
696    /// Calculate the display width of this element using the given length mode.
697    /// This formats the element and computes its width, correctly handling
698    /// visual width for CJK characters and other wide glyphs.
699    fn display_width(&self, mode: ReflowLengthMode) -> usize {
700        let formatted = format!("{self}");
701        display_len(&formatted, mode)
702    }
703}
704
/// An emphasis or formatting span parsed by pulldown-cmark
///
/// `start`/`end` are byte offsets into the text that was parsed and cover the
/// delimiters; `content` is the text between them.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the emphasis starts (including markers)
    start: usize,
    /// Byte offset where the emphasis ends (after closing markers)
    end: usize,
    /// The content inside the emphasis markers
    content: String,
    /// Whether this is strong (bold) emphasis
    is_strong: bool,
    /// Whether this is strikethrough (~~text~~)
    is_strikethrough: bool,
    /// Whether the original used underscore markers (for emphasis only;
    /// always false for strikethrough spans)
    uses_underscore: bool,
}
721
722/// Extract emphasis and strikethrough spans from text using pulldown-cmark
723///
724/// This provides CommonMark-compliant emphasis parsing, correctly handling:
725/// - Nested emphasis like `*text **bold** more*`
726/// - Left/right flanking delimiter rules
727/// - Underscore vs asterisk markers
728/// - GFM strikethrough (~~text~~)
729///
730/// Returns spans sorted by start position.
731fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732    let mut spans = Vec::new();
733    let mut options = Options::empty();
734    options.insert(Options::ENABLE_STRIKETHROUGH);
735
736    // Stacks to track nested formatting with their start positions
737    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
738    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739    let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741    let parser = Parser::new_ext(text, options).into_offset_iter();
742
743    for (event, range) in parser {
744        match event {
745            Event::Start(Tag::Emphasis) => {
746                // Check if this uses underscore by looking at the original text
747                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748                emphasis_stack.push((range.start, uses_underscore));
749            }
750            Event::End(TagEnd::Emphasis) => {
751                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752                    // Extract content between the markers (1 char marker on each side)
753                    let content_start = start_byte + 1;
754                    let content_end = range.end - 1;
755                    if content_end > content_start
756                        && let Some(content) = text.get(content_start..content_end)
757                    {
758                        spans.push(EmphasisSpan {
759                            start: start_byte,
760                            end: range.end,
761                            content: content.to_string(),
762                            is_strong: false,
763                            is_strikethrough: false,
764                            uses_underscore,
765                        });
766                    }
767                }
768            }
769            Event::Start(Tag::Strong) => {
770                // Check if this uses underscore by looking at the original text
771                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772                strong_stack.push((range.start, uses_underscore));
773            }
774            Event::End(TagEnd::Strong) => {
775                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776                    // Extract content between the markers (2 char marker on each side)
777                    let content_start = start_byte + 2;
778                    let content_end = range.end - 2;
779                    if content_end > content_start
780                        && let Some(content) = text.get(content_start..content_end)
781                    {
782                        spans.push(EmphasisSpan {
783                            start: start_byte,
784                            end: range.end,
785                            content: content.to_string(),
786                            is_strong: true,
787                            is_strikethrough: false,
788                            uses_underscore,
789                        });
790                    }
791                }
792            }
793            Event::Start(Tag::Strikethrough) => {
794                strikethrough_stack.push(range.start);
795            }
796            Event::End(TagEnd::Strikethrough) => {
797                if let Some(start_byte) = strikethrough_stack.pop() {
798                    // Extract content between the ~~ markers (2 char marker on each side)
799                    let content_start = start_byte + 2;
800                    let content_end = range.end - 2;
801                    if content_end > content_start
802                        && let Some(content) = text.get(content_start..content_end)
803                    {
804                        spans.push(EmphasisSpan {
805                            start: start_byte,
806                            end: range.end,
807                            content: content.to_string(),
808                            is_strong: false,
809                            is_strikethrough: true,
810                            uses_underscore: false,
811                        });
812                    }
813                }
814            }
815            _ => {}
816        }
817    }
818
819    // Sort by start position
820    spans.sort_by_key(|s| s.start);
821    spans
822}
823
824/// Parse markdown elements from text preserving the raw syntax
825///
826/// Detection order is critical:
827/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
828/// 2. Inline images ![alt](url) - before links to handle ! prefix
829/// 3. Reference images ![alt][ref] - before reference links
830/// 4. Inline links [text](url) - before reference links
831/// 5. Reference links [text][ref] - before shortcut references
832/// 6. Shortcut reference links [ref] - detected last to avoid false positives
833/// 7. Other elements (code, bold, italic, etc.) - processed normally
834fn parse_markdown_elements(text: &str) -> Vec<Element> {
835    parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839    parse_markdown_elements_inner(text, true)
840}
841
/// Tokenize `text` into a flat list of `Element`s, keeping the raw syntax of
/// every recognized Markdown construct together as a single element.
///
/// When `attr_lists` is true, a `{...}` span matching `ATTR_LIST_PATTERN` is
/// additionally emitted as `Element::AttrList`.
///
/// Each loop iteration decides what to consume next from `remaining` (the
/// not-yet-consumed suffix of `text`):
/// - Every regex-based pattern is searched and the match with the smallest
///   start offset is kept. The comparison is strict (`<`), so when two patterns
///   start at the same offset the one checked FIRST in this function wins; the
///   order of the checks below is therefore part of the behavior.
/// - Separately, the nearest "special" is located: a backtick, an attr list
///   (only with `attr_lists`), or a pre-extracted emphasis/strong/strikethrough
///   span. A later special replaces an earlier one only when strictly closer.
/// - The regex match is processed only when it starts strictly before the
///   special; on a tie the special is processed instead.
/// - When nothing is found, the rest of the input becomes one `Element::Text`
///   and the loop ends.
///
/// Text lying between recognized constructs is emitted as `Element::Text`.
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843    let mut elements = Vec::new();
    // `remaining` only ever shrinks from the front, so it stays a suffix of `text`.
844    let mut remaining = text;
845
846    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
847    let emphasis_spans = extract_emphasis_spans(text);
848
849    while !remaining.is_empty() {
850        // Calculate current byte offset in original text
851        let current_offset = text.len() - remaining.len();
852        // Find the earliest occurrence of any markdown pattern
853        // Store (start, end, pattern_name) to unify standard Regex and FancyRegex match results
        // Offsets stored here are byte offsets relative to `remaining`, not to `text`.
854        let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856        // Check for linked images FIRST (all 4 variants)
857        // Quick literal check: only run expensive regexes if we might have a linked image
858        // Pattern starts with "[!" so check for that first
859        if remaining.contains("[!") {
860            // Pattern 1: [![alt](img)](link) - inline image in inline link
861            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863            {
864                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865            }
866
867            // Pattern 2: [![alt][ref]](link) - reference image in inline link
868            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870            {
871                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872            }
873
874            // Pattern 3: [![alt](img)][ref] - inline image in reference link
875            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877            {
878                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879            }
880
881            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
882            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884            {
885                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886            }
887        }
888
889        // Check for images (they start with ! so should be detected before links)
890        // Inline images - ![alt](url)
891        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893        {
894            earliest_match = Some((m.start(), m.end(), "inline_image"));
895        }
896
897        // Reference images - ![alt][ref]
898        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900        {
901            earliest_match = Some((m.start(), m.end(), "ref_image"));
902        }
903
904        // Check for footnote references - [^note]
905        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907        {
908            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909        }
910
911        // Check for inline links - [text](url)
        // This pattern's `find` returns a `Result`; an `Err` is treated the same as "no match".
912        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914        {
915            earliest_match = Some((m.start(), m.end(), "inline_link"));
916        }
917
918        // Check for reference links - [text][ref]
919        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921        {
922            earliest_match = Some((m.start(), m.end(), "ref_link"));
923        }
924
925        // Check for shortcut reference links - [ref]
926        // Only check if we haven't found an earlier pattern that would conflict
        // (The regex itself always runs; the candidate is only replaced when this
        // match starts strictly earlier than the current one.)
927        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929        {
930            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931        }
932
933        // Check for wiki-style links - [[wiki]]
934        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936        {
937            earliest_match = Some((m.start(), m.end(), "wiki_link"));
938        }
939
940        // Check for display math first (before inline) - $$math$$
941        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943        {
944            earliest_match = Some((m.start(), m.end(), "display_math"));
945        }
946
947        // Check for inline math - $math$
948        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950        {
951            earliest_match = Some((m.start(), m.end(), "inline_math"));
952        }
953
954        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
955
956        // Check for emoji shortcodes - :emoji:
957        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959        {
960            earliest_match = Some((m.start(), m.end(), "emoji"));
961        }
962
963        // Check for HTML entities - &nbsp; etc
964        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966        {
967            earliest_match = Some((m.start(), m.end(), "html_entity"));
968        }
969
970        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
971        // Must be checked before other patterns to avoid false sentence breaks
        // NOTE(review): within this function the check order only decides ties at
        // the same start offset; a shortcode wins over the HTML-tag check below
        // whenever it starts at or before that match.
972        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974        {
975            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976        }
977
978        // Check for HTML tags - <tag> </tag> <tag/>
979        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
980        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982        {
983            // Check if this is an autolink (starts with protocol or mailto:)
984            let matched_text = &remaining[m.start()..m.end()];
985            let is_url_autolink = matched_text.starts_with("<http://")
986                || matched_text.starts_with("<https://")
987                || matched_text.starts_with("<mailto:")
988                || matched_text.starts_with("<ftp://")
989                || matched_text.starts_with("<ftps://");
990
991            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
992            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
993            let is_email_autolink = {
994                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995                EMAIL_PATTERN.is_match(content)
996            };
997
            // The same match is recorded either way; only the label differs, which
            // selects `Element::Autolink` vs `Element::HtmlTag` further down.
998            if is_url_autolink || is_email_autolink {
999                earliest_match = Some((m.start(), m.end(), "autolink"));
1000            } else {
1001                earliest_match = Some((m.start(), m.end(), "html_tag"));
1002            }
1003        }
1004
1005        // Find earliest non-link special characters
        // `next_special == remaining.len()` together with an empty `special_type`
        // means "no special found in this iteration".
1006        let mut next_special = remaining.len();
1007        let mut special_type = "";
1008        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009        let mut attr_list_len: usize = 0;
1010
1011        // Check for code spans (not handled by pulldown-cmark in this context)
1012        if let Some(pos) = remaining.find('`')
1013            && pos < next_special
1014        {
1015            next_special = pos;
1016            special_type = "code";
1017        }
1018
1019        // Check for MkDocs/kramdown attr lists - {#id .class key="value"}
        // Only the first `{` in `remaining` is examined, and the pattern must
        // match exactly at that brace (`m.start() == 0`).
1020        if attr_lists
1021            && let Some(pos) = remaining.find('{')
1022            && pos < next_special
1023            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024            && m.start() == 0
1025        {
1026            next_special = pos;
1027            special_type = "attr_list";
1028            attr_list_len = m.end();
1029        }
1030
1031        // Check for emphasis using pulldown-cmark's pre-extracted spans
1032        // Find the earliest emphasis span that starts within remaining text
        // Span offsets are relative to `text`, so they are shifted by
        // `current_offset`. Spans starting before `current_offset` (for example
        // ones nested inside an already-consumed element) are skipped.
1033        for span in &emphasis_spans {
1034            if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035                let pos_in_remaining = span.start - current_offset;
1036                if pos_in_remaining < next_special {
1037                    next_special = pos_in_remaining;
1038                    special_type = "pulldown_emphasis";
1039                    pulldown_emphasis = Some(span);
1040                }
1041                break; // Spans are sorted by start position, so first match is earliest
1042            }
1043        }
1044
1045        // Determine which pattern to process first
        // Strict `<`: when a regex match and a special start at the same offset,
        // the special branch below is taken.
1046        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047            pos < next_special
1048        } else {
1049            false
1050        };
1051
1052        if should_process_markdown_link {
            // `unwrap` cannot fail: the flag is only true when `earliest_match` is `Some`.
1053            let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055            // Add any text before the match
1056            if pos > 0 {
1057                elements.push(Element::Text(remaining[..pos].to_string()));
1058            }
1059
1060            // Process the matched pattern
            // Arms that need capture groups re-run the pattern with `captures` on
            // the same `remaining`, then consume everything up to `match_end`.
            // NOTE(review): the `else` fallbacks slice from the start of
            // `remaining` (not from `pos`) after the leading text was already
            // pushed; they look unreachable as long as `captures` agrees with
            // the earlier `find` — confirm.
1061            match pattern_type {
1062                // Pattern 1: [![alt](img)](link) - inline image in inline link
1063                "linked_image_ii" => {
1064                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1066                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1067                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1068                        elements.push(Element::LinkedImage {
1069                            alt: alt.to_string(),
1070                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1071                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072                        });
1073                        remaining = &remaining[match_end..];
1074                    } else {
1075                        elements.push(Element::Text("[".to_string()));
1076                        remaining = &remaining[1..];
1077                    }
1078                }
1079                // Pattern 2: [![alt][ref]](link) - reference image in inline link
1080                "linked_image_ri" => {
1081                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1083                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1084                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1085                        elements.push(Element::LinkedImage {
1086                            alt: alt.to_string(),
1087                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089                        });
1090                        remaining = &remaining[match_end..];
1091                    } else {
1092                        elements.push(Element::Text("[".to_string()));
1093                        remaining = &remaining[1..];
1094                    }
1095                }
1096                // Pattern 3: [![alt](img)][ref] - inline image in reference link
1097                "linked_image_ir" => {
1098                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1100                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1101                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1102                        elements.push(Element::LinkedImage {
1103                            alt: alt.to_string(),
1104                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1105                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106                        });
1107                        remaining = &remaining[match_end..];
1108                    } else {
1109                        elements.push(Element::Text("[".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
1114                "linked_image_rr" => {
1115                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1117                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1118                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1119                        elements.push(Element::LinkedImage {
1120                            alt: alt.to_string(),
1121                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123                        });
1124                        remaining = &remaining[match_end..];
1125                    } else {
1126                        elements.push(Element::Text("[".to_string()));
1127                        remaining = &remaining[1..];
1128                    }
1129                }
1130                "inline_image" => {
1131                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1133                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1134                        elements.push(Element::InlineImage {
1135                            alt: alt.to_string(),
1136                            url: url.to_string(),
1137                        });
1138                        remaining = &remaining[match_end..];
1139                    } else {
1140                        elements.push(Element::Text("!".to_string()));
1141                        remaining = &remaining[1..];
1142                    }
1143                }
1144                "ref_image" => {
1145                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1147                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1148
                        // An empty second group (`![alt][]`) gets its own variant.
1149                        if reference.is_empty() {
1150                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151                        } else {
1152                            elements.push(Element::ReferenceImage {
1153                                alt: alt.to_string(),
1154                                reference: reference.to_string(),
1155                            });
1156                        }
1157                        remaining = &remaining[match_end..];
1158                    } else {
1159                        elements.push(Element::Text("!".to_string()));
1160                        remaining = &remaining[1..];
1161                    }
1162                }
1163                "footnote_ref" => {
1164                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1166                        elements.push(Element::FootnoteReference { note: note.to_string() });
1167                        remaining = &remaining[match_end..];
1168                    } else {
1169                        elements.push(Element::Text("[".to_string()));
1170                        remaining = &remaining[1..];
1171                    }
1172                }
1173                "inline_link" => {
1174                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1176                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1177                        elements.push(Element::Link {
1178                            text: text.to_string(),
1179                            url: url.to_string(),
1180                        });
1181                        remaining = &remaining[match_end..];
1182                    } else {
1183                        // Fallback - shouldn't happen
1184                        elements.push(Element::Text("[".to_string()));
1185                        remaining = &remaining[1..];
1186                    }
1187                }
1188                "ref_link" => {
1189                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1191                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1192
1193                        if reference.is_empty() {
1194                            // Empty reference link [text][]
1195                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196                        } else {
1197                            // Regular reference link [text][ref]
1198                            elements.push(Element::ReferenceLink {
1199                                text: text.to_string(),
1200                                reference: reference.to_string(),
1201                            });
1202                        }
1203                        remaining = &remaining[match_end..];
1204                    } else {
1205                        // Fallback - shouldn't happen
1206                        elements.push(Element::Text("[".to_string()));
1207                        remaining = &remaining[1..];
1208                    }
1209                }
1210                "shortcut_ref" => {
1211                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1213                        elements.push(Element::ShortcutReference {
1214                            reference: reference.to_string(),
1215                        });
1216                        remaining = &remaining[match_end..];
1217                    } else {
1218                        // Fallback - shouldn't happen
1219                        elements.push(Element::Text("[".to_string()));
1220                        remaining = &remaining[1..];
1221                    }
1222                }
1223                "wiki_link" => {
1224                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1226                        elements.push(Element::WikiLink(content.to_string()));
1227                        remaining = &remaining[match_end..];
1228                    } else {
1229                        elements.push(Element::Text("[[".to_string()));
1230                        remaining = &remaining[2..];
1231                    }
1232                }
1233                "display_math" => {
1234                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1236                        elements.push(Element::DisplayMath(math.to_string()));
1237                        remaining = &remaining[match_end..];
1238                    } else {
1239                        elements.push(Element::Text("$$".to_string()));
1240                        remaining = &remaining[2..];
1241                    }
1242                }
1243                "inline_math" => {
1244                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1246                        elements.push(Element::InlineMath(math.to_string()));
1247                        remaining = &remaining[match_end..];
1248                    } else {
1249                        elements.push(Element::Text("$".to_string()));
1250                        remaining = &remaining[1..];
1251                    }
1252                }
1253                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1254                "emoji" => {
1255                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1257                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1258                        remaining = &remaining[match_end..];
1259                    } else {
1260                        elements.push(Element::Text(":".to_string()));
1261                        remaining = &remaining[1..];
1262                    }
1263                }
                // The four arms below need no capture groups: they store the matched
                // slice `remaining[pos..match_end]` verbatim.
1264                "html_entity" => {
1265                    // HTML entities are captured whole
1266                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267                    remaining = &remaining[match_end..];
1268                }
1269                "hugo_shortcode" => {
1270                    // Hugo shortcodes are atomic elements - preserve them exactly
1271                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272                    remaining = &remaining[match_end..];
1273                }
1274                "autolink" => {
1275                    // Autolinks are atomic elements - preserve them exactly
1276                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277                    remaining = &remaining[match_end..];
1278                }
1279                "html_tag" => {
1280                    // HTML tags are captured whole
1281                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282                    remaining = &remaining[match_end..];
1283                }
1284                _ => {
1285                    // Unknown pattern, treat as text
1286                    elements.push(Element::Text("[".to_string()));
1287                    remaining = &remaining[1..];
1288                }
1289            }
1290        } else {
1291            // Process non-link special characters
1292
1293            // Add any text before the special character
            // After this flush `remaining` begins exactly at the special element.
1294            if next_special > 0 && next_special < remaining.len() {
1295                elements.push(Element::Text(remaining[..next_special].to_string()));
1296                remaining = &remaining[next_special..];
1297            }
1298
1299            // Process the special element
1300            match special_type {
1301                "code" => {
1302                    // Find end of code
                    // NOTE(review): the closing delimiter is simply the next single
                    // backtick, so a multi-backtick opener such as "``" is read as
                    // an empty code span — confirm this is acceptable.
1303                    if let Some(code_end) = remaining[1..].find('`') {
1304                        let code = &remaining[1..1 + code_end];
1305                        elements.push(Element::Code(code.to_string()));
1306                        remaining = &remaining[1 + code_end + 1..];
1307                    } else {
1308                        // No closing backtick, treat as text
                        // NOTE(review): this ends tokenization, so any links or
                        // emphasis after an unmatched backtick are emitted as
                        // plain text — confirm this is intended.
1309                        elements.push(Element::Text(remaining.to_string()));
1310                        break;
1311                    }
1312                }
1313                "attr_list" => {
1314                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315                    remaining = &remaining[attr_list_len..];
1316                }
1317                "pulldown_emphasis" => {
1318                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1319                    if let Some(span) = pulldown_emphasis {
                        // Byte length of the whole span including its markers; the
                        // entire span is consumed as one element, with its inner
                        // text kept as the raw `content` string.
1320                        let span_len = span.end - span.start;
1321                        if span.is_strikethrough {
1322                            elements.push(Element::Strikethrough(span.content.clone()));
1323                        } else if span.is_strong {
1324                            elements.push(Element::Bold {
1325                                content: span.content.clone(),
1326                                underscore: span.uses_underscore,
1327                            });
1328                        } else {
1329                            elements.push(Element::Italic {
1330                                content: span.content.clone(),
1331                                underscore: span.uses_underscore,
1332                            });
1333                        }
1334                        remaining = &remaining[span_len..];
1335                    } else {
1336                        // Fallback - shouldn't happen
1337                        elements.push(Element::Text(remaining[..1].to_string()));
1338                        remaining = &remaining[1..];
1339                    }
1340                }
1341                _ => {
1342                    // No special elements found, add all remaining text
1343                    elements.push(Element::Text(remaining.to_string()));
1344                    break;
1345                }
1346            }
1347        }
1348    }
1349
1350    elements
1351}
1352
1353/// Reflow elements for sentence-per-line mode
1354fn reflow_elements_sentence_per_line(
1355    elements: &[Element],
1356    custom_abbreviations: &Option<Vec<String>>,
1357    require_sentence_capital: bool,
1358) -> Vec<String> {
1359    let abbreviations = get_abbreviations(custom_abbreviations);
1360    let mut lines = Vec::new();
1361    let mut current_line = String::new();
1362
1363    for (idx, element) in elements.iter().enumerate() {
1364        let element_str = format!("{element}");
1365
1366        // For text elements, split into sentences
1367        if let Element::Text(text) = element {
1368            // Simply append text - it already has correct spacing from tokenization
1369            let combined = format!("{current_line}{text}");
1370            // Use the pre-computed abbreviations set to avoid redundant computation
1371            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373            if sentences.len() > 1 {
1374                // We found sentence boundaries
1375                for (i, sentence) in sentences.iter().enumerate() {
1376                    if i == 0 {
1377                        // First sentence might continue from previous elements
1378                        // But check if it ends with an abbreviation
1379                        let trimmed = sentence.trim();
1380
1381                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1383                            current_line = sentence.to_string();
1384                        } else {
1385                            // Normal case - emit the first sentence
1386                            lines.push(sentence.to_string());
1387                            current_line.clear();
1388                        }
1389                    } else if i == sentences.len() - 1 {
1390                        // Last sentence: check if it's complete or incomplete
1391                        let trimmed = sentence.trim();
1392                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395                            // Complete sentence - emit it immediately
1396                            lines.push(sentence.to_string());
1397                            current_line.clear();
1398                        } else {
1399                            // Incomplete sentence - save for next iteration
1400                            current_line = sentence.to_string();
1401                        }
1402                    } else {
1403                        // Complete sentences in the middle
1404                        lines.push(sentence.to_string());
1405                    }
1406                }
1407            } else {
1408                // Single sentence - check if it's complete
1409                let trimmed = combined.trim();
1410
1411                // If the combined result is only whitespace, don't accumulate it.
1412                // This prevents leading spaces on subsequent elements when lines
1413                // are joined with spaces during reflow iteration.
1414                if trimmed.is_empty() {
1415                    continue;
1416                }
1417
1418                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421                    // Complete single sentence - emit it
1422                    lines.push(trimmed.to_string());
1423                    current_line.clear();
1424                } else {
1425                    // Incomplete sentence - continue accumulating
1426                    current_line = combined;
1427                }
1428            }
1429        } else if let Element::Italic { content, underscore } = element {
1430            // Handle italic elements - may contain multiple sentences that need continuation
1431            let marker = if *underscore { "_" } else { "*" };
1432            handle_emphasis_sentence_split(
1433                content,
1434                marker,
1435                &abbreviations,
1436                require_sentence_capital,
1437                &mut current_line,
1438                &mut lines,
1439            );
1440        } else if let Element::Bold { content, underscore } = element {
1441            // Handle bold elements - may contain multiple sentences that need continuation
1442            let marker = if *underscore { "__" } else { "**" };
1443            handle_emphasis_sentence_split(
1444                content,
1445                marker,
1446                &abbreviations,
1447                require_sentence_capital,
1448                &mut current_line,
1449                &mut lines,
1450            );
1451        } else if let Element::Strikethrough(content) = element {
1452            // Handle strikethrough elements - may contain multiple sentences that need continuation
1453            handle_emphasis_sentence_split(
1454                content,
1455                "~~",
1456                &abbreviations,
1457                require_sentence_capital,
1458                &mut current_line,
1459                &mut lines,
1460            );
1461        } else {
1462            // Non-text, non-emphasis elements (Code, Links, etc.)
1463            // Check if this element is adjacent to the preceding text (no space between)
1464            let is_adjacent = if idx > 0 {
1465                match &elements[idx - 1] {
1466                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467                    _ => true,
1468                }
1469            } else {
1470                false
1471            };
1472
1473            // Add space before element if needed, but not for adjacent elements
1474            if !is_adjacent
1475                && !current_line.is_empty()
1476                && !current_line.ends_with(' ')
1477                && !current_line.ends_with('(')
1478                && !current_line.ends_with('[')
1479            {
1480                current_line.push(' ');
1481            }
1482            current_line.push_str(&element_str);
1483        }
1484    }
1485
1486    // Add any remaining content
1487    if !current_line.is_empty() {
1488        lines.push(current_line.trim().to_string());
1489    }
1490    lines
1491}
1492
1493/// Handle splitting emphasis content at sentence boundaries while preserving markers
1494fn handle_emphasis_sentence_split(
1495    content: &str,
1496    marker: &str,
1497    abbreviations: &HashSet<String>,
1498    require_sentence_capital: bool,
1499    current_line: &mut String,
1500    lines: &mut Vec<String>,
1501) {
1502    // Split the emphasis content into sentences
1503    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505    if sentences.len() <= 1 {
1506        // Single sentence or no boundaries - treat as atomic
1507        if !current_line.is_empty()
1508            && !current_line.ends_with(' ')
1509            && !current_line.ends_with('(')
1510            && !current_line.ends_with('[')
1511        {
1512            current_line.push(' ');
1513        }
1514        current_line.push_str(marker);
1515        current_line.push_str(content);
1516        current_line.push_str(marker);
1517
1518        // Check if the emphasis content ends with sentence punctuation - if so, emit
1519        let trimmed = content.trim();
1520        let ends_with_punct = ends_with_sentence_punct(trimmed);
1521        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522            lines.push(current_line.clone());
1523            current_line.clear();
1524        }
1525    } else {
1526        // Multiple sentences - each gets its own emphasis markers
1527        for (i, sentence) in sentences.iter().enumerate() {
1528            let trimmed = sentence.trim();
1529            if trimmed.is_empty() {
1530                continue;
1531            }
1532
1533            if i == 0 {
1534                // First sentence: combine with current_line and emit
1535                if !current_line.is_empty()
1536                    && !current_line.ends_with(' ')
1537                    && !current_line.ends_with('(')
1538                    && !current_line.ends_with('[')
1539                {
1540                    current_line.push(' ');
1541                }
1542                current_line.push_str(marker);
1543                current_line.push_str(trimmed);
1544                current_line.push_str(marker);
1545
1546                // Check if this is a complete sentence
1547                let ends_with_punct = ends_with_sentence_punct(trimmed);
1548                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549                    lines.push(current_line.clone());
1550                    current_line.clear();
1551                }
1552            } else if i == sentences.len() - 1 {
1553                // Last sentence: check if complete
1554                let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556                let mut line = String::new();
1557                line.push_str(marker);
1558                line.push_str(trimmed);
1559                line.push_str(marker);
1560
1561                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562                    lines.push(line);
1563                } else {
1564                    // Incomplete - keep in current_line for potential continuation
1565                    *current_line = line;
1566                }
1567            } else {
1568                // Middle sentences: emit with markers
1569                let mut line = String::new();
1570                line.push_str(marker);
1571                line.push_str(trimmed);
1572                line.push_str(marker);
1573                lines.push(line);
1574            }
1575        }
1576    }
1577}
1578
/// Lowercase English words before which a semantic line break reads
/// naturally. `split_at_break_word` looks for these as whole,
/// space-delimited words and breaks the line just before the chosen one.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    // Relative pronouns
    "which",
    "that",
    // Subordinating conjunctions
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    // Conjunctive adverbs and contrastive connectors
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1614
/// Return `true` when `c` is one of the clause-level punctuation marks used
/// as semantic line-break points: comma, semicolon, colon or em dash.
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620/// Find the closing `)` that balances the `(` at the start of `slice`.
1621///
1622/// `offset` is the byte position of the `(` in the original full-line string;
1623/// it is used to translate local byte positions into global positions for
1624/// element-span lookups.  Parens inside markdown element spans are skipped so
1625/// that, e.g., the closing `)` of an inline link does not prematurely end the
1626/// scan.  The char's *start* byte (not byte-after) is used for the span check
1627/// so that closing element delimiters — which sit exactly at the span's
1628/// exclusive-end boundary — are correctly excluded.
1629///
1630/// Returns `(end_local, inner)` where `end_local` is the byte offset within
1631/// `slice` just past the closing `)`, and `inner` is the content between the
1632/// outermost `(` and `)`.
1633fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1634    debug_assert!(slice.starts_with('('));
1635    let mut depth: i32 = 0;
1636    for (local_byte, c) in slice.char_indices() {
1637        let global_byte = offset + local_byte;
1638        // When depth > 0, skip parens that belong to a markdown element.
1639        // Use the char's start byte so that a closing element delimiter
1640        // (whose byte_after equals the span's exclusive end) is treated as
1641        // inside the element rather than outside it.
1642        if depth > 0 && is_inside_element(global_byte, element_spans) {
1643            continue;
1644        }
1645        match c {
1646            '(' => depth += 1,
1647            ')' => {
1648                depth -= 1;
1649                if depth == 0 {
1650                    let end = local_byte + 1;
1651                    let inner = &slice[1..local_byte];
1652                    return Some((end, inner));
1653                }
1654            }
1655            _ => {}
1656        }
1657    }
1658    None
1659}
1660
1661/// Split a line at a parenthetical boundary for semantic line breaks.
1662///
1663/// Two strategies are tried in order:
1664///
1665/// 1. **Leading parenthetical** — if the line begins with `(`, isolate the
1666///    entire balanced group on this line and start the rest on the next.
1667///    This handles lines produced by a prior split that placed a `(` at the
1668///    very beginning.
1669///
1670/// 2. **Mid-line parenthetical** — find the rightmost balanced `(…)` whose
1671///    content spans multiple words and whose preceding text fits within
1672///    `[min_first_len, line_length]`.  Split just before the `(` so the
1673///    parenthetical begins the following line.
1674///
1675/// Parentheses that fall inside markdown element spans (links, code, etc.)
1676/// are ignored in both strategies.
1677fn split_at_parenthetical(
1678    text: &str,
1679    line_length: usize,
1680    element_spans: &[(usize, usize)],
1681    length_mode: ReflowLengthMode,
1682) -> Option<(String, String)> {
1683    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1684
1685    // Strategy 1: text starts with '(' — isolate the parenthetical as its own line.
1686    if text.starts_with('(')
1687        && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1688        && inner.contains(' ')
1689    {
1690        // If clause punctuation immediately follows the closing ')', attach it
1691        // to the parenthetical so the continuation line does not start with a
1692        // bare comma or semicolon (e.g., "(foo, bar), then" → "(foo, bar),"
1693        // on one line and "then" on the next).
1694        let tail = &text[end_local..];
1695        let (first_end, rest_start) = match tail.chars().next() {
1696            Some(c) if is_clause_punctuation(c) => (end_local + c.len_utf8(), end_local + c.len_utf8()),
1697            _ => (end_local, end_local),
1698        };
1699        let first = &text[..first_end];
1700        let first_len = display_len(first, length_mode);
1701        // No MIN_SPLIT_RATIO check: a parenthetical unit is always a valid
1702        // semantic line regardless of its length.
1703        if first_len <= line_length {
1704            let rest = text[rest_start..].trim_start();
1705            if !rest.is_empty() {
1706                return Some((first.to_string(), rest.to_string()));
1707            }
1708        }
1709    }
1710
1711    // Strategy 2: find the rightmost multi-word '(' whose preceding text fits.
1712    let mut best_open_byte: Option<usize> = None;
1713    let mut pos = 0usize;
1714    while pos < text.len() {
1715        // '(' is ASCII so a single-byte comparison is safe in UTF-8.
1716        if text.as_bytes()[pos] != b'(' {
1717            let c = text[pos..].chars().next().unwrap();
1718            pos += c.len_utf8();
1719            continue;
1720        }
1721        // Skip '(' that are part of a markdown element (use start byte).
1722        if is_inside_element(pos, element_spans) {
1723            pos += 1;
1724            continue;
1725        }
1726        if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1727            let first = text[..pos].trim_end();
1728            let first_len = display_len(first, length_mode);
1729            if !first.is_empty()
1730                && first_len >= min_first_len
1731                && first_len <= line_length
1732                && inner.contains(' ')
1733                && best_open_byte.is_none_or(|prev| pos > prev)
1734            {
1735                best_open_byte = Some(pos);
1736            }
1737            pos += end_local;
1738        } else {
1739            pos += 1;
1740        }
1741    }
1742
1743    let open_byte = best_open_byte?;
1744    let first = text[..open_byte].trim_end().to_string();
1745    let rest = text[open_byte..].to_string();
1746    if first.is_empty() || rest.trim().is_empty() {
1747        return None;
1748    }
1749    Some((first, rest))
1750}
1751
1752/// Compute element spans for a flat text representation of elements.
1753/// Returns Vec of (start, end) byte offsets for non-Text elements,
1754/// so we can check that a split position doesn't fall inside them.
1755fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1756    let mut spans = Vec::new();
1757    let mut offset = 0;
1758    for element in elements {
1759        let rendered = format!("{element}");
1760        let len = rendered.len();
1761        if !matches!(element, Element::Text(_)) {
1762            spans.push((offset, offset + len));
1763        }
1764        offset += len;
1765    }
1766    spans
1767}
1768
/// Return `true` when `pos` lies strictly inside at least one `(start, end)`
/// span, i.e. `start < pos < end`. Both boundary positions count as outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    spans.iter().copied().any(|(start, end)| start < pos && pos < end)
}
1773
/// Lower bound, expressed as a fraction of `line_length`, on the display
/// length of the first half of a semantic split. The split helpers reject
/// candidates shorter than this so that fragments such as "A," or "Note:"
/// are not left alone on a line; the same threshold decides which short
/// trailing lines are merged back into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1777
1778/// Split a line at the latest clause punctuation that keeps the first part
1779/// within `line_length`. Returns None if no valid split point exists or if
1780/// the split would create an unreasonably short first line.
1781fn split_at_clause_punctuation(
1782    text: &str,
1783    line_length: usize,
1784    element_spans: &[(usize, usize)],
1785    length_mode: ReflowLengthMode,
1786) -> Option<(String, String)> {
1787    let chars: Vec<char> = text.chars().collect();
1788    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1789
1790    // Find the char index where accumulated display width exceeds line_length
1791    let mut width_acc = 0;
1792    let mut search_end_char = 0;
1793    for (idx, &c) in chars.iter().enumerate() {
1794        let c_width = display_len(&c.to_string(), length_mode);
1795        if width_acc + c_width > line_length {
1796            break;
1797        }
1798        width_acc += c_width;
1799        search_end_char = idx + 1;
1800    }
1801
1802    // Scan backwards tracking parenthesis depth to skip clause punctuation
1803    // inside plain-text parenthetical groups.  Scanning right-to-left means
1804    // ')' opens a depth level and '(' closes it.  Parens that belong to a
1805    // markdown element are excluded using the char's start byte (not byte-after)
1806    // so that closing element delimiters at the span boundary are correctly
1807    // treated as part of the element.
1808    let mut paren_depth: i32 = 0;
1809    let mut best_pos = None;
1810    for i in (0..search_end_char).rev() {
1811        // Start byte of char i (for paren element check)
1812        let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1813        // Byte just after char i (for clause punctuation element check — existing convention)
1814        let byte_after: usize = byte_start + chars[i].len_utf8();
1815
1816        if !is_inside_element(byte_start, element_spans) {
1817            match chars[i] {
1818                ')' => paren_depth += 1,
1819                '(' => paren_depth = paren_depth.saturating_sub(1),
1820                _ => {}
1821            }
1822        }
1823
1824        if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1825            best_pos = Some(i);
1826            break;
1827        }
1828    }
1829
1830    let pos = best_pos?;
1831
1832    // Reject splits that create very short first lines
1833    let first: String = chars[..=pos].iter().collect();
1834    let first_display_len = display_len(&first, length_mode);
1835    if first_display_len < min_first_len {
1836        return None;
1837    }
1838
1839    // Split after the punctuation character
1840    let rest: String = chars[pos + 1..].iter().collect();
1841    let rest = rest.trim_start().to_string();
1842
1843    if rest.is_empty() {
1844        return None;
1845    }
1846
1847    Some((first, rest))
1848}
1849
1850/// Compute plain-text paren-depth at each byte offset in `text`.
1851///
1852/// Returns a `Vec<i32>` of length `text.len()` where entry `i` is the
1853/// nesting depth at byte `i` — counting only `(` and `)` that fall
1854/// outside markdown element spans.  This lets callers quickly check
1855/// whether a byte position lies inside a plain-text parenthetical group.
1856fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1857    let mut map = vec![0i32; text.len()];
1858    let mut depth = 0i32;
1859    for (byte, c) in text.char_indices() {
1860        if !is_inside_element(byte, element_spans) {
1861            match c {
1862                '(' => depth += 1,
1863                ')' => depth = depth.saturating_sub(1),
1864                _ => {}
1865            }
1866        }
1867        // Fill the depth value for every byte of this (possibly multi-byte) char.
1868        let end = (byte + c.len_utf8()).min(map.len());
1869        for slot in &mut map[byte..end] {
1870            *slot = depth;
1871        }
1872    }
1873    map
1874}
1875
1876/// Return `true` if `line` is a complete, balanced, multi-word parenthetical
1877/// group — i.e. it starts with `(`, ends with `)` (possibly followed by
1878/// clause punctuation), has balanced parens throughout, and the inner content
1879/// contains at least one space (matching the ≥2-word threshold used by
1880/// `split_at_parenthetical` when deciding to split).
1881///
1882/// Used to prevent the short-line merge step from collapsing intentional
1883/// parenthetical splits back into the previous line.
1884fn is_standalone_parenthetical(line: &str) -> bool {
1885    let trimmed = line.trim();
1886    if !trimmed.starts_with('(') {
1887        return false;
1888    }
1889    // Strip optional trailing clause punctuation to find the real end.
1890    let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1891    if !core.ends_with(')') {
1892        return false;
1893    }
1894    // Inner content must span multiple words (same threshold as split_at_parenthetical).
1895    let inner = &core[1..core.len() - 1];
1896    if !inner.contains(' ') {
1897        return false;
1898    }
1899    // Verify the parens are balanced (depth returns to 0 at the last ')').
1900    let mut depth = 0i32;
1901    for c in core.chars() {
1902        match c {
1903            '(' => depth += 1,
1904            ')' => depth -= 1,
1905            _ => {}
1906        }
1907        if depth < 0 {
1908            return false;
1909        }
1910    }
1911    depth == 0
1912}
1913
1914/// Split a line before the latest break-word that keeps the first part
1915/// within `line_length`. Returns None if no valid split point exists or if
1916/// the split would create an unreasonably short first line.
1917fn split_at_break_word(
1918    text: &str,
1919    line_length: usize,
1920    element_spans: &[(usize, usize)],
1921    length_mode: ReflowLengthMode,
1922) -> Option<(String, String)> {
1923    let lower = text.to_lowercase();
1924    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1925    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1926
1927    // Build a paren-depth map so we can skip break-words inside plain-text
1928    // parenthetical groups (matching the protection added to split_at_clause_punctuation).
1929    let depth_map = paren_depth_map(text, element_spans);
1930
1931    for &word in BREAK_WORDS {
1932        let mut search_start = 0;
1933        while let Some(pos) = lower[search_start..].find(word) {
1934            let abs_pos = search_start + pos;
1935
1936            // Verify it's a word boundary: preceded by space, followed by space
1937            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1938            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1939
1940            if preceded_by_space && followed_by_space {
1941                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1942                let first_part = text[..abs_pos].trim_end();
1943                let first_part_len = display_len(first_part, length_mode);
1944
1945                // Skip break-words inside plain-text parenthetical groups.
1946                let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1947
1948                if first_part_len >= min_first_len
1949                    && first_part_len <= line_length
1950                    && !is_inside_element(abs_pos, element_spans)
1951                    && !inside_paren
1952                {
1953                    // Prefer the latest valid split point
1954                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1955                        best_split = Some((abs_pos, word.len()));
1956                    }
1957                }
1958            }
1959
1960            search_start = abs_pos + word.len();
1961        }
1962    }
1963
1964    let (byte_start, _word_len) = best_split?;
1965
1966    let first = text[..byte_start].trim_end().to_string();
1967    let rest = text[byte_start..].to_string();
1968
1969    if first.is_empty() || rest.trim().is_empty() {
1970        return None;
1971    }
1972
1973    Some((first, rest))
1974}
1975
1976/// Recursively cascade-split a line that exceeds line_length.
1977/// Tries clause punctuation first, then break-words, then word wrap.
1978fn cascade_split_line(
1979    text: &str,
1980    line_length: usize,
1981    abbreviations: &Option<Vec<String>>,
1982    length_mode: ReflowLengthMode,
1983    attr_lists: bool,
1984) -> Vec<String> {
1985    if line_length == 0 || display_len(text, length_mode) <= line_length {
1986        return vec![text.to_string()];
1987    }
1988
1989    let elements = parse_markdown_elements_inner(text, attr_lists);
1990    let element_spans = compute_element_spans(&elements);
1991
1992    // Try parenthetical boundary split (before clause punctuation so that
1993    // multi-word parentheticals are kept intact as semantic units)
1994    if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1995        let mut result = vec![first];
1996        result.extend(cascade_split_line(
1997            &rest,
1998            line_length,
1999            abbreviations,
2000            length_mode,
2001            attr_lists,
2002        ));
2003        return result;
2004    }
2005
2006    // Try clause punctuation split
2007    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2008        let mut result = vec![first];
2009        result.extend(cascade_split_line(
2010            &rest,
2011            line_length,
2012            abbreviations,
2013            length_mode,
2014            attr_lists,
2015        ));
2016        return result;
2017    }
2018
2019    // Try break-word split
2020    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2021        let mut result = vec![first];
2022        result.extend(cascade_split_line(
2023            &rest,
2024            line_length,
2025            abbreviations,
2026            length_mode,
2027            attr_lists,
2028        ));
2029        return result;
2030    }
2031
2032    // Fallback: word wrap using existing reflow_elements
2033    let options = ReflowOptions {
2034        line_length,
2035        break_on_sentences: false,
2036        preserve_breaks: false,
2037        sentence_per_line: false,
2038        semantic_line_breaks: false,
2039        abbreviations: abbreviations.clone(),
2040        length_mode,
2041        attr_lists,
2042        require_sentence_capital: true,
2043        max_list_continuation_indent: None,
2044    };
2045    reflow_elements(&elements, &options)
2046}
2047
2048/// Reflow elements using semantic line breaks strategy:
2049/// 1. Split at sentence boundaries (always)
2050/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
2051fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2052    // Step 1: Split into sentences using existing sentence-per-line logic
2053    let sentence_lines =
2054        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2055
2056    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
2057    // When line_length is 0 (unlimited), skip cascading — sentence splits only
2058    if options.line_length == 0 {
2059        return sentence_lines;
2060    }
2061
2062    let length_mode = options.length_mode;
2063    let mut result = Vec::new();
2064    for line in sentence_lines {
2065        if display_len(&line, length_mode) <= options.line_length {
2066            result.push(line);
2067        } else {
2068            result.extend(cascade_split_line(
2069                &line,
2070                options.line_length,
2071                &options.abbreviations,
2072                length_mode,
2073                options.attr_lists,
2074            ));
2075        }
2076    }
2077
2078    // Step 3: Merge very short trailing lines back into the previous line.
2079    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
2080    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2081    let mut merged: Vec<String> = Vec::with_capacity(result.len());
2082    for line in result {
2083        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2084            // Don't merge a line that is itself a standalone parenthetical group —
2085            // it was placed on its own line intentionally by split_at_parenthetical.
2086            if is_standalone_parenthetical(&line) {
2087                merged.push(line);
2088                continue;
2089            }
2090
2091            // Don't merge across sentence boundaries — sentence splits are intentional
2092            let prev_ends_at_sentence = {
2093                let trimmed = merged.last().unwrap().trim_end();
2094                trimmed
2095                    .chars()
2096                    .rev()
2097                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2098                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2099            };
2100
2101            if !prev_ends_at_sentence {
2102                let prev = merged.last_mut().unwrap();
2103                let combined = format!("{prev} {line}");
2104                // Only merge if the combined line fits within the limit
2105                if display_len(&combined, length_mode) <= options.line_length {
2106                    *prev = combined;
2107                    continue;
2108                }
2109            }
2110        }
2111        merged.push(line);
2112    }
2113    merged
2114}
2115
/// Find the byte index of the last ASCII space in `line` that is safe to
/// split at, i.e. one that does not lie strictly inside any of the
/// `(start, end)` byte ranges in `element_spans`.
///
/// The span test is exclusive on both ends (`start < pos < end`), so only
/// interior positions are protected; a space exactly at a span boundary is
/// considered safe. Returns `None` when no such space exists.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(idx, _)| idx)
        .find(|&idx| element_spans.iter().all(|&(start, end)| idx <= start || idx >= end))
}
2129
2130/// Reflow elements into lines that fit within the line length
2131fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2132    let mut lines = Vec::new();
2133    let mut current_line = String::new();
2134    let mut current_length = 0;
2135    // Track byte spans of non-Text elements in current_line for safe splitting
2136    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2137    let length_mode = options.length_mode;
2138
2139    for (idx, element) in elements.iter().enumerate() {
2140        let element_str = format!("{element}");
2141        let element_len = element.display_width(length_mode);
2142
2143        // Determine adjacency from the original elements, not from current_line.
2144        // Elements are adjacent when there's no whitespace between them in the source:
2145        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
2146        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
2147        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
2148        let is_adjacent_to_prev = if idx > 0 {
2149            match (&elements[idx - 1], element) {
2150                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2151                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2152                _ => true,
2153            }
2154        } else {
2155            false
2156        };
2157
2158        // For text elements that might need breaking
2159        if let Element::Text(text) = element {
2160            // Check if original text had leading whitespace
2161            let has_leading_space = text.starts_with(char::is_whitespace);
2162            // If this is a text element, always process it word by word
2163            let words: Vec<&str> = text.split_whitespace().collect();
2164
2165            for (i, word) in words.iter().enumerate() {
2166                let word_len = display_len(word, length_mode);
2167                // Check if this "word" is just punctuation that should stay attached
2168                let is_trailing_punct = word
2169                    .chars()
2170                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2171
2172                // First word of text adjacent to preceding non-text element
2173                // must stay attached (e.g., shortcode followed by punctuation or text)
2174                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2175
2176                if is_first_adjacent {
2177                    // Attach directly without space, preventing line break
2178                    if current_length + word_len > options.line_length && current_length > 0 {
2179                        // Would exceed — break before the adjacent group
2180                        // Use element-aware space search to avoid splitting inside links/code/etc.
2181                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2182                            let before = current_line[..last_space].trim_end().to_string();
2183                            let after = current_line[last_space + 1..].to_string();
2184                            lines.push(before);
2185                            current_line = format!("{after}{word}");
2186                            current_length = display_len(&current_line, length_mode);
2187                            current_line_element_spans.clear();
2188                        } else {
2189                            current_line.push_str(word);
2190                            current_length += word_len;
2191                        }
2192                    } else {
2193                        current_line.push_str(word);
2194                        current_length += word_len;
2195                    }
2196                } else if current_length > 0
2197                    && current_length + 1 + word_len > options.line_length
2198                    && !is_trailing_punct
2199                {
2200                    // Start a new line (but never for trailing punctuation)
2201                    lines.push(current_line.trim().to_string());
2202                    current_line = word.to_string();
2203                    current_length = word_len;
2204                    current_line_element_spans.clear();
2205                } else {
2206                    // Add word to current line
2207                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
2208                    // AND this isn't trailing punctuation (which attaches directly)
2209                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2210                        current_line.push(' ');
2211                        current_length += 1;
2212                    }
2213                    current_line.push_str(word);
2214                    current_length += word_len;
2215                }
2216            }
2217        } else if matches!(
2218            element,
2219            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2220        ) && element_len > options.line_length
2221        {
2222            // Italic, bold, and strikethrough with content longer than line_length need word wrapping.
2223            // Split content word-by-word, attach the opening marker to the first word
2224            // and the closing marker to the last word.
2225            let (content, marker): (&str, &str) = match element {
2226                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2227                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2228                Element::Strikethrough(content) => (content.as_str(), "~~"),
2229                _ => unreachable!(),
2230            };
2231
2232            let words: Vec<&str> = content.split_whitespace().collect();
2233            let n = words.len();
2234
2235            if n == 0 {
2236                // Empty span — treat as atomic
2237                let full = format!("{marker}{marker}");
2238                let full_len = display_len(&full, length_mode);
2239                if !is_adjacent_to_prev && current_length > 0 {
2240                    current_line.push(' ');
2241                    current_length += 1;
2242                }
2243                current_line.push_str(&full);
2244                current_length += full_len;
2245            } else {
2246                for (i, word) in words.iter().enumerate() {
2247                    let is_first = i == 0;
2248                    let is_last = i == n - 1;
2249                    let word_str: String = match (is_first, is_last) {
2250                        (true, true) => format!("{marker}{word}{marker}"),
2251                        (true, false) => format!("{marker}{word}"),
2252                        (false, true) => format!("{word}{marker}"),
2253                        (false, false) => word.to_string(),
2254                    };
2255                    let word_len = display_len(&word_str, length_mode);
2256
2257                    let needs_space = if is_first {
2258                        !is_adjacent_to_prev && current_length > 0
2259                    } else {
2260                        current_length > 0
2261                    };
2262
2263                    if needs_space && current_length + 1 + word_len > options.line_length {
2264                        lines.push(current_line.trim_end().to_string());
2265                        current_line = word_str;
2266                        current_length = word_len;
2267                        current_line_element_spans.clear();
2268                    } else {
2269                        if needs_space {
2270                            current_line.push(' ');
2271                            current_length += 1;
2272                        }
2273                        current_line.push_str(&word_str);
2274                        current_length += word_len;
2275                    }
2276                }
2277            }
2278        } else {
2279            // For non-text elements (code, links, references), treat as atomic units
2280            // These should never be broken across lines
2281
2282            if is_adjacent_to_prev {
2283                // Adjacent to preceding text — attach directly without space
2284                if current_length + element_len > options.line_length {
2285                    // Would exceed limit — break before the adjacent word group
2286                    // Use element-aware space search to avoid splitting inside links/code/etc.
2287                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2288                        let before = current_line[..last_space].trim_end().to_string();
2289                        let after = current_line[last_space + 1..].to_string();
2290                        lines.push(before);
2291                        current_line = format!("{after}{element_str}");
2292                        current_length = display_len(&current_line, length_mode);
2293                        current_line_element_spans.clear();
2294                        // Record the element span in the new current_line
2295                        let start = after.len();
2296                        current_line_element_spans.push((start, start + element_str.len()));
2297                    } else {
2298                        // No safe space to break at — accept the long line
2299                        let start = current_line.len();
2300                        current_line.push_str(&element_str);
2301                        current_length += element_len;
2302                        current_line_element_spans.push((start, current_line.len()));
2303                    }
2304                } else {
2305                    let start = current_line.len();
2306                    current_line.push_str(&element_str);
2307                    current_length += element_len;
2308                    current_line_element_spans.push((start, current_line.len()));
2309                }
2310            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2311                // Not adjacent, would exceed — start new line
2312                lines.push(current_line.trim().to_string());
2313                current_line = element_str.clone();
2314                current_length = element_len;
2315                current_line_element_spans.clear();
2316                current_line_element_spans.push((0, element_str.len()));
2317            } else {
2318                // Not adjacent, fits — add with space
2319                let ends_with_opener =
2320                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2321                if current_length > 0 && !ends_with_opener {
2322                    current_line.push(' ');
2323                    current_length += 1;
2324                }
2325                let start = current_line.len();
2326                current_line.push_str(&element_str);
2327                current_length += element_len;
2328                current_line_element_spans.push((start, current_line.len()));
2329            }
2330        }
2331    }
2332
2333    // Don't forget the last line
2334    if !current_line.is_empty() {
2335        lines.push(current_line.trim_end().to_string());
2336    }
2337
2338    lines
2339}
2340
2341/// Reflow markdown content preserving structure
2342pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2343    let lines: Vec<&str> = content.lines().collect();
2344    let mut result = Vec::new();
2345    let mut i = 0;
2346
2347    while i < lines.len() {
2348        let line = lines[i];
2349        let trimmed = line.trim();
2350
2351        // Preserve empty lines
2352        if trimmed.is_empty() {
2353            result.push(String::new());
2354            i += 1;
2355            continue;
2356        }
2357
2358        // Preserve headings as-is
2359        if trimmed.starts_with('#') {
2360            result.push(line.to_string());
2361            i += 1;
2362            continue;
2363        }
2364
2365        // Preserve Quarto/Pandoc div markers (:::) as-is
2366        if trimmed.starts_with(":::") {
2367            result.push(line.to_string());
2368            i += 1;
2369            continue;
2370        }
2371
2372        // Preserve fenced code blocks
2373        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2374            result.push(line.to_string());
2375            i += 1;
2376            // Copy lines until closing fence
2377            while i < lines.len() {
2378                result.push(lines[i].to_string());
2379                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2380                    i += 1;
2381                    break;
2382                }
2383                i += 1;
2384            }
2385            continue;
2386        }
2387
2388        // Preserve indented code blocks (4+ columns accounting for tab expansion)
2389        if calculate_indentation_width_default(line) >= 4 {
2390            // Collect all consecutive indented lines
2391            result.push(line.to_string());
2392            i += 1;
2393            while i < lines.len() {
2394                let next_line = lines[i];
2395                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
2396                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2397                    result.push(next_line.to_string());
2398                    i += 1;
2399                } else {
2400                    break;
2401                }
2402            }
2403            continue;
2404        }
2405
2406        // Preserve block quotes (but reflow their content)
2407        if trimmed.starts_with('>') {
2408            // find() returns byte position which is correct for str slicing
2409            // The unwrap is safe because we already verified trimmed starts with '>'
2410            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2411            let quote_prefix = line[0..gt_pos + 1].to_string();
2412            let quote_content = &line[quote_prefix.len()..].trim_start();
2413
2414            let reflowed = reflow_line(quote_content, options);
2415            for reflowed_line in reflowed.iter() {
2416                result.push(format!("{quote_prefix} {reflowed_line}"));
2417            }
2418            i += 1;
2419            continue;
2420        }
2421
2422        // Preserve horizontal rules first (before checking for lists)
2423        if is_horizontal_rule(trimmed) {
2424            result.push(line.to_string());
2425            i += 1;
2426            continue;
2427        }
2428
2429        // Preserve lists (but not horizontal rules)
2430        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2431            // Find the list marker and preserve indentation
2432            let indent = line.len() - line.trim_start().len();
2433            let indent_str = " ".repeat(indent);
2434
2435            // For numbered lists, find the period and the space after it
2436            // For bullet lists, find the marker and the space after it
2437            let mut marker_end = indent;
2438            let mut content_start = indent;
2439
2440            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2441                // Numbered list: find the period
2442                if let Some(period_pos) = line[indent..].find('.') {
2443                    marker_end = indent + period_pos + 1; // Include the period
2444                    content_start = marker_end;
2445                    // Skip any spaces after the period to find content start
2446                    // Use byte-based check since content_start is a byte index
2447                    // This is safe because space is ASCII (single byte)
2448                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2449                        content_start += 1;
2450                    }
2451                }
2452            } else {
2453                // Bullet list: marker is single character
2454                marker_end = indent + 1; // Just the marker character
2455                content_start = marker_end;
2456                // Skip any spaces after the marker
2457                // Use byte-based check since content_start is a byte index
2458                // This is safe because space is ASCII (single byte)
2459                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2460                    content_start += 1;
2461                }
2462            }
2463
2464            // Minimum indent for continuation lines (based on list marker, before checkbox)
2465            let min_continuation_indent = content_start;
2466
2467            // Detect checkbox/task list markers: [ ], [x], [X]
2468            // GFM task lists work with both unordered and ordered lists
2469            let rest = &line[content_start..];
2470            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2471                marker_end = content_start + 3; // Include the checkbox `[ ]`
2472                content_start += 4; // Skip past `[ ] `
2473            }
2474
2475            let marker = &line[indent..marker_end];
2476
2477            // Collect all content for this list item (including continuation lines)
2478            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
2479            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2480            i += 1;
2481
2482            // Collect continuation lines (indented lines that are part of this list item)
2483            // Use the base marker indent (not checkbox-extended) for collection,
2484            // since users may indent continuations to the bullet level, not the checkbox level
2485            while i < lines.len() {
2486                let next_line = lines[i];
2487                let next_trimmed = next_line.trim();
2488
2489                // Stop if we hit an empty line or another list item or special block
2490                if is_block_boundary(next_trimmed) {
2491                    break;
2492                }
2493
2494                // Check if this line is indented (continuation of list item)
2495                let next_indent = next_line.len() - next_line.trim_start().len();
2496                if next_indent >= min_continuation_indent {
2497                    // This is a continuation line - add its content
2498                    // Preserve hard breaks while trimming excessive whitespace
2499                    let trimmed_start = next_line.trim_start();
2500                    list_content.push(trim_preserving_hard_break(trimmed_start));
2501                    i += 1;
2502                } else {
2503                    // Not indented enough, not part of this list item
2504                    break;
2505                }
2506            }
2507
2508            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2509            // Hard breaks should prevent joining with the next line
2510            let combined_content = if options.preserve_breaks {
2511                list_content[0].clone()
2512            } else {
2513                // Check if any lines have hard breaks - if so, preserve the structure
2514                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2515                if has_hard_breaks {
2516                    // Don't join lines with hard breaks - keep them separate with newlines
2517                    list_content.join("\n")
2518                } else {
2519                    // No hard breaks, safe to join with spaces
2520                    list_content.join(" ")
2521                }
2522            };
2523
2524            // Calculate the proper indentation for continuation lines
2525            let trimmed_marker = marker;
2526            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2527                // Cap the relative indent (past the nesting level) to max_indent,
2528                // then add back the nesting indent so nested items stay correct
2529                indent + (content_start - indent).min(max_indent)
2530            } else {
2531                content_start
2532            };
2533
2534            // Adjust line length to account for list marker and space
2535            let prefix_length = indent + trimmed_marker.len() + 1;
2536
2537            // Create adjusted options with reduced line length
2538            let adjusted_options = ReflowOptions {
2539                line_length: options.line_length.saturating_sub(prefix_length),
2540                ..options.clone()
2541            };
2542
2543            let reflowed = reflow_line(&combined_content, &adjusted_options);
2544            for (j, reflowed_line) in reflowed.iter().enumerate() {
2545                if j == 0 {
2546                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2547                } else {
2548                    // Continuation lines aligned with text after marker
2549                    let continuation_indent = " ".repeat(continuation_spaces);
2550                    result.push(format!("{continuation_indent}{reflowed_line}"));
2551                }
2552            }
2553            continue;
2554        }
2555
2556        // Preserve tables
2557        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2558            result.push(line.to_string());
2559            i += 1;
2560            continue;
2561        }
2562
2563        // Preserve reference definitions
2564        if trimmed.starts_with('[') && line.contains("]:") {
2565            result.push(line.to_string());
2566            i += 1;
2567            continue;
2568        }
2569
2570        // Preserve definition list items (extended markdown)
2571        if is_definition_list_item(trimmed) {
2572            result.push(line.to_string());
2573            i += 1;
2574            continue;
2575        }
2576
2577        // Check if this is a single line that doesn't need processing
2578        let mut is_single_line_paragraph = true;
2579        if i + 1 < lines.len() {
2580            let next_trimmed = lines[i + 1].trim();
2581            // Check if next line continues this paragraph
2582            if !is_block_boundary(next_trimmed) {
2583                is_single_line_paragraph = false;
2584            }
2585        }
2586
2587        // If it's a single line that fits, just add it as-is
2588        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2589            result.push(line.to_string());
2590            i += 1;
2591            continue;
2592        }
2593
2594        // For regular paragraphs, collect consecutive lines
2595        let mut paragraph_parts = Vec::new();
2596        let mut current_part = vec![line];
2597        i += 1;
2598
2599        // If preserve_breaks is true, treat each line separately
2600        if options.preserve_breaks {
2601            // Don't collect consecutive lines - just reflow this single line
2602            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2603                Some("\\")
2604            } else if line.ends_with("  ") {
2605                Some("  ")
2606            } else {
2607                None
2608            };
2609            let reflowed = reflow_line(line, options);
2610
2611            // Preserve hard breaks (two trailing spaces or backslash)
2612            if let Some(break_marker) = hard_break_type {
2613                if !reflowed.is_empty() {
2614                    let mut reflowed_with_break = reflowed;
2615                    let last_idx = reflowed_with_break.len() - 1;
2616                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2617                        reflowed_with_break[last_idx].push_str(break_marker);
2618                    }
2619                    result.extend(reflowed_with_break);
2620                }
2621            } else {
2622                result.extend(reflowed);
2623            }
2624        } else {
2625            // Original behavior: collect consecutive lines into a paragraph
2626            while i < lines.len() {
2627                let prev_line = if !current_part.is_empty() {
2628                    current_part.last().unwrap()
2629                } else {
2630                    ""
2631                };
2632                let next_line = lines[i];
2633                let next_trimmed = next_line.trim();
2634
2635                // Stop at empty lines or special blocks
2636                if is_block_boundary(next_trimmed) {
2637                    break;
2638                }
2639
2640                // Check if previous line ends with hard break (two spaces or backslash)
2641                // or is a complete sentence in sentence_per_line mode
2642                let prev_trimmed = prev_line.trim();
2643                let abbreviations = get_abbreviations(&options.abbreviations);
2644                let ends_with_sentence = (prev_trimmed.ends_with('.')
2645                    || prev_trimmed.ends_with('!')
2646                    || prev_trimmed.ends_with('?')
2647                    || prev_trimmed.ends_with(".*")
2648                    || prev_trimmed.ends_with("!*")
2649                    || prev_trimmed.ends_with("?*")
2650                    || prev_trimmed.ends_with("._")
2651                    || prev_trimmed.ends_with("!_")
2652                    || prev_trimmed.ends_with("?_")
2653                    // Quote-terminated sentences (straight and curly quotes)
2654                    || prev_trimmed.ends_with(".\"")
2655                    || prev_trimmed.ends_with("!\"")
2656                    || prev_trimmed.ends_with("?\"")
2657                    || prev_trimmed.ends_with(".'")
2658                    || prev_trimmed.ends_with("!'")
2659                    || prev_trimmed.ends_with("?'")
2660                    || prev_trimmed.ends_with(".\u{201D}")
2661                    || prev_trimmed.ends_with("!\u{201D}")
2662                    || prev_trimmed.ends_with("?\u{201D}")
2663                    || prev_trimmed.ends_with(".\u{2019}")
2664                    || prev_trimmed.ends_with("!\u{2019}")
2665                    || prev_trimmed.ends_with("?\u{2019}"))
2666                    && !text_ends_with_abbreviation(
2667                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2668                        &abbreviations,
2669                    );
2670
2671                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2672                    // Start a new part after hard break or complete sentence
2673                    paragraph_parts.push(current_part.join(" "));
2674                    current_part = vec![next_line];
2675                } else {
2676                    current_part.push(next_line);
2677                }
2678                i += 1;
2679            }
2680
2681            // Add the last part
2682            if !current_part.is_empty() {
2683                if current_part.len() == 1 {
2684                    // Single line, don't add trailing space
2685                    paragraph_parts.push(current_part[0].to_string());
2686                } else {
2687                    paragraph_parts.push(current_part.join(" "));
2688                }
2689            }
2690
2691            // Reflow each part separately, preserving hard breaks
2692            for (j, part) in paragraph_parts.iter().enumerate() {
2693                let reflowed = reflow_line(part, options);
2694                result.extend(reflowed);
2695
2696                // Preserve hard break by ensuring last line of part ends with hard break marker
2697                // Use two spaces as the default hard break format for reflows
2698                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2699                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2700                    let last_idx = result.len() - 1;
2701                    if !has_hard_break(&result[last_idx]) {
2702                        result[last_idx].push_str("  ");
2703                    }
2704                }
2705            }
2706        }
2707    }
2708
2709    // Preserve trailing newline if the original content had one
2710    let result_text = result.join("\n");
2711    if content.ends_with('\n') && !result_text.ends_with('\n') {
2712        format!("{result_text}\n")
2713    } else {
2714        result_text
2715    }
2716}
2717
/// Result record for one reflowed paragraph.
///
/// Holds the byte range the paragraph occupied in the original content together
/// with the reflowed text for it.
#[derive(Debug, Clone)]
pub struct ParagraphReflow {
    /// Byte offset in the original content where the paragraph starts
    pub start_byte: usize,
    /// Byte offset in the original content where the paragraph ends
    // NOTE(review): inclusive vs. exclusive is not visible from this definition — confirm with the producer
    pub end_byte: usize,
    /// Reflowed text of the paragraph
    pub reflowed_text: String,
}
2728
/// One source line of a blockquote paragraph, captured for style-preserving reflow.
///
/// `is_explicit` and `prefix` always agree: `is_explicit == true` exactly when
/// `prefix.is_some()`. The constructors uphold this, so build values with
/// [`BlockquoteLineData::explicit`] or [`BlockquoteLineData::lazy`] instead of a
/// struct literal.
#[derive(Debug, Clone)]
pub struct BlockquoteLineData {
    /// Trimmed line text with the `> ` prefix removed.
    pub(crate) content: String,
    /// `true` when the line has its own blockquote marker.
    pub(crate) is_explicit: bool,
    /// Complete blockquote prefix (e.g. `"> "`, `"> > "`); `None` for lazy continuation lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Single construction path: `is_explicit` is derived from whether a prefix
    /// is present, which keeps the two fields in agreement.
    fn from_parts(content: String, prefix: Option<String>) -> Self {
        BlockquoteLineData {
            content,
            is_explicit: prefix.is_some(),
            prefix,
        }
    }

    /// Build a line that carries an explicit blockquote marker, recorded as `prefix`.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self::from_parts(content, Some(prefix))
    }

    /// Build a lazy continuation line, i.e. one without a blockquote marker.
    pub fn lazy(content: String) -> Self {
        Self::from_parts(content, None)
    }
}
2763
/// How continuation lines of a blockquote paragraph are written after reflow.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines carry an explicit blockquote marker.
    Explicit,
    /// Continuation lines are lazy, i.e. written without a blockquote marker.
    Lazy,
}
2770
2771/// Determine the continuation style for a blockquote paragraph from its collected lines.
2772///
2773/// The first line is always explicit (it carries the marker), so only continuation
2774/// lines (index 1+) are counted. Ties resolve to `Explicit`.
2775///
2776/// When the slice has only one element (no continuation lines to inspect), both
2777/// counts are zero and the tie-breaking rule returns `Explicit`.
2778pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2779    let mut explicit_count = 0usize;
2780    let mut lazy_count = 0usize;
2781
2782    for line in lines.iter().skip(1) {
2783        if line.is_explicit {
2784            explicit_count += 1;
2785        } else {
2786            lazy_count += 1;
2787        }
2788    }
2789
2790    if explicit_count > 0 && lazy_count == 0 {
2791        BlockquoteContinuationStyle::Explicit
2792    } else if lazy_count > 0 && explicit_count == 0 {
2793        BlockquoteContinuationStyle::Lazy
2794    } else if explicit_count >= lazy_count {
2795        BlockquoteContinuationStyle::Explicit
2796    } else {
2797        BlockquoteContinuationStyle::Lazy
2798    }
2799}
2800
2801/// Determine the dominant blockquote prefix for a paragraph.
2802///
2803/// The most frequently occurring explicit prefix wins. Ties are broken by earliest
2804/// first appearance. Falls back to `fallback` when no explicit lines are present.
2805pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2806    let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2807
2808    for (idx, line) in lines.iter().enumerate() {
2809        let Some(prefix) = line.prefix.as_ref() else {
2810            continue;
2811        };
2812        counts
2813            .entry(prefix.clone())
2814            .and_modify(|entry| entry.0 += 1)
2815            .or_insert((1, idx));
2816    }
2817
2818    counts
2819        .into_iter()
2820        .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2821            count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2822        })
2823        .map(|(prefix, _)| prefix)
2824        .unwrap_or_else(|| fallback.to_string())
2825}
2826
2827/// Whether a reflowed blockquote content line must carry an explicit prefix.
2828///
2829/// Lines that would start a new block structure (headings, fences, lists, etc.)
2830/// cannot safely use lazy continuation syntax.
2831pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2832    let trimmed = content_line.trim_start();
2833    trimmed.starts_with('>')
2834        || trimmed.starts_with('#')
2835        || trimmed.starts_with("```")
2836        || trimmed.starts_with("~~~")
2837        || is_unordered_list_marker(trimmed)
2838        || is_numbered_list_item(trimmed)
2839        || is_horizontal_rule(trimmed)
2840        || is_definition_list_item(trimmed)
2841        || (trimmed.starts_with('[') && trimmed.contains("]:"))
2842        || trimmed.starts_with(":::")
2843        || (trimmed.starts_with('<')
2844            && !trimmed.starts_with("<http")
2845            && !trimmed.starts_with("<https")
2846            && !trimmed.starts_with("<mailto:"))
2847}
2848
2849/// Reflow blockquote content lines and apply continuation style.
2850///
2851/// Segments separated by hard breaks are reflowed independently. The output lines
2852/// receive blockquote prefixes according to `continuation_style`: the first line and
2853/// any line that would start a new block structure always get an explicit prefix;
2854/// other lines follow the detected style.
2855///
2856/// Returns the styled, reflowed lines (without a trailing newline).
2857pub fn reflow_blockquote_content(
2858    lines: &[BlockquoteLineData],
2859    explicit_prefix: &str,
2860    continuation_style: BlockquoteContinuationStyle,
2861    options: &ReflowOptions,
2862) -> Vec<String> {
2863    let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2864    let segments = split_into_segments_strs(&content_strs);
2865    let mut reflowed_content_lines: Vec<String> = Vec::new();
2866
2867    for segment in segments {
2868        let hard_break_type = segment.last().and_then(|&line| {
2869            let line = line.strip_suffix('\r').unwrap_or(line);
2870            if line.ends_with('\\') {
2871                Some("\\")
2872            } else if line.ends_with("  ") {
2873                Some("  ")
2874            } else {
2875                None
2876            }
2877        });
2878
2879        let pieces: Vec<&str> = segment
2880            .iter()
2881            .map(|&line| {
2882                if let Some(l) = line.strip_suffix('\\') {
2883                    l.trim_end()
2884                } else if let Some(l) = line.strip_suffix("  ") {
2885                    l.trim_end()
2886                } else {
2887                    line.trim_end()
2888                }
2889            })
2890            .collect();
2891
2892        let segment_text = pieces.join(" ");
2893        let segment_text = segment_text.trim();
2894        if segment_text.is_empty() {
2895            continue;
2896        }
2897
2898        let mut reflowed = reflow_line(segment_text, options);
2899        if let Some(break_marker) = hard_break_type
2900            && !reflowed.is_empty()
2901        {
2902            let last_idx = reflowed.len() - 1;
2903            if !has_hard_break(&reflowed[last_idx]) {
2904                reflowed[last_idx].push_str(break_marker);
2905            }
2906        }
2907        reflowed_content_lines.extend(reflowed);
2908    }
2909
2910    let mut styled_lines: Vec<String> = Vec::new();
2911    for (idx, line) in reflowed_content_lines.iter().enumerate() {
2912        let force_explicit = idx == 0
2913            || continuation_style == BlockquoteContinuationStyle::Explicit
2914            || should_force_explicit_blockquote_line(line);
2915        if force_explicit {
2916            styled_lines.push(format!("{explicit_prefix}{line}"));
2917        } else {
2918            styled_lines.push(line.clone());
2919        }
2920    }
2921
2922    styled_lines
2923}
2924
2925fn is_blockquote_content_boundary(content: &str) -> bool {
2926    let trimmed = content.trim();
2927    trimmed.is_empty()
2928        || is_block_boundary(trimmed)
2929        || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2930        || trimmed.starts_with(":::")
2931        || crate::utils::is_template_directive_only(content)
2932        || is_standalone_attr_list(content)
2933        || is_snippet_block_delimiter(content)
2934}
2935
2936fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2937    let mut segments = Vec::new();
2938    let mut current = Vec::new();
2939
2940    for &line in lines {
2941        current.push(line);
2942        if has_hard_break(line) {
2943            segments.push(current);
2944            current = Vec::new();
2945        }
2946    }
2947
2948    if !current.is_empty() {
2949        segments.push(current);
2950    }
2951
2952    segments
2953}
2954
2955fn reflow_blockquote_paragraph_at_line(
2956    content: &str,
2957    lines: &[&str],
2958    target_idx: usize,
2959    options: &ReflowOptions,
2960) -> Option<ParagraphReflow> {
2961    let mut anchor_idx = target_idx;
2962    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2963        parsed.nesting_level
2964    } else {
2965        let mut found = None;
2966        let mut idx = target_idx;
2967        loop {
2968            if lines[idx].trim().is_empty() {
2969                break;
2970            }
2971            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2972                found = Some((idx, parsed.nesting_level));
2973                break;
2974            }
2975            if idx == 0 {
2976                break;
2977            }
2978            idx -= 1;
2979        }
2980        let (idx, level) = found?;
2981        anchor_idx = idx;
2982        level
2983    };
2984
2985    // Expand backward to capture prior quote content at the same nesting level.
2986    let mut para_start = anchor_idx;
2987    while para_start > 0 {
2988        let prev_idx = para_start - 1;
2989        let prev_line = lines[prev_idx];
2990
2991        if prev_line.trim().is_empty() {
2992            break;
2993        }
2994
2995        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2996            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2997                break;
2998            }
2999            para_start = prev_idx;
3000            continue;
3001        }
3002
3003        let prev_lazy = prev_line.trim_start();
3004        if is_blockquote_content_boundary(prev_lazy) {
3005            break;
3006        }
3007        para_start = prev_idx;
3008    }
3009
3010    // Lazy continuation cannot precede the first explicit marker.
3011    while para_start < lines.len() {
3012        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3013            para_start += 1;
3014            continue;
3015        };
3016        target_level = parsed.nesting_level;
3017        break;
3018    }
3019
3020    if para_start >= lines.len() || para_start > target_idx {
3021        return None;
3022    }
3023
3024    // Collect explicit lines at target level and lazy continuation lines.
3025    // Each entry is (original_line_idx, BlockquoteLineData).
3026    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3027    let mut idx = para_start;
3028    while idx < lines.len() {
3029        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3030            break;
3031        }
3032
3033        let line = lines[idx];
3034        if line.trim().is_empty() {
3035            break;
3036        }
3037
3038        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3039            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3040                break;
3041            }
3042            collected.push((
3043                idx,
3044                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3045            ));
3046            idx += 1;
3047            continue;
3048        }
3049
3050        let lazy_content = line.trim_start();
3051        if is_blockquote_content_boundary(lazy_content) {
3052            break;
3053        }
3054
3055        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3056        idx += 1;
3057    }
3058
3059    if collected.is_empty() {
3060        return None;
3061    }
3062
3063    let para_end = collected[collected.len() - 1].0;
3064    if target_idx < para_start || target_idx > para_end {
3065        return None;
3066    }
3067
3068    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3069
3070    let fallback_prefix = line_data
3071        .iter()
3072        .find_map(|d| d.prefix.clone())
3073        .unwrap_or_else(|| "> ".to_string());
3074    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3075    let continuation_style = blockquote_continuation_style(&line_data);
3076
3077    let adjusted_line_length = options
3078        .line_length
3079        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3080        .max(1);
3081
3082    let adjusted_options = ReflowOptions {
3083        line_length: adjusted_line_length,
3084        ..options.clone()
3085    };
3086
3087    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3088
3089    if styled_lines.is_empty() {
3090        return None;
3091    }
3092
3093    // Calculate byte offsets.
3094    let mut start_byte = 0;
3095    for line in lines.iter().take(para_start) {
3096        start_byte += line.len() + 1;
3097    }
3098
3099    let mut end_byte = start_byte;
3100    for line in lines.iter().take(para_end + 1).skip(para_start) {
3101        end_byte += line.len() + 1;
3102    }
3103
3104    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3105    if !includes_trailing_newline {
3106        end_byte -= 1;
3107    }
3108
3109    let reflowed_joined = styled_lines.join("\n");
3110    let reflowed_text = if includes_trailing_newline {
3111        if reflowed_joined.ends_with('\n') {
3112            reflowed_joined
3113        } else {
3114            format!("{reflowed_joined}\n")
3115        }
3116    } else if reflowed_joined.ends_with('\n') {
3117        reflowed_joined.trim_end_matches('\n').to_string()
3118    } else {
3119        reflowed_joined
3120    };
3121
3122    Some(ParagraphReflow {
3123        start_byte,
3124        end_byte,
3125        reflowed_text,
3126    })
3127}
3128
3129/// Reflow a single paragraph at the specified line number
3130///
3131/// This function finds the paragraph containing the given line number,
3132/// reflows it according to the specified line length, and returns
3133/// information about the paragraph location and its reflowed text.
3134///
3135/// # Arguments
3136///
3137/// * `content` - The full document content
3138/// * `line_number` - The 1-based line number within the paragraph to reflow
3139/// * `line_length` - The target line length for reflowing
3140///
3141/// # Returns
3142///
3143/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
3144/// or `None` if the line number is out of bounds or the content at that
3145/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
3146pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3147    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3148}
3149
3150/// Reflow a paragraph at the given line with a specific length mode.
3151pub fn reflow_paragraph_at_line_with_mode(
3152    content: &str,
3153    line_number: usize,
3154    line_length: usize,
3155    length_mode: ReflowLengthMode,
3156) -> Option<ParagraphReflow> {
3157    let options = ReflowOptions {
3158        line_length,
3159        length_mode,
3160        ..Default::default()
3161    };
3162    reflow_paragraph_at_line_with_options(content, line_number, &options)
3163}
3164
3165/// Reflow a paragraph at the given line using the provided options.
3166///
3167/// This is the canonical implementation used by both the rule's fix mode and the
3168/// LSP "Reflow paragraph" action. Passing a fully configured `ReflowOptions` allows
3169/// the LSP action to respect user-configured reflow mode, abbreviations, etc.
3170///
3171/// # Returns
3172///
3173/// Returns `Some(ParagraphReflow)` with byte offsets and reflowed text, or `None`
3174/// if the line is out of bounds or sits inside a non-reflow-able construct.
3175pub fn reflow_paragraph_at_line_with_options(
3176    content: &str,
3177    line_number: usize,
3178    options: &ReflowOptions,
3179) -> Option<ParagraphReflow> {
3180    if line_number == 0 {
3181        return None;
3182    }
3183
3184    let lines: Vec<&str> = content.lines().collect();
3185
3186    // Check if line number is valid (1-based)
3187    if line_number > lines.len() {
3188        return None;
3189    }
3190
3191    let target_idx = line_number - 1; // Convert to 0-based
3192    let target_line = lines[target_idx];
3193    let trimmed = target_line.trim();
3194
3195    // Handle blockquote paragraphs (including lazy continuation lines) with
3196    // style-preserving output.
3197    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3198        return Some(blockquote_reflow);
3199    }
3200
3201    // Don't reflow special blocks
3202    if is_paragraph_boundary(trimmed, target_line) {
3203        return None;
3204    }
3205
3206    // Find paragraph start - scan backward until blank line or special block
3207    let mut para_start = target_idx;
3208    while para_start > 0 {
3209        let prev_idx = para_start - 1;
3210        let prev_line = lines[prev_idx];
3211        let prev_trimmed = prev_line.trim();
3212
3213        // Stop at blank line or special blocks
3214        if is_paragraph_boundary(prev_trimmed, prev_line) {
3215            break;
3216        }
3217
3218        para_start = prev_idx;
3219    }
3220
3221    // Find paragraph end - scan forward until blank line or special block
3222    let mut para_end = target_idx;
3223    while para_end + 1 < lines.len() {
3224        let next_idx = para_end + 1;
3225        let next_line = lines[next_idx];
3226        let next_trimmed = next_line.trim();
3227
3228        // Stop at blank line or special blocks
3229        if is_paragraph_boundary(next_trimmed, next_line) {
3230            break;
3231        }
3232
3233        para_end = next_idx;
3234    }
3235
3236    // Extract paragraph lines
3237    let paragraph_lines = &lines[para_start..=para_end];
3238
3239    // Calculate byte offsets
3240    let mut start_byte = 0;
3241    for line in lines.iter().take(para_start) {
3242        start_byte += line.len() + 1; // +1 for newline
3243    }
3244
3245    let mut end_byte = start_byte;
3246    for line in paragraph_lines.iter() {
3247        end_byte += line.len() + 1; // +1 for newline
3248    }
3249
3250    // Track whether the byte range includes a trailing newline
3251    // (it doesn't if this is the last line and the file doesn't end with newline)
3252    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3253
3254    // Adjust end_byte if the last line doesn't have a newline
3255    if !includes_trailing_newline {
3256        end_byte -= 1;
3257    }
3258
3259    // Join paragraph lines and reflow
3260    let paragraph_text = paragraph_lines.join("\n");
3261
3262    // Reflow the paragraph using reflow_markdown to handle it properly
3263    let reflowed = reflow_markdown(&paragraph_text, options);
3264
3265    // Ensure reflowed text matches whether the byte range includes a trailing newline
3266    // This is critical: if the range includes a newline, the replacement must too,
3267    // otherwise the next line will get appended to the reflowed paragraph
3268    let reflowed_text = if includes_trailing_newline {
3269        // Range includes newline - ensure reflowed text has one
3270        if reflowed.ends_with('\n') {
3271            reflowed
3272        } else {
3273            format!("{reflowed}\n")
3274        }
3275    } else {
3276        // Range doesn't include newline - ensure reflowed text doesn't have one
3277        if reflowed.ends_with('\n') {
3278            reflowed.trim_end_matches('\n').to_string()
3279        } else {
3280            reflowed
3281        }
3282    };
3283
3284    Some(ParagraphReflow {
3285        start_byte,
3286        end_byte,
3287        reflowed_text,
3288    })
3289}
3290
#[cfg(test)]
mod tests {
    // Inline unit tests for helpers reachable through `use super::*`.
    use super::*;

    /// Checks `text_ends_with_abbreviation()` against the default abbreviation set.
    ///
    /// The helper is imported into this module from `sentence_utils`; the test is
    /// kept inline here. The original note states that public-API and integration
    /// tests live in tests/utils/text_reflow_test.rs.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        // `None` means no user-supplied abbreviations: use the defaults only
        let abbreviations = get_abbreviations(&None);

        // True cases - built-in abbreviations (titles and i.e./e.g.)
        assert!(text_ends_with_abbreviation("Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("word Dr.", &abbreviations));
        assert!(text_ends_with_abbreviation("e.g.", &abbreviations));
        assert!(text_ends_with_abbreviation("i.e.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mr.", &abbreviations));
        assert!(text_ends_with_abbreviation("Mrs.", &abbreviations));
        assert!(text_ends_with_abbreviation("Ms.", &abbreviations));
        assert!(text_ends_with_abbreviation("Prof.", &abbreviations));

        // False cases - NOT in built-in list (etc doesn't always have period)
        assert!(!text_ends_with_abbreviation("etc.", &abbreviations));
        assert!(!text_ends_with_abbreviation("paradigms.", &abbreviations));
        assert!(!text_ends_with_abbreviation("programs.", &abbreviations));
        assert!(!text_ends_with_abbreviation("items.", &abbreviations));
        assert!(!text_ends_with_abbreviation("systems.", &abbreviations));
        assert!(!text_ends_with_abbreviation("Dr?", &abbreviations)); // question mark, not period
        assert!(!text_ends_with_abbreviation("Mr!", &abbreviations)); // exclamation, not period
        assert!(!text_ends_with_abbreviation("paradigms?", &abbreviations)); // question mark
        assert!(!text_ends_with_abbreviation("word", &abbreviations)); // no punctuation
        assert!(!text_ends_with_abbreviation("", &abbreviations)); // empty string
    }

    /// `is_unordered_list_marker` accepts `-`, `*`, `+` followed by a space (or
    /// standing alone) and rejects rules, emphasis, and marker-like prefixes.
    #[test]
    fn test_is_unordered_list_marker() {
        // Valid unordered list markers
        assert!(is_unordered_list_marker("- item"));
        assert!(is_unordered_list_marker("* item"));
        assert!(is_unordered_list_marker("+ item"));
        assert!(is_unordered_list_marker("-")); // lone marker
        assert!(is_unordered_list_marker("*"));
        assert!(is_unordered_list_marker("+"));

        // Not list markers
        assert!(!is_unordered_list_marker("---")); // horizontal rule
        assert!(!is_unordered_list_marker("***")); // horizontal rule
        assert!(!is_unordered_list_marker("- - -")); // horizontal rule
        assert!(!is_unordered_list_marker("* * *")); // horizontal rule
        assert!(!is_unordered_list_marker("*emphasis*")); // emphasis, not list
        assert!(!is_unordered_list_marker("-word")); // no space after marker
        assert!(!is_unordered_list_marker("")); // empty
        assert!(!is_unordered_list_marker("text")); // plain text
        assert!(!is_unordered_list_marker("# heading")); // heading
    }

    /// `is_block_boundary` recognises lines that start a new block-level
    /// construct and leaves ordinary paragraph text alone.
    #[test]
    fn test_is_block_boundary() {
        // Block boundaries
        assert!(is_block_boundary("")); // empty line
        assert!(is_block_boundary("# Heading")); // ATX heading
        assert!(is_block_boundary("## Level 2")); // ATX heading
        assert!(is_block_boundary("```rust")); // code fence
        assert!(is_block_boundary("~~~")); // tilde code fence
        assert!(is_block_boundary("> quote")); // blockquote
        assert!(is_block_boundary("| cell |")); // table
        assert!(is_block_boundary("[link]: http://example.com")); // reference def
        assert!(is_block_boundary("---")); // horizontal rule
        assert!(is_block_boundary("***")); // horizontal rule
        assert!(is_block_boundary("- item")); // unordered list
        assert!(is_block_boundary("* item")); // unordered list
        assert!(is_block_boundary("+ item")); // unordered list
        assert!(is_block_boundary("1. item")); // ordered list
        assert!(is_block_boundary("10. item")); // ordered list
        assert!(is_block_boundary(": definition")); // definition list
        assert!(is_block_boundary(":::")); // div marker
        assert!(is_block_boundary("::::: {.callout-note}")); // div marker with attrs

        // NOT block boundaries (paragraph continuation)
        assert!(!is_block_boundary("regular text"));
        assert!(!is_block_boundary("*emphasis*")); // emphasis, not list
        assert!(!is_block_boundary("[link](url)")); // inline link, not reference def
        assert!(!is_block_boundary("some words here"));
    }

    /// A definition-list item directly after a one-line paragraph must stay on
    /// its own line when the text is passed through `reflow_markdown`.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        // Verifies that a definition list item after a single-line paragraph
        // is treated as a block boundary, not merged into the paragraph
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let input = "Term\n: Definition of the term";
        let result = reflow_markdown(input, &options);
        // The definition list marker should remain on its own line
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary(trimmed, original)` takes the trimmed line and the
    /// original line; the second argument carries the indentation that the
    /// indented-code cases below depend on.
    #[test]
    fn test_is_paragraph_boundary() {
        // Core block boundary checks are inherited
        assert!(is_paragraph_boundary("# Heading", "# Heading"));
        assert!(is_paragraph_boundary("- item", "- item"));
        assert!(is_paragraph_boundary(":::", ":::"));
        assert!(is_paragraph_boundary(": definition", ": definition"));

        // Indented code blocks (≥4 spaces or tab)
        assert!(is_paragraph_boundary("code", "    code"));
        assert!(is_paragraph_boundary("code", "\tcode"));

        // Table rows via is_potential_table_row
        assert!(is_paragraph_boundary("| a | b |", "| a | b |"));
        assert!(is_paragraph_boundary("a | b", "a | b")); // pipe-delimited without leading pipe

        // Not paragraph boundaries
        assert!(!is_paragraph_boundary("regular text", "regular text"));
        assert!(!is_paragraph_boundary("text", "  text")); // 2-space indent is not code
    }

    /// Asking to reflow a `:::` div marker line yields `None`.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        // Verifies that div markers (:::) are treated as paragraph boundaries
        // in reflow_paragraph_at_line, preventing reflow across div boundaries
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the div marker — should not be reflowed
        let result = reflow_paragraph_at_line(content, 3, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}
rumdl_lib/utils/text_reflow.rs

rumdl_lib/utils/
text_reflow.rs