rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13    LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14    SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18    text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// How string length is measured when a line is compared against the
/// configured line-length limit.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s).
    ///
    /// Note that this is not a grapheme-cluster count: a user-perceived
    /// character built from several code points (for example a base letter
    /// followed by a combining mark) is counted once per code point.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes
    Bytes,
}
35
36/// Calculate the display length of a string based on the length mode
37fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38    match mode {
39        ReflowLengthMode::Chars => s.chars().count(),
40        ReflowLengthMode::Visual => s.width(),
41        ReflowLengthMode::Bytes => s.len(),
42    }
43}
44
45/// Options for reflowing text
46#[derive(Clone)]
47pub struct ReflowOptions {
48    /// Target line length
49    pub line_length: usize,
50    /// Whether to break on sentence boundaries when possible
51    pub break_on_sentences: bool,
52    /// Whether to preserve existing line breaks in paragraphs
53    pub preserve_breaks: bool,
54    /// Whether to enforce one sentence per line
55    pub sentence_per_line: bool,
56    /// Whether to use semantic line breaks (cascading split strategy)
57    pub semantic_line_breaks: bool,
58    /// Custom abbreviations for sentence detection
59    /// Periods are optional - both "Dr" and "Dr." work the same
60    /// Custom abbreviations are always added to the built-in defaults
61    pub abbreviations: Option<Vec<String>>,
62    /// How to measure string length for line-length comparisons
63    pub length_mode: ReflowLengthMode,
64    /// Whether to treat {#id .class key="value"} as atomic (unsplittable) elements.
65    /// Enabled for MkDocs and Kramdown flavors.
66    pub attr_lists: bool,
67    /// Whether to require uppercase after periods for sentence detection.
68    /// When true (default), only "word. Capital" is a sentence boundary.
69    /// When false, "word. lowercase" is also treated as a sentence boundary.
70    /// Does not affect ! and ? which are always treated as sentence boundaries.
71    pub require_sentence_capital: bool,
72    /// Cap list continuation indent to this value when set.
73    /// Used by mkdocs flavor where continuation is always 4 spaces
74    /// regardless of checkbox markers.
75    pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79    fn default() -> Self {
80        Self {
81            line_length: 80,
82            break_on_sentences: true,
83            preserve_breaks: false,
84            sentence_per_line: false,
85            semantic_line_breaks: false,
86            abbreviations: None,
87            length_mode: ReflowLengthMode::default(),
88            attr_lists: false,
89            require_sentence_capital: true,
90            max_list_continuation_indent: None,
91        }
92    }
93}
94
/// Build a per-character mask that is `true` for every character belonging to
/// an inline code span, delimiters included.
///
/// The mask is indexed by character position (not byte offset) and has one
/// entry per `char` of `text`. A span opens with a run of backticks of any
/// length and closes at the next run of exactly the same length; an opening
/// run without a matching close is treated as literal text.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Number of consecutive backticks starting at `from`.
    let backtick_run = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if chars[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let span_start = cursor;
        let opener_len = backtick_run(span_start);

        // Look for a closing run with the same number of backticks.
        let mut scan = span_start + opener_len;
        let mut span_end = None;
        while scan < total {
            if chars[scan] != '`' {
                scan += 1;
                continue;
            }
            let closer_len = backtick_run(scan);
            scan += closer_len;
            if closer_len == opener_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening delimiter, content and closing delimiter are all "in code".
                for slot in &mut mask[span_start..end] {
                    *slot = true;
                }
                cursor = end;
            }
            // Unclosed run: literal backticks; resume right after them.
            None => cursor = span_start + opener_len,
        }
    }

    mask
}
155
156/// Detect if a character position is a sentence boundary
157/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
158/// Supports both ASCII punctuation (. ! ?) and CJK punctuation (。 ！ ？)
159fn is_sentence_boundary(
160    text: &str,
161    pos: usize,
162    abbreviations: &HashSet<String>,
163    require_sentence_capital: bool,
164) -> bool {
165    let chars: Vec<char> = text.chars().collect();
166
167    if pos + 1 >= chars.len() {
168        return false;
169    }
170
171    let c = chars[pos];
172    let next_char = chars[pos + 1];
173
174    // Check for CJK sentence-ending punctuation (。, ！, ？)
175    // CJK punctuation doesn't require space or uppercase after it
176    if is_cjk_sentence_ending(c) {
177        // Skip any trailing emphasis/strikethrough markers
178        let mut after_punct_pos = pos + 1;
179        while after_punct_pos < chars.len()
180            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181        {
182            after_punct_pos += 1;
183        }
184
185        // Skip whitespace
186        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187            after_punct_pos += 1;
188        }
189
190        // Check if we have more content (any non-whitespace)
191        if after_punct_pos >= chars.len() {
192            return false;
193        }
194
195        // Skip leading emphasis/strikethrough markers
196        while after_punct_pos < chars.len()
197            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198        {
199            after_punct_pos += 1;
200        }
201
202        if after_punct_pos >= chars.len() {
203            return false;
204        }
205
206        // For CJK, we accept any character as the start of the next sentence
207        // (no uppercase requirement, since CJK doesn't have case)
208        return true;
209    }
210
211    // Check for ASCII sentence-ending punctuation
212    if c != '.' && c != '!' && c != '?' {
213        return false;
214    }
215
216    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
217    let (_space_pos, after_space_pos) = if next_char == ' ' {
218        // Normal case: punctuation followed by space
219        (pos + 1, pos + 2)
220    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221        // Sentence ends with quote - check what follows the quote
222        if chars[pos + 2] == ' ' {
223            // Just quote followed by space: 'sentence." '
224            (pos + 2, pos + 3)
225        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226            // Quote followed by emphasis: 'sentence."* '
227            (pos + 3, pos + 4)
228        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229            && pos + 4 < chars.len()
230            && chars[pos + 3] == chars[pos + 2]
231            && chars[pos + 4] == ' '
232        {
233            // Quote followed by bold: 'sentence."** '
234            (pos + 4, pos + 5)
235        } else {
236            return false;
237        }
238    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
240        (pos + 2, pos + 3)
241    } else if (next_char == '*' || next_char == '_')
242        && pos + 3 < chars.len()
243        && chars[pos + 2] == next_char
244        && chars[pos + 3] == ' '
245    {
246        // Sentence ends with bold: "sentence.** " or "sentence.__ "
247        (pos + 3, pos + 4)
248    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249        // Sentence ends with strikethrough: "sentence.~~ "
250        (pos + 3, pos + 4)
251    } else {
252        return false;
253    };
254
255    // Skip all whitespace after the space to find the start of the next sentence
256    let mut next_char_pos = after_space_pos;
257    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258        next_char_pos += 1;
259    }
260
261    // Check if we reached the end of the string
262    if next_char_pos >= chars.len() {
263        return false;
264    }
265
266    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
267    let mut first_letter_pos = next_char_pos;
268    while first_letter_pos < chars.len()
269        && (chars[first_letter_pos] == '*'
270            || chars[first_letter_pos] == '_'
271            || chars[first_letter_pos] == '~'
272            || is_opening_quote(chars[first_letter_pos]))
273    {
274        first_letter_pos += 1;
275    }
276
277    // Check if we reached the end after skipping emphasis
278    if first_letter_pos >= chars.len() {
279        return false;
280    }
281
282    let first_char = chars[first_letter_pos];
283
284    // For ! and ?, sentence boundaries are unambiguous — no uppercase requirement
285    if c == '!' || c == '?' {
286        return true;
287    }
288
289    // Period-specific checks: periods are ambiguous (abbreviations, decimals, initials)
290    // so we apply additional guards before accepting a sentence boundary.
291
292    if pos > 0 {
293        // Check for common abbreviations
294        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296            return false;
297        }
298
299        // Check for decimal numbers (e.g., "3.14 is pi")
300        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301            return false;
302        }
303
304        // Check for single-letter initials (e.g., "J. K. Rowling")
305        // A single uppercase letter before the period preceded by whitespace or start
306        // is likely an initial, not a sentence ending.
307        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308            return false;
309        }
310    }
311
312    // In strict mode, require uppercase or CJK to start the next sentence after a period.
313    // In relaxed mode, accept any alphanumeric character.
314    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315        return false;
316    }
317
318    true
319}
320
321/// Split text into sentences
322pub fn split_into_sentences(text: &str) -> Vec<String> {
323    split_into_sentences_custom(text, &None)
324}
325
326/// Split text into sentences with custom abbreviations
327pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328    let abbreviations = get_abbreviations(custom_abbreviations);
329    split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332/// Internal function to split text into sentences with a pre-computed abbreviations set
333/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
334fn split_into_sentences_with_set(
335    text: &str,
336    abbreviations: &HashSet<String>,
337    require_sentence_capital: bool,
338) -> Vec<String> {
339    // Pre-compute which character positions are inside inline code spans
340    let in_code = compute_inline_code_mask(text);
341
342    let mut sentences = Vec::new();
343    let mut current_sentence = String::new();
344    let mut chars = text.chars().peekable();
345    let mut pos = 0;
346
347    while let Some(c) = chars.next() {
348        current_sentence.push(c);
349
350        if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
352            while let Some(&next) = chars.peek() {
353                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354                    current_sentence.push(chars.next().unwrap());
355                    pos += 1;
356                } else {
357                    break;
358                }
359            }
360
361            // Consume the space after the sentence
362            if chars.peek() == Some(&' ') {
363                chars.next();
364                pos += 1;
365            }
366
367            sentences.push(current_sentence.trim().to_string());
368            current_sentence.clear();
369        }
370
371        pos += 1;
372    }
373
374    // Add any remaining text as the last sentence
375    if !current_sentence.trim().is_empty() {
376        sentences.push(current_sentence.trim().to_string());
377    }
378    sentences
379}
380
/// Check if a line is a horizontal rule / thematic break (`---`, `___`, `***`).
///
/// The line must start with `-`, `_` or `*` and contain at least three of that
/// same character, optionally separated by spaces or tabs (CommonMark allows
/// both between the markers). Any other character disqualifies the line.
/// Leading whitespace is not accepted here; the callers in this file pass
/// already-trimmed text.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character fixes which marker the whole line must use.
    let Some(marker) = line.chars().next().filter(|&ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        match ch {
            _ if ch == marker => marker_count += 1,
            // Separators between markers.
            ' ' | '\t' => {}
            _ => return false,
        }
    }

    marker_count >= 3
}
409
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// The line must start with one or more ASCII digits, followed by a period and
/// a space. Only ASCII `0`-`9` count: CommonMark ordered list markers are made
/// of arabic digits, so other Unicode numerics such as `²` or `٣` do not start
/// a list item. "2019." without a following space is NOT treated as a list
/// item, to avoid false positives (consistent with list marker extraction).
fn is_numbered_list_item(line: &str) -> bool {
    // Byte offset where the leading run of digits ends (always a char boundary).
    let digits_end = line
        .find(|ch: char| !ch.is_ascii_digit())
        .unwrap_or(line.len());

    digits_end > 0 && line[digits_end..].starts_with(". ")
}
433
434/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
435fn is_unordered_list_marker(s: &str) -> bool {
436    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437        && !is_horizontal_rule(s)
438        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441/// Shared structural checks for block boundary detection.
442/// Checks elements that only depend on the trimmed line content.
443fn is_block_boundary_core(trimmed: &str) -> bool {
444    trimmed.is_empty()
445        || trimmed.starts_with('#')
446        || trimmed.starts_with("```")
447        || trimmed.starts_with("~~~")
448        || trimmed.starts_with('>')
449        || (trimmed.starts_with('[') && trimmed.contains("]:"))
450        || is_horizontal_rule(trimmed)
451        || is_unordered_list_marker(trimmed)
452        || is_numbered_list_item(trimmed)
453        || is_definition_list_item(trimmed)
454        || trimmed.starts_with(":::")
455}
456
457/// Check if a trimmed line starts a new structural block element.
458/// Used for paragraph boundary detection in `reflow_markdown()`.
459fn is_block_boundary(trimmed: &str) -> bool {
460    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463/// Check if a line starts a new structural block for paragraph boundary detection
464/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
465/// (≥4 spaces) and table row detection via `is_potential_table_row`.
466fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467    is_block_boundary_core(trimmed)
468        || calculate_indentation_width_default(line) >= 4
469        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Check if a line ends with a Markdown hard line break.
///
/// CommonMark has two spellings for a hard break:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
///
/// A single trailing `\r` (CRLF input) is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let without_cr = match line.strip_suffix('\r') {
        Some(stripped) => stripped,
        None => line,
    };
    without_cr.ends_with('\\') || without_cr.ends_with("  ")
}
481
/// Check if the last character of `text` is sentence-terminating ASCII
/// punctuation (`.`, `!` or `?`).
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Trim trailing whitespace from `s` while keeping a hard line break intact.
///
/// A single trailing `\r` (CRLF line ending) is removed first. After that:
/// - a line ending in a backslash (mdformat-style hard break) is returned unchanged;
/// - a line ending in two or more spaces (traditional hard break) is trimmed
///   and given back exactly two trailing spaces, unless it was all whitespace,
///   in which case the result is empty;
/// - any other line simply loses its trailing whitespace.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let keeps_space_break = line.ends_with("  ") && !content.is_empty();

    let mut trimmed = String::with_capacity(content.len() + 2);
    trimmed.push_str(content);
    if keeps_space_break {
        // Normalise any longer run of spaces to the canonical two.
        trimmed.push_str("  ");
    }
    trimmed
}
517
518/// Parse markdown elements using the appropriate parser based on options.
519fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520    if options.attr_lists {
521        parse_markdown_elements_with_attr_lists(text)
522    } else {
523        parse_markdown_elements(text)
524    }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528    // For sentence-per-line mode, always process regardless of length
529    if options.sentence_per_line {
530        let elements = parse_elements(line, options);
531        return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532    }
533
534    // For semantic line breaks mode, use cascading split strategy
535    if options.semantic_line_breaks {
536        let elements = parse_elements(line, options);
537        return reflow_elements_semantic(&elements, options);
538    }
539
540    // Quick check: if line is already short enough or no wrapping requested, return as-is
541    // line_length = 0 means no wrapping (unlimited line length)
542    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543        return vec![line.to_string()];
544    }
545
546    // Parse the markdown to identify elements
547    let elements = parse_elements(line, options);
548
549    // Reflow the elements into lines
550    reflow_elements(&elements, options)
551}
552
/// Image source in a linked image structure
///
/// This is the inner image of a clickable badge such as `[![alt](img)](link)`.
/// The payload holds only the URL or reference label; `Element`'s `Display`
/// impl adds the surrounding `(...)` or `[...]` when rendering.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: ![alt](url)
    Inline(String),
    /// Reference image: ![alt][ref]
    Reference(String),
}
561
/// Link target in a linked image structure
///
/// This is the outer link that wraps the image of a clickable badge. The
/// payload holds only the URL or reference label; `Element`'s `Display` impl
/// adds the surrounding `(...)` or `[...]` when rendering.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: ](url)
    Inline(String),
    /// Reference link: ][ref]
    Reference(String),
}
570
/// Represents a piece of content in the markdown
///
/// Each variant is one inline unit of a line. The `Display` impl renders a
/// variant back to Markdown source, so variants whose delimiters are added by
/// `Display` (links, images, emphasis, math, ...) store only the inner parts,
/// while variants that `Display` writes verbatim store their complete raw text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// A complete markdown inline link [text](url)
    Link { text: String, url: String },
    /// A complete markdown reference link [text][ref]
    ReferenceLink { text: String, reference: String },
    /// A complete markdown empty reference link [text][]
    EmptyReferenceLink { text: String },
    /// A complete markdown shortcut reference link [ref]
    ShortcutReference { reference: String },
    /// A complete markdown inline image ![alt](url)
    InlineImage { alt: String, url: String },
    /// A complete markdown reference image ![alt][ref]
    ReferenceImage { alt: String, reference: String },
    /// A complete markdown empty reference image ![alt][]
    EmptyReferenceImage { alt: String },
    /// A clickable image badge in any of 4 forms:
    /// - [![alt](img-url)](link-url)
    /// - [![alt][img-ref]](link-url)
    /// - [![alt](img-url)][link-ref]
    /// - [![alt][img-ref]][link-ref]
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference [^note] (stores the label without `[^` and `]`)
    FootnoteReference { note: String },
    /// Strikethrough text ~~text~~ (stores the inner text; rendered with `~~`)
    Strikethrough(String),
    /// Wiki-style link [[wiki]] or [[wiki|text]] (stores what is between `[[` and `]]`)
    WikiLink(String),
    /// Inline math $math$ (stores the expression without the `$` delimiters)
    InlineMath(String),
    /// Display math $$math$$ (stores the expression without the `$$` delimiters)
    DisplayMath(String),
    /// Emoji shortcode :emoji: (stores the name without the colons)
    EmojiShortcode(String),
    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
    /// (rendered verbatim, so presumably stored with its angle brackets)
    Autolink(String),
    /// HTML tag <tag> or </tag> or <tag/> (rendered verbatim)
    HtmlTag(String),
    /// HTML entity &nbsp; or &#123; (rendered verbatim)
    HtmlEntity(String),
    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}} (rendered verbatim)
    HugoShortcode(String),
    /// MkDocs/kramdown attribute list {#id .class key="value"} (rendered verbatim)
    AttrList(String),
    /// Inline code `code`
    ///
    /// NOTE(review): `Display` always wraps the stored string in single
    /// backticks; confirm how the parser stores spans that were written with
    /// longer backtick delimiters.
    Code(String),
    /// Bold text **text** or __text__
    Bold {
        content: String,
        /// True if underscore markers (__), false for asterisks (**)
        underscore: bool,
    },
    /// Italic text *text* or _text_
    Italic {
        content: String,
        /// True if underscore marker (_), false for asterisk (*)
        underscore: bool,
    },
}
637
638impl std::fmt::Display for Element {
639    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
640        match self {
641            Element::Text(s) => write!(f, "{s}"),
642            Element::Link { text, url } => write!(f, "[{text}]({url})"),
643            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
644            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
645            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
646            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
647            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
648            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
649            Element::LinkedImage {
650                alt,
651                img_source,
652                link_target,
653            } => {
654                // Build the image part: ![alt](url) or ![alt][ref]
655                let img_part = match img_source {
656                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
657                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
658                };
659                // Build the link part: (url) or [ref]
660                match link_target {
661                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
662                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
663                }
664            }
665            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
666            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
667            Element::WikiLink(s) => write!(f, "[[{s}]]"),
668            Element::InlineMath(s) => write!(f, "${s}$"),
669            Element::DisplayMath(s) => write!(f, "$${s}$$"),
670            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
671            Element::Autolink(s) => write!(f, "{s}"),
672            Element::HtmlTag(s) => write!(f, "{s}"),
673            Element::HtmlEntity(s) => write!(f, "{s}"),
674            Element::HugoShortcode(s) => write!(f, "{s}"),
675            Element::AttrList(s) => write!(f, "{s}"),
676            Element::Code(s) => write!(f, "`{s}`"),
677            Element::Bold { content, underscore } => {
678                if *underscore {
679                    write!(f, "__{content}__")
680                } else {
681                    write!(f, "**{content}**")
682                }
683            }
684            Element::Italic { content, underscore } => {
685                if *underscore {
686                    write!(f, "_{content}_")
687                } else {
688                    write!(f, "*{content}*")
689                }
690            }
691        }
692    }
693}
694
695impl Element {
696    /// Calculate the display width of this element using the given length mode.
697    /// This formats the element and computes its width, correctly handling
698    /// visual width for CJK characters and other wide glyphs.
699    fn display_width(&self, mode: ReflowLengthMode) -> usize {
700        let formatted = format!("{self}");
701        display_len(&formatted, mode)
702    }
703}
704
/// An emphasis or formatting span parsed by pulldown-cmark
///
/// One value describes a single emphasis, strong or strikethrough run in the
/// source text. All offsets are byte offsets into that text.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the emphasis starts (including markers)
    start: usize,
    /// Byte offset where the emphasis ends (after closing markers)
    end: usize,
    /// The content inside the emphasis markers
    content: String,
    /// Whether this is strong (bold) emphasis
    is_strong: bool,
    /// Whether this is strikethrough (~~text~~)
    is_strikethrough: bool,
    /// Whether the original used underscore markers (`_` / `__`) rather than
    /// asterisks. Set for both emphasis and strong spans; always false for
    /// strikethrough.
    uses_underscore: bool,
}
721
722/// Extract emphasis and strikethrough spans from text using pulldown-cmark
723///
724/// This provides CommonMark-compliant emphasis parsing, correctly handling:
725/// - Nested emphasis like `*text **bold** more*`
726/// - Left/right flanking delimiter rules
727/// - Underscore vs asterisk markers
728/// - GFM strikethrough (~~text~~)
729///
730/// Returns spans sorted by start position.
731fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732    let mut spans = Vec::new();
733    let mut options = Options::empty();
734    options.insert(Options::ENABLE_STRIKETHROUGH);
735
736    // Stacks to track nested formatting with their start positions
737    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
738    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739    let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741    let parser = Parser::new_ext(text, options).into_offset_iter();
742
743    for (event, range) in parser {
744        match event {
745            Event::Start(Tag::Emphasis) => {
746                // Check if this uses underscore by looking at the original text
747                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748                emphasis_stack.push((range.start, uses_underscore));
749            }
750            Event::End(TagEnd::Emphasis) => {
751                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752                    // Extract content between the markers (1 char marker on each side)
753                    let content_start = start_byte + 1;
754                    let content_end = range.end - 1;
755                    if content_end > content_start
756                        && let Some(content) = text.get(content_start..content_end)
757                    {
758                        spans.push(EmphasisSpan {
759                            start: start_byte,
760                            end: range.end,
761                            content: content.to_string(),
762                            is_strong: false,
763                            is_strikethrough: false,
764                            uses_underscore,
765                        });
766                    }
767                }
768            }
769            Event::Start(Tag::Strong) => {
770                // Check if this uses underscore by looking at the original text
771                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772                strong_stack.push((range.start, uses_underscore));
773            }
774            Event::End(TagEnd::Strong) => {
775                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776                    // Extract content between the markers (2 char marker on each side)
777                    let content_start = start_byte + 2;
778                    let content_end = range.end - 2;
779                    if content_end > content_start
780                        && let Some(content) = text.get(content_start..content_end)
781                    {
782                        spans.push(EmphasisSpan {
783                            start: start_byte,
784                            end: range.end,
785                            content: content.to_string(),
786                            is_strong: true,
787                            is_strikethrough: false,
788                            uses_underscore,
789                        });
790                    }
791                }
792            }
793            Event::Start(Tag::Strikethrough) => {
794                strikethrough_stack.push(range.start);
795            }
796            Event::End(TagEnd::Strikethrough) => {
797                if let Some(start_byte) = strikethrough_stack.pop() {
798                    // Extract content between the ~~ markers (2 char marker on each side)
799                    let content_start = start_byte + 2;
800                    let content_end = range.end - 2;
801                    if content_end > content_start
802                        && let Some(content) = text.get(content_start..content_end)
803                    {
804                        spans.push(EmphasisSpan {
805                            start: start_byte,
806                            end: range.end,
807                            content: content.to_string(),
808                            is_strong: false,
809                            is_strikethrough: true,
810                            uses_underscore: false,
811                        });
812                    }
813                }
814            }
815            _ => {}
816        }
817    }
818
819    // Sort by start position
820    spans.sort_by_key(|s| s.start);
821    spans
822}
823
824/// Parse markdown elements from text preserving the raw syntax
825///
826/// Detection order is critical:
827/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
828/// 2. Inline images ![alt](url) - before links to handle ! prefix
829/// 3. Reference images ![alt][ref] - before reference links
830/// 4. Inline links [text](url) - before reference links
831/// 5. Reference links [text][ref] - before shortcut references
832/// 6. Shortcut reference links [ref] - detected last to avoid false positives
833/// 7. Other elements (code, bold, italic, etc.) - processed normally
834fn parse_markdown_elements(text: &str) -> Vec<Element> {
835    parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839    parse_markdown_elements_inner(text, true)
840}
841
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843    let mut elements = Vec::new();
844    let mut remaining = text;
845
846    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
847    let emphasis_spans = extract_emphasis_spans(text);
848
849    while !remaining.is_empty() {
850        // Calculate current byte offset in original text
851        let current_offset = text.len() - remaining.len();
852        // Find the earliest occurrence of any markdown pattern
853        // Store (start, end, pattern_name) to unify standard Regex and FancyRegex match results
854        let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856        // Check for linked images FIRST (all 4 variants)
857        // Quick literal check: only run expensive regexes if we might have a linked image
858        // Pattern starts with "[!" so check for that first
859        if remaining.contains("[!") {
860            // Pattern 1: [![alt](img)](link) - inline image in inline link
861            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863            {
864                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865            }
866
867            // Pattern 2: [![alt][ref]](link) - reference image in inline link
868            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870            {
871                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872            }
873
874            // Pattern 3: [![alt](img)][ref] - inline image in reference link
875            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877            {
878                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879            }
880
881            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
882            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884            {
885                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886            }
887        }
888
889        // Check for images (they start with ! so should be detected before links)
890        // Inline images - ![alt](url)
891        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893        {
894            earliest_match = Some((m.start(), m.end(), "inline_image"));
895        }
896
897        // Reference images - ![alt][ref]
898        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900        {
901            earliest_match = Some((m.start(), m.end(), "ref_image"));
902        }
903
904        // Check for footnote references - [^note]
905        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907        {
908            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909        }
910
911        // Check for inline links - [text](url)
912        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914        {
915            earliest_match = Some((m.start(), m.end(), "inline_link"));
916        }
917
918        // Check for reference links - [text][ref]
919        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921        {
922            earliest_match = Some((m.start(), m.end(), "ref_link"));
923        }
924
925        // Check for shortcut reference links - [ref]
926        // Only check if we haven't found an earlier pattern that would conflict
927        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929        {
930            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931        }
932
933        // Check for wiki-style links - [[wiki]]
934        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936        {
937            earliest_match = Some((m.start(), m.end(), "wiki_link"));
938        }
939
940        // Check for display math first (before inline) - $$math$$
941        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943        {
944            earliest_match = Some((m.start(), m.end(), "display_math"));
945        }
946
947        // Check for inline math - $math$
948        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950        {
951            earliest_match = Some((m.start(), m.end(), "inline_math"));
952        }
953
954        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
955
956        // Check for emoji shortcodes - :emoji:
957        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959        {
960            earliest_match = Some((m.start(), m.end(), "emoji"));
961        }
962
963        // Check for HTML entities - &nbsp; etc
964        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966        {
967            earliest_match = Some((m.start(), m.end(), "html_entity"));
968        }
969
970        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
971        // Must be checked before other patterns to avoid false sentence breaks
972        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974        {
975            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976        }
977
978        // Check for HTML tags - <tag> </tag> <tag/>
979        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
980        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982        {
983            // Check if this is an autolink (starts with protocol or mailto:)
984            let matched_text = &remaining[m.start()..m.end()];
985            let is_url_autolink = matched_text.starts_with("<http://")
986                || matched_text.starts_with("<https://")
987                || matched_text.starts_with("<mailto:")
988                || matched_text.starts_with("<ftp://")
989                || matched_text.starts_with("<ftps://");
990
991            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
992            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
993            let is_email_autolink = {
994                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995                EMAIL_PATTERN.is_match(content)
996            };
997
998            if is_url_autolink || is_email_autolink {
999                earliest_match = Some((m.start(), m.end(), "autolink"));
1000            } else {
1001                earliest_match = Some((m.start(), m.end(), "html_tag"));
1002            }
1003        }
1004
1005        // Find earliest non-link special characters
1006        let mut next_special = remaining.len();
1007        let mut special_type = "";
1008        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009        let mut attr_list_len: usize = 0;
1010
1011        // Check for code spans (not handled by pulldown-cmark in this context)
1012        if let Some(pos) = remaining.find('`')
1013            && pos < next_special
1014        {
1015            next_special = pos;
1016            special_type = "code";
1017        }
1018
1019        // Check for MkDocs/kramdown attr lists - {#id .class key="value"}
1020        if attr_lists
1021            && let Some(pos) = remaining.find('{')
1022            && pos < next_special
1023            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024            && m.start() == 0
1025        {
1026            next_special = pos;
1027            special_type = "attr_list";
1028            attr_list_len = m.end();
1029        }
1030
1031        // Check for emphasis using pulldown-cmark's pre-extracted spans
1032        // Find the earliest emphasis span that starts within remaining text
1033        for span in &emphasis_spans {
1034            if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035                let pos_in_remaining = span.start - current_offset;
1036                if pos_in_remaining < next_special {
1037                    next_special = pos_in_remaining;
1038                    special_type = "pulldown_emphasis";
1039                    pulldown_emphasis = Some(span);
1040                }
1041                break; // Spans are sorted by start position, so first match is earliest
1042            }
1043        }
1044
1045        // Determine which pattern to process first
1046        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047            pos < next_special
1048        } else {
1049            false
1050        };
1051
1052        if should_process_markdown_link {
1053            let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055            // Add any text before the match
1056            if pos > 0 {
1057                elements.push(Element::Text(remaining[..pos].to_string()));
1058            }
1059
1060            // Process the matched pattern
1061            match pattern_type {
1062                // Pattern 1: [![alt](img)](link) - inline image in inline link
1063                "linked_image_ii" => {
1064                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065                        let alt = caps.get(1).map_or("", |m| m.as_str());
1066                        let img_url = caps.get(2).map_or("", |m| m.as_str());
1067                        let link_url = caps.get(3).map_or("", |m| m.as_str());
1068                        elements.push(Element::LinkedImage {
1069                            alt: alt.to_string(),
1070                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1071                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072                        });
1073                        remaining = &remaining[match_end..];
1074                    } else {
1075                        elements.push(Element::Text("[".to_string()));
1076                        remaining = &remaining[1..];
1077                    }
1078                }
1079                // Pattern 2: [![alt][ref]](link) - reference image in inline link
1080                "linked_image_ri" => {
1081                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082                        let alt = caps.get(1).map_or("", |m| m.as_str());
1083                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
1084                        let link_url = caps.get(3).map_or("", |m| m.as_str());
1085                        elements.push(Element::LinkedImage {
1086                            alt: alt.to_string(),
1087                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089                        });
1090                        remaining = &remaining[match_end..];
1091                    } else {
1092                        elements.push(Element::Text("[".to_string()));
1093                        remaining = &remaining[1..];
1094                    }
1095                }
1096                // Pattern 3: [![alt](img)][ref] - inline image in reference link
1097                "linked_image_ir" => {
1098                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099                        let alt = caps.get(1).map_or("", |m| m.as_str());
1100                        let img_url = caps.get(2).map_or("", |m| m.as_str());
1101                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
1102                        elements.push(Element::LinkedImage {
1103                            alt: alt.to_string(),
1104                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1105                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106                        });
1107                        remaining = &remaining[match_end..];
1108                    } else {
1109                        elements.push(Element::Text("[".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
1114                "linked_image_rr" => {
1115                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116                        let alt = caps.get(1).map_or("", |m| m.as_str());
1117                        let img_ref = caps.get(2).map_or("", |m| m.as_str());
1118                        let link_ref = caps.get(3).map_or("", |m| m.as_str());
1119                        elements.push(Element::LinkedImage {
1120                            alt: alt.to_string(),
1121                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123                        });
1124                        remaining = &remaining[match_end..];
1125                    } else {
1126                        elements.push(Element::Text("[".to_string()));
1127                        remaining = &remaining[1..];
1128                    }
1129                }
1130                "inline_image" => {
1131                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132                        let alt = caps.get(1).map_or("", |m| m.as_str());
1133                        let url = caps.get(2).map_or("", |m| m.as_str());
1134                        elements.push(Element::InlineImage {
1135                            alt: alt.to_string(),
1136                            url: url.to_string(),
1137                        });
1138                        remaining = &remaining[match_end..];
1139                    } else {
1140                        elements.push(Element::Text("!".to_string()));
1141                        remaining = &remaining[1..];
1142                    }
1143                }
1144                "ref_image" => {
1145                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146                        let alt = caps.get(1).map_or("", |m| m.as_str());
1147                        let reference = caps.get(2).map_or("", |m| m.as_str());
1148
1149                        if reference.is_empty() {
1150                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151                        } else {
1152                            elements.push(Element::ReferenceImage {
1153                                alt: alt.to_string(),
1154                                reference: reference.to_string(),
1155                            });
1156                        }
1157                        remaining = &remaining[match_end..];
1158                    } else {
1159                        elements.push(Element::Text("!".to_string()));
1160                        remaining = &remaining[1..];
1161                    }
1162                }
1163                "footnote_ref" => {
1164                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165                        let note = caps.get(1).map_or("", |m| m.as_str());
1166                        elements.push(Element::FootnoteReference { note: note.to_string() });
1167                        remaining = &remaining[match_end..];
1168                    } else {
1169                        elements.push(Element::Text("[".to_string()));
1170                        remaining = &remaining[1..];
1171                    }
1172                }
1173                "inline_link" => {
1174                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175                        let text = caps.get(1).map_or("", |m| m.as_str());
1176                        let url = caps.get(2).map_or("", |m| m.as_str());
1177                        elements.push(Element::Link {
1178                            text: text.to_string(),
1179                            url: url.to_string(),
1180                        });
1181                        remaining = &remaining[match_end..];
1182                    } else {
1183                        // Fallback - shouldn't happen
1184                        elements.push(Element::Text("[".to_string()));
1185                        remaining = &remaining[1..];
1186                    }
1187                }
1188                "ref_link" => {
1189                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190                        let text = caps.get(1).map_or("", |m| m.as_str());
1191                        let reference = caps.get(2).map_or("", |m| m.as_str());
1192
1193                        if reference.is_empty() {
1194                            // Empty reference link [text][]
1195                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196                        } else {
1197                            // Regular reference link [text][ref]
1198                            elements.push(Element::ReferenceLink {
1199                                text: text.to_string(),
1200                                reference: reference.to_string(),
1201                            });
1202                        }
1203                        remaining = &remaining[match_end..];
1204                    } else {
1205                        // Fallback - shouldn't happen
1206                        elements.push(Element::Text("[".to_string()));
1207                        remaining = &remaining[1..];
1208                    }
1209                }
1210                "shortcut_ref" => {
1211                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212                        let reference = caps.get(1).map_or("", |m| m.as_str());
1213                        elements.push(Element::ShortcutReference {
1214                            reference: reference.to_string(),
1215                        });
1216                        remaining = &remaining[match_end..];
1217                    } else {
1218                        // Fallback - shouldn't happen
1219                        elements.push(Element::Text("[".to_string()));
1220                        remaining = &remaining[1..];
1221                    }
1222                }
1223                "wiki_link" => {
1224                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225                        let content = caps.get(1).map_or("", |m| m.as_str());
1226                        elements.push(Element::WikiLink(content.to_string()));
1227                        remaining = &remaining[match_end..];
1228                    } else {
1229                        elements.push(Element::Text("[[".to_string()));
1230                        remaining = &remaining[2..];
1231                    }
1232                }
1233                "display_math" => {
1234                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235                        let math = caps.get(1).map_or("", |m| m.as_str());
1236                        elements.push(Element::DisplayMath(math.to_string()));
1237                        remaining = &remaining[match_end..];
1238                    } else {
1239                        elements.push(Element::Text("$$".to_string()));
1240                        remaining = &remaining[2..];
1241                    }
1242                }
1243                "inline_math" => {
1244                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245                        let math = caps.get(1).map_or("", |m| m.as_str());
1246                        elements.push(Element::InlineMath(math.to_string()));
1247                        remaining = &remaining[match_end..];
1248                    } else {
1249                        elements.push(Element::Text("$".to_string()));
1250                        remaining = &remaining[1..];
1251                    }
1252                }
1253                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1254                "emoji" => {
1255                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256                        let emoji = caps.get(1).map_or("", |m| m.as_str());
1257                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1258                        remaining = &remaining[match_end..];
1259                    } else {
1260                        elements.push(Element::Text(":".to_string()));
1261                        remaining = &remaining[1..];
1262                    }
1263                }
1264                "html_entity" => {
1265                    // HTML entities are captured whole
1266                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267                    remaining = &remaining[match_end..];
1268                }
1269                "hugo_shortcode" => {
1270                    // Hugo shortcodes are atomic elements - preserve them exactly
1271                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272                    remaining = &remaining[match_end..];
1273                }
1274                "autolink" => {
1275                    // Autolinks are atomic elements - preserve them exactly
1276                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277                    remaining = &remaining[match_end..];
1278                }
1279                "html_tag" => {
1280                    // HTML tags are captured whole
1281                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282                    remaining = &remaining[match_end..];
1283                }
1284                _ => {
1285                    // Unknown pattern, treat as text
1286                    elements.push(Element::Text("[".to_string()));
1287                    remaining = &remaining[1..];
1288                }
1289            }
1290        } else {
1291            // Process non-link special characters
1292
1293            // Add any text before the special character
1294            if next_special > 0 && next_special < remaining.len() {
1295                elements.push(Element::Text(remaining[..next_special].to_string()));
1296                remaining = &remaining[next_special..];
1297            }
1298
1299            // Process the special element
1300            match special_type {
1301                "code" => {
1302                    // Find end of code
1303                    if let Some(code_end) = remaining[1..].find('`') {
1304                        let code = &remaining[1..=code_end];
1305                        elements.push(Element::Code(code.to_string()));
1306                        remaining = &remaining[1 + code_end + 1..];
1307                    } else {
1308                        // No closing backtick, treat as text
1309                        elements.push(Element::Text(remaining.to_string()));
1310                        break;
1311                    }
1312                }
1313                "attr_list" => {
1314                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315                    remaining = &remaining[attr_list_len..];
1316                }
1317                "pulldown_emphasis" => {
1318                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1319                    if let Some(span) = pulldown_emphasis {
1320                        let span_len = span.end - span.start;
1321                        if span.is_strikethrough {
1322                            elements.push(Element::Strikethrough(span.content.clone()));
1323                        } else if span.is_strong {
1324                            elements.push(Element::Bold {
1325                                content: span.content.clone(),
1326                                underscore: span.uses_underscore,
1327                            });
1328                        } else {
1329                            elements.push(Element::Italic {
1330                                content: span.content.clone(),
1331                                underscore: span.uses_underscore,
1332                            });
1333                        }
1334                        remaining = &remaining[span_len..];
1335                    } else {
1336                        // Fallback - shouldn't happen
1337                        elements.push(Element::Text(remaining[..1].to_string()));
1338                        remaining = &remaining[1..];
1339                    }
1340                }
1341                _ => {
1342                    // No special elements found, add all remaining text
1343                    elements.push(Element::Text(remaining.to_string()));
1344                    break;
1345                }
1346            }
1347        }
1348    }
1349
1350    elements
1351}
1352
/// Decide whether a separating space is needed before more content is
/// appended to `current`.
///
/// Returns `false` when `current` is empty or already ends with a space,
/// `(`, `[` or `-`; returns `true` for any other final character.
fn should_insert_space_before_join(current: &str) -> bool {
    match current.chars().next_back() {
        None => false,
        Some(last) => !matches!(last, ' ' | '(' | '[' | '-'),
    }
}
1360
1361/// Reflow elements for sentence-per-line mode
1362fn reflow_elements_sentence_per_line(
1363    elements: &[Element],
1364    custom_abbreviations: &Option<Vec<String>>,
1365    require_sentence_capital: bool,
1366) -> Vec<String> {
1367    let abbreviations = get_abbreviations(custom_abbreviations);
1368    let mut lines = Vec::new();
1369    let mut current_line = String::new();
1370
1371    for (idx, element) in elements.iter().enumerate() {
1372        let element_str = format!("{element}");
1373
1374        // For text elements, split into sentences
1375        if let Element::Text(text) = element {
1376            // Simply append text - it already has correct spacing from tokenization
1377            let combined = format!("{current_line}{text}");
1378            // Use the pre-computed abbreviations set to avoid redundant computation
1379            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1380
1381            if sentences.len() > 1 {
1382                // We found sentence boundaries
1383                for (i, sentence) in sentences.iter().enumerate() {
1384                    if i == 0 {
1385                        // First sentence might continue from previous elements
1386                        // But check if it ends with an abbreviation
1387                        let trimmed = sentence.trim();
1388
1389                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1390                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1391                            current_line.clone_from(sentence);
1392                        } else {
1393                            // Normal case - emit the first sentence
1394                            lines.push(sentence.clone());
1395                            current_line.clear();
1396                        }
1397                    } else if i == sentences.len() - 1 {
1398                        // Last sentence: check if it's complete or incomplete
1399                        let trimmed = sentence.trim();
1400                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1401
1402                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1403                            // Complete sentence - emit it immediately
1404                            lines.push(sentence.clone());
1405                            current_line.clear();
1406                        } else {
1407                            // Incomplete sentence - save for next iteration
1408                            current_line.clone_from(sentence);
1409                        }
1410                    } else {
1411                        // Complete sentences in the middle
1412                        lines.push(sentence.clone());
1413                    }
1414                }
1415            } else {
1416                // Single sentence - check if it's complete
1417                let trimmed = combined.trim();
1418
1419                // If the combined result is only whitespace, don't accumulate it.
1420                // This prevents leading spaces on subsequent elements when lines
1421                // are joined with spaces during reflow iteration.
1422                if trimmed.is_empty() {
1423                    continue;
1424                }
1425
1426                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1427
1428                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1429                    // Complete single sentence - emit it
1430                    lines.push(trimmed.to_string());
1431                    current_line.clear();
1432                } else {
1433                    // Incomplete sentence - continue accumulating
1434                    current_line = combined;
1435                }
1436            }
1437        } else if let Element::Italic { content, underscore } = element {
1438            // Handle italic elements - may contain multiple sentences that need continuation
1439            let marker = if *underscore { "_" } else { "*" };
1440            handle_emphasis_sentence_split(
1441                content,
1442                marker,
1443                &abbreviations,
1444                require_sentence_capital,
1445                &mut current_line,
1446                &mut lines,
1447            );
1448        } else if let Element::Bold { content, underscore } = element {
1449            // Handle bold elements - may contain multiple sentences that need continuation
1450            let marker = if *underscore { "__" } else { "**" };
1451            handle_emphasis_sentence_split(
1452                content,
1453                marker,
1454                &abbreviations,
1455                require_sentence_capital,
1456                &mut current_line,
1457                &mut lines,
1458            );
1459        } else if let Element::Strikethrough(content) = element {
1460            // Handle strikethrough elements - may contain multiple sentences that need continuation
1461            handle_emphasis_sentence_split(
1462                content,
1463                "~~",
1464                &abbreviations,
1465                require_sentence_capital,
1466                &mut current_line,
1467                &mut lines,
1468            );
1469        } else {
1470            // Non-text, non-emphasis elements (Code, Links, etc.)
1471            // Check if this element is adjacent to the preceding text (no space between)
1472            let is_adjacent = if idx > 0 {
1473                match &elements[idx - 1] {
1474                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1475                    _ => true,
1476                }
1477            } else {
1478                false
1479            };
1480
1481            // Add space before element if needed, but not for adjacent elements
1482            if !is_adjacent && should_insert_space_before_join(&current_line) {
1483                current_line.push(' ');
1484            }
1485            current_line.push_str(&element_str);
1486        }
1487    }
1488
1489    // Add any remaining content
1490    if !current_line.is_empty() {
1491        lines.push(current_line.trim().to_string());
1492    }
1493    lines
1494}
1495
1496/// Handle splitting emphasis content at sentence boundaries while preserving markers
1497fn handle_emphasis_sentence_split(
1498    content: &str,
1499    marker: &str,
1500    abbreviations: &HashSet<String>,
1501    require_sentence_capital: bool,
1502    current_line: &mut String,
1503    lines: &mut Vec<String>,
1504) {
1505    // Split the emphasis content into sentences
1506    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1507
1508    if sentences.len() <= 1 {
1509        // Single sentence or no boundaries - treat as atomic
1510        if should_insert_space_before_join(current_line) {
1511            current_line.push(' ');
1512        }
1513        current_line.push_str(marker);
1514        current_line.push_str(content);
1515        current_line.push_str(marker);
1516
1517        // Check if the emphasis content ends with sentence punctuation - if so, emit
1518        let trimmed = content.trim();
1519        let ends_with_punct = ends_with_sentence_punct(trimmed);
1520        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1521            lines.push(current_line.clone());
1522            current_line.clear();
1523        }
1524    } else {
1525        // Multiple sentences - each gets its own emphasis markers
1526        for (i, sentence) in sentences.iter().enumerate() {
1527            let trimmed = sentence.trim();
1528            if trimmed.is_empty() {
1529                continue;
1530            }
1531
1532            if i == 0 {
1533                // First sentence: combine with current_line and emit
1534                if should_insert_space_before_join(current_line) {
1535                    current_line.push(' ');
1536                }
1537                current_line.push_str(marker);
1538                current_line.push_str(trimmed);
1539                current_line.push_str(marker);
1540
1541                // Check if this is a complete sentence
1542                let ends_with_punct = ends_with_sentence_punct(trimmed);
1543                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1544                    lines.push(current_line.clone());
1545                    current_line.clear();
1546                }
1547            } else if i == sentences.len() - 1 {
1548                // Last sentence: check if complete
1549                let ends_with_punct = ends_with_sentence_punct(trimmed);
1550
1551                let mut line = String::new();
1552                line.push_str(marker);
1553                line.push_str(trimmed);
1554                line.push_str(marker);
1555
1556                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1557                    lines.push(line);
1558                } else {
1559                    // Incomplete - keep in current_line for potential continuation
1560                    *current_line = line;
1561                }
1562            } else {
1563                // Middle sentences: emit with markers
1564                let mut line = String::new();
1565                line.push_str(marker);
1566                line.push_str(trimmed);
1567                line.push_str(marker);
1568                lines.push(line);
1569            }
1570        }
1571    }
1572}
1573
/// English break-words used for semantic line break splitting.
///
/// These are conjunctions, relative pronouns, and conjunctive adverbs where
/// a line break reads naturally.  `split_at_break_word` searches for them in
/// a lowercased copy of the line, so every entry must be lowercase; an
/// occurrence only counts when it is followed by a space and either starts
/// the line or is preceded by a space.  The break is placed before the word.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1609
/// Report whether `c` is one of the clause-level punctuation marks at which
/// a semantic line break may be placed: comma, semicolon, colon, or em dash.
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1614
1615/// Find the closing `)` that balances the `(` at the start of `slice`.
1616///
1617/// `offset` is the byte position of the `(` in the original full-line string;
1618/// it is used to translate local byte positions into global positions for
1619/// element-span lookups.  Parens inside markdown element spans are skipped so
1620/// that, e.g., the closing `)` of an inline link does not prematurely end the
1621/// scan.  The char's *start* byte (not byte-after) is used for the span check
1622/// so that closing element delimiters — which sit exactly at the span's
1623/// exclusive-end boundary — are correctly excluded.
1624///
1625/// Returns `(end_local, inner)` where `end_local` is the byte offset within
1626/// `slice` just past the closing `)`, and `inner` is the content between the
1627/// outermost `(` and `)`.
1628fn paren_group_end<'a>(slice: &'a str, element_spans: &[(usize, usize)], offset: usize) -> Option<(usize, &'a str)> {
1629    debug_assert!(slice.starts_with('('));
1630    let mut depth: i32 = 0;
1631    for (local_byte, c) in slice.char_indices() {
1632        let global_byte = offset + local_byte;
1633        // When depth > 0, skip parens that belong to a markdown element.
1634        // Use the char's start byte so that a closing element delimiter
1635        // (whose byte_after equals the span's exclusive end) is treated as
1636        // inside the element rather than outside it.
1637        if depth > 0 && is_inside_element(global_byte, element_spans) {
1638            continue;
1639        }
1640        match c {
1641            '(' => depth += 1,
1642            ')' => {
1643                depth -= 1;
1644                if depth == 0 {
1645                    let end = local_byte + 1;
1646                    let inner = &slice[1..local_byte];
1647                    return Some((end, inner));
1648                }
1649            }
1650            _ => {}
1651        }
1652    }
1653    None
1654}
1655
1656/// Split a line at a parenthetical boundary for semantic line breaks.
1657///
1658/// Two strategies are tried in order:
1659///
1660/// 1. **Leading parenthetical** — if the line begins with `(`, isolate the
1661///    entire balanced group on this line and start the rest on the next.
1662///    This handles lines produced by a prior split that placed a `(` at the
1663///    very beginning.
1664///
1665/// 2. **Mid-line parenthetical** — find the rightmost balanced `(…)` whose
1666///    content spans multiple words and whose preceding text fits within
1667///    `[min_first_len, line_length]`.  Split just before the `(` so the
1668///    parenthetical begins the following line.
1669///
1670/// Parentheses that fall inside markdown element spans (links, code, etc.)
1671/// are ignored in both strategies.
1672fn split_at_parenthetical(
1673    text: &str,
1674    line_length: usize,
1675    element_spans: &[(usize, usize)],
1676    length_mode: ReflowLengthMode,
1677) -> Option<(String, String)> {
1678    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1679
1680    // Strategy 1: text starts with '(' — isolate the parenthetical as its own line.
1681    if text.starts_with('(')
1682        && let Some((end_local, inner)) = paren_group_end(text, element_spans, 0)
1683        && inner.contains(' ')
1684    {
1685        // If closing quotes or clause punctuation immediately follow the closing
1686        // ')', attach them to the parenthetical so the continuation line does
1687        // not start with a bare quote, comma, or semicolon.
1688        let tail = &text[end_local..];
1689        let attached_len = tail
1690            .char_indices()
1691            .take_while(|(_, c)| is_closing_quote(*c) || is_clause_punctuation(*c))
1692            .last()
1693            .map_or(0, |(idx, c)| idx + c.len_utf8());
1694        let first_end = end_local + attached_len;
1695        let rest_start = first_end;
1696        let first = &text[..first_end];
1697        let first_len = display_len(first, length_mode);
1698        // No MIN_SPLIT_RATIO check: a parenthetical unit is always a valid
1699        // semantic line regardless of its length.
1700        if first_len <= line_length {
1701            let rest = text[rest_start..].trim_start();
1702            if !rest.is_empty() {
1703                return Some((first.to_string(), rest.to_string()));
1704            }
1705        }
1706    }
1707
1708    // Strategy 2: find the rightmost multi-word '(' whose preceding text fits.
1709    let mut best_open_byte: Option<usize> = None;
1710    let mut pos = 0usize;
1711    while pos < text.len() {
1712        // '(' is ASCII so a single-byte comparison is safe in UTF-8.
1713        if text.as_bytes()[pos] != b'(' {
1714            let c = text[pos..].chars().next().unwrap();
1715            pos += c.len_utf8();
1716            continue;
1717        }
1718        // Skip '(' that are part of a markdown element (use start byte).
1719        if is_inside_element(pos, element_spans) {
1720            pos += 1;
1721            continue;
1722        }
1723        if let Some((end_local, inner)) = paren_group_end(&text[pos..], element_spans, pos) {
1724            let first = text[..pos].trim_end();
1725            let first_len = display_len(first, length_mode);
1726            if !first.is_empty()
1727                && first_len >= min_first_len
1728                && first_len <= line_length
1729                && inner.contains(' ')
1730                && best_open_byte.is_none_or(|prev| pos > prev)
1731            {
1732                best_open_byte = Some(pos);
1733            }
1734            pos += end_local;
1735        } else {
1736            pos += 1;
1737        }
1738    }
1739
1740    let open_byte = best_open_byte?;
1741    let first = text[..open_byte].trim_end().to_string();
1742    let rest = text[open_byte..].to_string();
1743    if first.is_empty() || rest.trim().is_empty() {
1744        return None;
1745    }
1746    Some((first, rest))
1747}
1748
1749/// Compute element spans for a flat text representation of elements.
1750/// Returns Vec of (start, end) byte offsets for non-Text elements,
1751/// so we can check that a split position doesn't fall inside them.
1752fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1753    let mut spans = Vec::new();
1754    let mut offset = 0;
1755    for element in elements {
1756        let rendered = format!("{element}");
1757        let len = rendered.len();
1758        if !matches!(element, Element::Text(_)) {
1759            spans.push((offset, offset + len));
1760        }
1761        offset += len;
1762    }
1763    spans
1764}
1765
/// Report whether byte position `pos` lies strictly inside one of `spans`.
///
/// Both ends are exclusive: a position equal to a span's start or to its
/// (already exclusive) end is treated as outside the element.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1770
/// Minimum fraction of `line_length` that the first part of a split must occupy.
/// Prevents awkwardly short first lines like "A," or "Note:" on their own.
/// The semantic reflow pass applies the same fraction as the threshold below
/// which a line counts as short enough to be merged back into the previous one.
const MIN_SPLIT_RATIO: f64 = 0.3;
1774
1775/// Split a line at the latest clause punctuation that keeps the first part
1776/// within `line_length`. Returns None if no valid split point exists or if
1777/// the split would create an unreasonably short first line.
1778fn split_at_clause_punctuation(
1779    text: &str,
1780    line_length: usize,
1781    element_spans: &[(usize, usize)],
1782    length_mode: ReflowLengthMode,
1783) -> Option<(String, String)> {
1784    let chars: Vec<char> = text.chars().collect();
1785    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1786
1787    // Find the char index where accumulated display width exceeds line_length
1788    let mut width_acc = 0;
1789    let mut search_end_char = 0;
1790    for (idx, &c) in chars.iter().enumerate() {
1791        let c_width = display_len(&c.to_string(), length_mode);
1792        if width_acc + c_width > line_length {
1793            break;
1794        }
1795        width_acc += c_width;
1796        search_end_char = idx + 1;
1797    }
1798
1799    // Scan backwards tracking parenthesis depth to skip clause punctuation
1800    // inside plain-text parenthetical groups.  Scanning right-to-left means
1801    // ')' opens a depth level and '(' closes it.  Parens that belong to a
1802    // markdown element are excluded using the char's start byte (not byte-after)
1803    // so that closing element delimiters at the span boundary are correctly
1804    // treated as part of the element.
1805    let mut paren_depth: i32 = 0;
1806    let mut best_pos = None;
1807    for i in (0..search_end_char).rev() {
1808        // Start byte of char i (for paren element check)
1809        let byte_start: usize = chars[..i].iter().map(|c| c.len_utf8()).sum();
1810        // Byte just after char i (for clause punctuation element check — existing convention)
1811        let byte_after: usize = byte_start + chars[i].len_utf8();
1812
1813        if !is_inside_element(byte_start, element_spans) {
1814            match chars[i] {
1815                ')' => paren_depth += 1,
1816                '(' => paren_depth = paren_depth.saturating_sub(1),
1817                _ => {}
1818            }
1819        }
1820
1821        if paren_depth == 0 && is_clause_punctuation(chars[i]) && !is_inside_element(byte_after, element_spans) {
1822            best_pos = Some(i);
1823            break;
1824        }
1825    }
1826
1827    let pos = best_pos?;
1828
1829    // Reject splits that create very short first lines
1830    let first: String = chars[..=pos].iter().collect();
1831    let first_display_len = display_len(&first, length_mode);
1832    if first_display_len < min_first_len {
1833        return None;
1834    }
1835
1836    // Split after the punctuation character
1837    let rest: String = chars[pos + 1..].iter().collect();
1838    let rest = rest.trim_start().to_string();
1839
1840    if rest.is_empty() {
1841        return None;
1842    }
1843
1844    Some((first, rest))
1845}
1846
1847/// Compute plain-text paren-depth at each byte offset in `text`.
1848///
1849/// Returns a `Vec<i32>` of length `text.len()` where entry `i` is the
1850/// nesting depth at byte `i` — counting only `(` and `)` that fall
1851/// outside markdown element spans.  This lets callers quickly check
1852/// whether a byte position lies inside a plain-text parenthetical group.
1853fn paren_depth_map(text: &str, element_spans: &[(usize, usize)]) -> Vec<i32> {
1854    let mut map = vec![0i32; text.len()];
1855    let mut depth = 0i32;
1856    for (byte, c) in text.char_indices() {
1857        if !is_inside_element(byte, element_spans) {
1858            match c {
1859                '(' => depth += 1,
1860                ')' => depth = depth.saturating_sub(1),
1861                _ => {}
1862            }
1863        }
1864        // Fill the depth value for every byte of this (possibly multi-byte) char.
1865        let end = (byte + c.len_utf8()).min(map.len());
1866        for slot in &mut map[byte..end] {
1867            *slot = depth;
1868        }
1869    }
1870    map
1871}
1872
1873/// Return `true` if `line` is a complete, balanced, multi-word parenthetical
1874/// group — i.e. it starts with `(`, ends with `)` (possibly followed by
1875/// clause punctuation), has balanced parens throughout, and the inner content
1876/// contains at least one space (matching the ≥2-word threshold used by
1877/// `split_at_parenthetical` when deciding to split).
1878///
1879/// Used to prevent the short-line merge step from collapsing intentional
1880/// parenthetical splits back into the previous line.
1881fn is_standalone_parenthetical(line: &str) -> bool {
1882    let trimmed = line.trim();
1883    if !trimmed.starts_with('(') {
1884        return false;
1885    }
1886    // Strip optional trailing clause punctuation to find the real end.
1887    let core = trimmed.trim_end_matches(|c: char| is_clause_punctuation(c));
1888    if !core.ends_with(')') {
1889        return false;
1890    }
1891    // Inner content must span multiple words (same threshold as split_at_parenthetical).
1892    let inner = &core[1..core.len() - 1];
1893    if !inner.contains(' ') {
1894        return false;
1895    }
1896    // Verify the parens are balanced (depth returns to 0 at the last ')').
1897    let mut depth = 0i32;
1898    for c in core.chars() {
1899        match c {
1900            '(' => depth += 1,
1901            ')' => depth -= 1,
1902            _ => {}
1903        }
1904        if depth < 0 {
1905            return false;
1906        }
1907    }
1908    depth == 0
1909}
1910
1911/// Split a line before the latest break-word that keeps the first part
1912/// within `line_length`. Returns None if no valid split point exists or if
1913/// the split would create an unreasonably short first line.
1914fn split_at_break_word(
1915    text: &str,
1916    line_length: usize,
1917    element_spans: &[(usize, usize)],
1918    length_mode: ReflowLengthMode,
1919) -> Option<(String, String)> {
1920    let lower = text.to_lowercase();
1921    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1922    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1923
1924    // Build a paren-depth map so we can skip break-words inside plain-text
1925    // parenthetical groups (matching the protection added to split_at_clause_punctuation).
1926    let depth_map = paren_depth_map(text, element_spans);
1927
1928    for &word in BREAK_WORDS {
1929        let mut search_start = 0;
1930        while let Some(pos) = lower[search_start..].find(word) {
1931            let abs_pos = search_start + pos;
1932
1933            // Verify it's a word boundary: preceded by space, followed by space
1934            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1935            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1936
1937            if preceded_by_space && followed_by_space {
1938                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1939                let first_part = text[..abs_pos].trim_end();
1940                let first_part_len = display_len(first_part, length_mode);
1941
1942                // Skip break-words inside plain-text parenthetical groups.
1943                let inside_paren = depth_map.get(abs_pos).is_some_and(|&d| d > 0);
1944
1945                if first_part_len >= min_first_len
1946                    && first_part_len <= line_length
1947                    && !is_inside_element(abs_pos, element_spans)
1948                    && !inside_paren
1949                {
1950                    // Prefer the latest valid split point
1951                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1952                        best_split = Some((abs_pos, word.len()));
1953                    }
1954                }
1955            }
1956
1957            search_start = abs_pos + word.len();
1958        }
1959    }
1960
1961    let (byte_start, _word_len) = best_split?;
1962
1963    let first = text[..byte_start].trim_end().to_string();
1964    let rest = text[byte_start..].to_string();
1965
1966    if first.is_empty() || rest.trim().is_empty() {
1967        return None;
1968    }
1969
1970    Some((first, rest))
1971}
1972
1973/// Recursively cascade-split a line that exceeds line_length.
1974/// Tries clause punctuation first, then break-words, then word wrap.
1975fn cascade_split_line(
1976    text: &str,
1977    line_length: usize,
1978    abbreviations: &Option<Vec<String>>,
1979    length_mode: ReflowLengthMode,
1980    attr_lists: bool,
1981) -> Vec<String> {
1982    if line_length == 0 || display_len(text, length_mode) <= line_length {
1983        return vec![text.to_string()];
1984    }
1985
1986    let elements = parse_markdown_elements_inner(text, attr_lists);
1987    let element_spans = compute_element_spans(&elements);
1988
1989    // Try parenthetical boundary split (before clause punctuation so that
1990    // multi-word parentheticals are kept intact as semantic units)
1991    if let Some((first, rest)) = split_at_parenthetical(text, line_length, &element_spans, length_mode) {
1992        let mut result = vec![first];
1993        result.extend(cascade_split_line(
1994            &rest,
1995            line_length,
1996            abbreviations,
1997            length_mode,
1998            attr_lists,
1999        ));
2000        return result;
2001    }
2002
2003    // Try clause punctuation split
2004    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
2005        let mut result = vec![first];
2006        result.extend(cascade_split_line(
2007            &rest,
2008            line_length,
2009            abbreviations,
2010            length_mode,
2011            attr_lists,
2012        ));
2013        return result;
2014    }
2015
2016    // Try break-word split
2017    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
2018        let mut result = vec![first];
2019        result.extend(cascade_split_line(
2020            &rest,
2021            line_length,
2022            abbreviations,
2023            length_mode,
2024            attr_lists,
2025        ));
2026        return result;
2027    }
2028
2029    // Fallback: word wrap using existing reflow_elements
2030    let options = ReflowOptions {
2031        line_length,
2032        break_on_sentences: false,
2033        preserve_breaks: false,
2034        sentence_per_line: false,
2035        semantic_line_breaks: false,
2036        abbreviations: abbreviations.clone(),
2037        length_mode,
2038        attr_lists,
2039        require_sentence_capital: true,
2040        max_list_continuation_indent: None,
2041    };
2042    reflow_elements(&elements, &options)
2043}
2044
2045/// Reflow elements using semantic line breaks strategy:
2046/// 1. Split at sentence boundaries (always)
2047/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
2048fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2049    // Step 1: Split into sentences using existing sentence-per-line logic
2050    let sentence_lines =
2051        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
2052
2053    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
2054    // When line_length is 0 (unlimited), skip cascading — sentence splits only
2055    if options.line_length == 0 {
2056        return sentence_lines;
2057    }
2058
2059    let length_mode = options.length_mode;
2060    let mut result = Vec::new();
2061    for line in sentence_lines {
2062        if display_len(&line, length_mode) <= options.line_length {
2063            result.push(line);
2064        } else {
2065            result.extend(cascade_split_line(
2066                &line,
2067                options.line_length,
2068                &options.abbreviations,
2069                length_mode,
2070                options.attr_lists,
2071            ));
2072        }
2073    }
2074
2075    // Step 3: Merge very short trailing lines back into the previous line.
2076    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
2077    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
2078    let mut merged: Vec<String> = Vec::with_capacity(result.len());
2079    for line in result {
2080        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
2081            // Don't merge a line that is itself a standalone parenthetical group —
2082            // it was placed on its own line intentionally by split_at_parenthetical.
2083            if is_standalone_parenthetical(&line) {
2084                merged.push(line);
2085                continue;
2086            }
2087
2088            // Don't merge across sentence boundaries — sentence splits are intentional
2089            let prev_ends_at_sentence = {
2090                let trimmed = merged.last().unwrap().trim_end();
2091                trimmed
2092                    .chars()
2093                    .rev()
2094                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
2095                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
2096            };
2097
2098            if !prev_ends_at_sentence {
2099                let prev = merged.last_mut().unwrap();
2100                let combined = format!("{prev} {line}");
2101                // Only merge if the combined line fits within the limit
2102                if display_len(&combined, length_mode) <= options.line_length {
2103                    *prev = combined;
2104                    continue;
2105                }
2106            }
2107        }
2108        merged.push(line);
2109    }
2110    merged
2111}
2112
/// Return the byte index of the last ASCII space in `line` at which it is
/// safe to split, i.e. one that is not inside a rendered non-Text element.
///
/// `element_spans` holds the `(start, end)` byte ranges of the non-Text
/// elements in `line`.  Both ends are treated as exclusive: element
/// delimiters (e.g., `[`, `]`, `(`, `)`, `<`, `>`, `` ` ``) are never
/// spaces, so only interior positions need protection.  Returns `None` when
/// every space is protected or the line has no space at all.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    line.rmatch_indices(' ')
        .map(|(byte_idx, _)| byte_idx)
        .find(|&byte_idx| {
            element_spans
                .iter()
                .all(|&(start, end)| byte_idx <= start || byte_idx >= end)
        })
}
2126
2127/// Reflow elements into lines that fit within the line length
2128fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
2129    let mut lines = Vec::new();
2130    let mut current_line = String::new();
2131    let mut current_length = 0;
2132    // Track byte spans of non-Text elements in current_line for safe splitting
2133    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
2134    let length_mode = options.length_mode;
2135
2136    for (idx, element) in elements.iter().enumerate() {
2137        let element_str = format!("{element}");
2138        let element_len = element.display_width(length_mode);
2139
2140        // Determine adjacency from the original elements, not from current_line.
2141        // Elements are adjacent when there's no whitespace between them in the source:
2142        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
2143        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
2144        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
2145        let is_adjacent_to_prev = if idx > 0 {
2146            match (&elements[idx - 1], element) {
2147                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
2148                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
2149                _ => true,
2150            }
2151        } else {
2152            false
2153        };
2154
2155        // For text elements that might need breaking
2156        if let Element::Text(text) = element {
2157            // Check if original text had leading whitespace
2158            let has_leading_space = text.starts_with(char::is_whitespace);
2159            // If this is a text element, always process it word by word
2160            let words: Vec<&str> = text.split_whitespace().collect();
2161
2162            for (i, word) in words.iter().enumerate() {
2163                let word_len = display_len(word, length_mode);
2164                // Check if this "word" is just punctuation that should stay attached
2165                let is_trailing_punct = word
2166                    .chars()
2167                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
2168
2169                // First word of text adjacent to preceding non-text element
2170                // must stay attached (e.g., shortcode followed by punctuation or text)
2171                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
2172
2173                if is_first_adjacent {
2174                    // Attach directly without space, preventing line break
2175                    if current_length + word_len > options.line_length && current_length > 0 {
2176                        // Would exceed — break before the adjacent group
2177                        // Use element-aware space search to avoid splitting inside links/code/etc.
2178                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2179                            let before = current_line[..last_space].trim_end().to_string();
2180                            let after = current_line[last_space + 1..].to_string();
2181                            lines.push(before);
2182                            current_line = format!("{after}{word}");
2183                            current_length = display_len(&current_line, length_mode);
2184                            current_line_element_spans.clear();
2185                        } else {
2186                            current_line.push_str(word);
2187                            current_length += word_len;
2188                        }
2189                    } else {
2190                        current_line.push_str(word);
2191                        current_length += word_len;
2192                    }
2193                } else if current_length > 0
2194                    && current_length + 1 + word_len > options.line_length
2195                    && !is_trailing_punct
2196                {
2197                    // Start a new line (but never for trailing punctuation)
2198                    lines.push(current_line.trim().to_string());
2199                    current_line = word.to_string();
2200                    current_length = word_len;
2201                    current_line_element_spans.clear();
2202                } else {
2203                    // Add word to current line
2204                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
2205                    // AND this isn't trailing punctuation (which attaches directly)
2206                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
2207                        current_line.push(' ');
2208                        current_length += 1;
2209                    }
2210                    current_line.push_str(word);
2211                    current_length += word_len;
2212                }
2213            }
2214        } else if matches!(
2215            element,
2216            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
2217        ) && element_len > options.line_length
2218        {
2219            // Italic, bold, and strikethrough with content longer than line_length need word wrapping.
2220            // Split content word-by-word, attach the opening marker to the first word
2221            // and the closing marker to the last word.
2222            let (content, marker): (&str, &str) = match element {
2223                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
2224                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
2225                Element::Strikethrough(content) => (content.as_str(), "~~"),
2226                _ => unreachable!(),
2227            };
2228
2229            let words: Vec<&str> = content.split_whitespace().collect();
2230            let n = words.len();
2231
2232            if n == 0 {
2233                // Empty span — treat as atomic
2234                let full = format!("{marker}{marker}");
2235                let full_len = display_len(&full, length_mode);
2236                if !is_adjacent_to_prev && current_length > 0 {
2237                    current_line.push(' ');
2238                    current_length += 1;
2239                }
2240                current_line.push_str(&full);
2241                current_length += full_len;
2242            } else {
2243                for (i, word) in words.iter().enumerate() {
2244                    let is_first = i == 0;
2245                    let is_last = i == n - 1;
2246                    let word_str: String = match (is_first, is_last) {
2247                        (true, true) => format!("{marker}{word}{marker}"),
2248                        (true, false) => format!("{marker}{word}"),
2249                        (false, true) => format!("{word}{marker}"),
2250                        (false, false) => word.to_string(),
2251                    };
2252                    let word_len = display_len(&word_str, length_mode);
2253
2254                    let needs_space = if is_first {
2255                        !is_adjacent_to_prev && current_length > 0
2256                    } else {
2257                        current_length > 0
2258                    };
2259
2260                    if needs_space && current_length + 1 + word_len > options.line_length {
2261                        lines.push(current_line.trim_end().to_string());
2262                        current_line = word_str;
2263                        current_length = word_len;
2264                        current_line_element_spans.clear();
2265                    } else {
2266                        if needs_space {
2267                            current_line.push(' ');
2268                            current_length += 1;
2269                        }
2270                        current_line.push_str(&word_str);
2271                        current_length += word_len;
2272                    }
2273                }
2274            }
2275        } else {
2276            // For non-text elements (code, links, references), treat as atomic units
2277            // These should never be broken across lines
2278
2279            if is_adjacent_to_prev {
2280                // Adjacent to preceding text — attach directly without space
2281                if current_length + element_len > options.line_length {
2282                    // Would exceed limit — break before the adjacent word group
2283                    // Use element-aware space search to avoid splitting inside links/code/etc.
2284                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2285                        let before = current_line[..last_space].trim_end().to_string();
2286                        let after = current_line[last_space + 1..].to_string();
2287                        lines.push(before);
2288                        current_line = format!("{after}{element_str}");
2289                        current_length = display_len(&current_line, length_mode);
2290                        current_line_element_spans.clear();
2291                        // Record the element span in the new current_line
2292                        let start = after.len();
2293                        current_line_element_spans.push((start, start + element_str.len()));
2294                    } else {
2295                        // No safe space to break at — accept the long line
2296                        let start = current_line.len();
2297                        current_line.push_str(&element_str);
2298                        current_length += element_len;
2299                        current_line_element_spans.push((start, current_line.len()));
2300                    }
2301                } else {
2302                    let start = current_line.len();
2303                    current_line.push_str(&element_str);
2304                    current_length += element_len;
2305                    current_line_element_spans.push((start, current_line.len()));
2306                }
2307            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2308                // Not adjacent, would exceed — start new line
2309                lines.push(current_line.trim().to_string());
2310                current_line.clone_from(&element_str);
2311                current_length = element_len;
2312                current_line_element_spans.clear();
2313                current_line_element_spans.push((0, element_str.len()));
2314            } else {
2315                // Not adjacent, fits — add with space
2316                let ends_with_opener =
2317                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2318                if current_length > 0 && !ends_with_opener {
2319                    current_line.push(' ');
2320                    current_length += 1;
2321                }
2322                let start = current_line.len();
2323                current_line.push_str(&element_str);
2324                current_length += element_len;
2325                current_line_element_spans.push((start, current_line.len()));
2326            }
2327        }
2328    }
2329
2330    // Don't forget the last line
2331    if !current_line.is_empty() {
2332        lines.push(current_line.trim_end().to_string());
2333    }
2334
2335    lines
2336}
2337
2338/// Reflow markdown content preserving structure
2339pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2340    let lines: Vec<&str> = content.lines().collect();
2341    let mut result = Vec::new();
2342    let mut i = 0;
2343
2344    while i < lines.len() {
2345        let line = lines[i];
2346        let trimmed = line.trim();
2347
2348        // Preserve empty lines
2349        if trimmed.is_empty() {
2350            result.push(String::new());
2351            i += 1;
2352            continue;
2353        }
2354
2355        // Preserve headings as-is
2356        if trimmed.starts_with('#') {
2357            result.push(line.to_string());
2358            i += 1;
2359            continue;
2360        }
2361
2362        // Preserve Quarto/Pandoc div markers (:::) as-is
2363        if trimmed.starts_with(":::") {
2364            result.push(line.to_string());
2365            i += 1;
2366            continue;
2367        }
2368
2369        // Preserve fenced code blocks
2370        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2371            result.push(line.to_string());
2372            i += 1;
2373            // Copy lines until closing fence
2374            while i < lines.len() {
2375                result.push(lines[i].to_string());
2376                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2377                    i += 1;
2378                    break;
2379                }
2380                i += 1;
2381            }
2382            continue;
2383        }
2384
2385        // Preserve indented code blocks (4+ columns accounting for tab expansion)
2386        if calculate_indentation_width_default(line) >= 4 {
2387            // Collect all consecutive indented lines
2388            result.push(line.to_string());
2389            i += 1;
2390            while i < lines.len() {
2391                let next_line = lines[i];
2392                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
2393                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2394                    result.push(next_line.to_string());
2395                    i += 1;
2396                } else {
2397                    break;
2398                }
2399            }
2400            continue;
2401        }
2402
2403        // Preserve block quotes (but reflow their content)
2404        if trimmed.starts_with('>') {
2405            // find() returns byte position which is correct for str slicing
2406            // The unwrap is safe because we already verified trimmed starts with '>'
2407            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2408            let quote_prefix = line[0..=gt_pos].to_string();
2409            let quote_content = &line[quote_prefix.len()..].trim_start();
2410
2411            let reflowed = reflow_line(quote_content, options);
2412            for reflowed_line in &reflowed {
2413                result.push(format!("{quote_prefix} {reflowed_line}"));
2414            }
2415            i += 1;
2416            continue;
2417        }
2418
2419        // Preserve horizontal rules first (before checking for lists)
2420        if is_horizontal_rule(trimmed) {
2421            result.push(line.to_string());
2422            i += 1;
2423            continue;
2424        }
2425
2426        // Preserve lists (but not horizontal rules)
2427        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2428            // Find the list marker and preserve indentation
2429            let indent = line.len() - line.trim_start().len();
2430            let indent_str = " ".repeat(indent);
2431
2432            // For numbered lists, find the period and the space after it
2433            // For bullet lists, find the marker and the space after it
2434            let mut marker_end = indent;
2435            let mut content_start = indent;
2436
2437            if trimmed.chars().next().is_some_and(char::is_numeric) {
2438                // Numbered list: find the period
2439                if let Some(period_pos) = line[indent..].find('.') {
2440                    marker_end = indent + period_pos + 1; // Include the period
2441                    content_start = marker_end;
2442                    // Skip any spaces after the period to find content start
2443                    // Use byte-based check since content_start is a byte index
2444                    // This is safe because space is ASCII (single byte)
2445                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2446                        content_start += 1;
2447                    }
2448                }
2449            } else {
2450                // Bullet list: marker is single character
2451                marker_end = indent + 1; // Just the marker character
2452                content_start = marker_end;
2453                // Skip any spaces after the marker
2454                // Use byte-based check since content_start is a byte index
2455                // This is safe because space is ASCII (single byte)
2456                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2457                    content_start += 1;
2458                }
2459            }
2460
2461            // Minimum indent for continuation lines (based on list marker, before checkbox)
2462            let min_continuation_indent = content_start;
2463
2464            // Detect checkbox/task list markers: [ ], [x], [X]
2465            // GFM task lists work with both unordered and ordered lists
2466            let rest = &line[content_start..];
2467            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2468                marker_end = content_start + 3; // Include the checkbox `[ ]`
2469                content_start += 4; // Skip past `[ ] `
2470            }
2471
2472            let marker = &line[indent..marker_end];
2473
2474            // Collect all content for this list item (including continuation lines)
2475            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
2476            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2477            i += 1;
2478
2479            // Collect continuation lines (indented lines that are part of this list item)
2480            // Use the base marker indent (not checkbox-extended) for collection,
2481            // since users may indent continuations to the bullet level, not the checkbox level
2482            while i < lines.len() {
2483                let next_line = lines[i];
2484                let next_trimmed = next_line.trim();
2485
2486                // Stop if we hit an empty line or another list item or special block
2487                if is_block_boundary(next_trimmed) {
2488                    break;
2489                }
2490
2491                // Check if this line is indented (continuation of list item)
2492                let next_indent = next_line.len() - next_line.trim_start().len();
2493                if next_indent >= min_continuation_indent {
2494                    // This is a continuation line - add its content
2495                    // Preserve hard breaks while trimming excessive whitespace
2496                    let trimmed_start = next_line.trim_start();
2497                    list_content.push(trim_preserving_hard_break(trimmed_start));
2498                    i += 1;
2499                } else {
2500                    // Not indented enough, not part of this list item
2501                    break;
2502                }
2503            }
2504
2505            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2506            // Hard breaks should prevent joining with the next line
2507            let combined_content = if options.preserve_breaks {
2508                list_content[0].clone()
2509            } else {
2510                // Check if any lines have hard breaks - if so, preserve the structure
2511                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2512                if has_hard_breaks {
2513                    // Don't join lines with hard breaks - keep them separate with newlines
2514                    list_content.join("\n")
2515                } else {
2516                    // No hard breaks, safe to join with spaces
2517                    list_content.join(" ")
2518                }
2519            };
2520
2521            // Calculate the proper indentation for continuation lines
2522            let trimmed_marker = marker;
2523            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2524                // Cap the relative indent (past the nesting level) to max_indent,
2525                // then add back the nesting indent so nested items stay correct
2526                indent + (content_start - indent).min(max_indent)
2527            } else {
2528                content_start
2529            };
2530
2531            // Adjust line length to account for list marker and space
2532            let prefix_length = indent + trimmed_marker.len() + 1;
2533
2534            // Create adjusted options with reduced line length
2535            let adjusted_options = ReflowOptions {
2536                line_length: options.line_length.saturating_sub(prefix_length),
2537                ..options.clone()
2538            };
2539
2540            let reflowed = reflow_line(&combined_content, &adjusted_options);
2541            for (j, reflowed_line) in reflowed.iter().enumerate() {
2542                if j == 0 {
2543                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2544                } else {
2545                    // Continuation lines aligned with text after marker
2546                    let continuation_indent = " ".repeat(continuation_spaces);
2547                    result.push(format!("{continuation_indent}{reflowed_line}"));
2548                }
2549            }
2550            continue;
2551        }
2552
2553        // Preserve tables
2554        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2555            result.push(line.to_string());
2556            i += 1;
2557            continue;
2558        }
2559
2560        // Preserve reference definitions
2561        if trimmed.starts_with('[') && line.contains("]:") {
2562            result.push(line.to_string());
2563            i += 1;
2564            continue;
2565        }
2566
2567        // Preserve definition list items (extended markdown)
2568        if is_definition_list_item(trimmed) {
2569            result.push(line.to_string());
2570            i += 1;
2571            continue;
2572        }
2573
2574        // Check if this is a single line that doesn't need processing
2575        let mut is_single_line_paragraph = true;
2576        if i + 1 < lines.len() {
2577            let next_trimmed = lines[i + 1].trim();
2578            // Check if next line continues this paragraph
2579            if !is_block_boundary(next_trimmed) {
2580                is_single_line_paragraph = false;
2581            }
2582        }
2583
2584        // If it's a single line that fits, just add it as-is
2585        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2586            result.push(line.to_string());
2587            i += 1;
2588            continue;
2589        }
2590
2591        // For regular paragraphs, collect consecutive lines
2592        let mut paragraph_parts = Vec::new();
2593        let mut current_part = vec![line];
2594        i += 1;
2595
2596        // If preserve_breaks is true, treat each line separately
2597        if options.preserve_breaks {
2598            // Don't collect consecutive lines - just reflow this single line
2599            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2600                Some("\\")
2601            } else if line.ends_with("  ") {
2602                Some("  ")
2603            } else {
2604                None
2605            };
2606            let reflowed = reflow_line(line, options);
2607
2608            // Preserve hard breaks (two trailing spaces or backslash)
2609            if let Some(break_marker) = hard_break_type {
2610                if !reflowed.is_empty() {
2611                    let mut reflowed_with_break = reflowed;
2612                    let last_idx = reflowed_with_break.len() - 1;
2613                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2614                        reflowed_with_break[last_idx].push_str(break_marker);
2615                    }
2616                    result.extend(reflowed_with_break);
2617                }
2618            } else {
2619                result.extend(reflowed);
2620            }
2621        } else {
2622            // Original behavior: collect consecutive lines into a paragraph
2623            while i < lines.len() {
2624                let prev_line = if !current_part.is_empty() {
2625                    current_part.last().unwrap()
2626                } else {
2627                    ""
2628                };
2629                let next_line = lines[i];
2630                let next_trimmed = next_line.trim();
2631
2632                // Stop at empty lines or special blocks
2633                if is_block_boundary(next_trimmed) {
2634                    break;
2635                }
2636
2637                // Check if previous line ends with hard break (two spaces or backslash)
2638                // or is a complete sentence in sentence_per_line mode
2639                let prev_trimmed = prev_line.trim();
2640                let abbreviations = get_abbreviations(&options.abbreviations);
2641                let ends_with_sentence = (prev_trimmed.ends_with('.')
2642                    || prev_trimmed.ends_with('!')
2643                    || prev_trimmed.ends_with('?')
2644                    || prev_trimmed.ends_with(".*")
2645                    || prev_trimmed.ends_with("!*")
2646                    || prev_trimmed.ends_with("?*")
2647                    || prev_trimmed.ends_with("._")
2648                    || prev_trimmed.ends_with("!_")
2649                    || prev_trimmed.ends_with("?_")
2650                    // Quote-terminated sentences (straight and curly quotes)
2651                    || prev_trimmed.ends_with(".\"")
2652                    || prev_trimmed.ends_with("!\"")
2653                    || prev_trimmed.ends_with("?\"")
2654                    || prev_trimmed.ends_with(".'")
2655                    || prev_trimmed.ends_with("!'")
2656                    || prev_trimmed.ends_with("?'")
2657                    || prev_trimmed.ends_with(".\u{201D}")
2658                    || prev_trimmed.ends_with("!\u{201D}")
2659                    || prev_trimmed.ends_with("?\u{201D}")
2660                    || prev_trimmed.ends_with(".\u{2019}")
2661                    || prev_trimmed.ends_with("!\u{2019}")
2662                    || prev_trimmed.ends_with("?\u{2019}"))
2663                    && !text_ends_with_abbreviation(
2664                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2665                        &abbreviations,
2666                    );
2667
2668                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2669                    // Start a new part after hard break or complete sentence
2670                    paragraph_parts.push(current_part.join(" "));
2671                    current_part = vec![next_line];
2672                } else {
2673                    current_part.push(next_line);
2674                }
2675                i += 1;
2676            }
2677
2678            // Add the last part
2679            if !current_part.is_empty() {
2680                if current_part.len() == 1 {
2681                    // Single line, don't add trailing space
2682                    paragraph_parts.push(current_part[0].to_string());
2683                } else {
2684                    paragraph_parts.push(current_part.join(" "));
2685                }
2686            }
2687
2688            // Reflow each part separately, preserving hard breaks
2689            for (j, part) in paragraph_parts.iter().enumerate() {
2690                let reflowed = reflow_line(part, options);
2691                result.extend(reflowed);
2692
2693                // Preserve hard break by ensuring last line of part ends with hard break marker
2694                // Use two spaces as the default hard break format for reflows
2695                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2696                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2697                    let last_idx = result.len() - 1;
2698                    if !has_hard_break(&result[last_idx]) {
2699                        result[last_idx].push_str("  ");
2700                    }
2701                }
2702            }
2703        }
2704    }
2705
2706    // Preserve trailing newline if the original content had one
2707    let result_text = result.join("\n");
2708    if content.ends_with('\n') && !result_text.ends_with('\n') {
2709        format!("{result_text}\n")
2710    } else {
2711        result_text
2712    }
2713}
2714
/// Result of reflowing one paragraph: the replacement text together with the
/// byte range of the original content it corresponds to.
#[derive(Clone, Debug)]
pub struct ParagraphReflow {
    /// Byte offset in the original content where the paragraph starts
    pub start_byte: usize,
    /// Byte offset in the original content where the paragraph ends
    pub end_byte: usize,
    /// Reflowed text for the paragraph
    pub reflowed_text: String,
}
2725
/// One line of a blockquote paragraph, as collected for style-preserving reflow.
///
/// Invariant: `is_explicit` is `true` exactly when `prefix` is `Some`. Both
/// constructors uphold it, so build values through [`BlockquoteLineData::explicit`]
/// or [`BlockquoteLineData::lazy`] instead of a struct literal.
#[derive(Clone, Debug)]
pub struct BlockquoteLineData {
    /// Line content with the `> ` prefix removed and surrounding whitespace trimmed.
    pub(crate) content: String,
    /// `true` when the source line carried its own blockquote marker.
    pub(crate) is_explicit: bool,
    /// The complete blockquote prefix (e.g. `"> "`, `"> > "`); `None` for lazy
    /// continuation lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Build a line that carries an explicit blockquote marker, given its content
    /// and the full prefix it was written with.
    pub fn explicit(content: String, prefix: String) -> Self {
        let prefix = Some(prefix);
        Self {
            content,
            is_explicit: true,
            prefix,
        }
    }

    /// Build a lazy continuation line, i.e. one written without a blockquote marker.
    pub fn lazy(content: String) -> Self {
        let prefix = None;
        Self {
            content,
            is_explicit: false,
            prefix,
        }
    }
}
2760
/// How continuation lines of a blockquote paragraph are written after reflow.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Continuation lines carry an explicit blockquote marker.
    Explicit,
    /// Continuation lines are lazy, i.e. written without a blockquote marker.
    Lazy,
}
2767
2768/// Determine the continuation style for a blockquote paragraph from its collected lines.
2769///
2770/// The first line is always explicit (it carries the marker), so only continuation
2771/// lines (index 1+) are counted. Ties resolve to `Explicit`.
2772///
2773/// When the slice has only one element (no continuation lines to inspect), both
2774/// counts are zero and the tie-breaking rule returns `Explicit`.
2775pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2776    let mut explicit_count = 0usize;
2777    let mut lazy_count = 0usize;
2778
2779    for line in lines.iter().skip(1) {
2780        if line.is_explicit {
2781            explicit_count += 1;
2782        } else {
2783            lazy_count += 1;
2784        }
2785    }
2786
2787    if explicit_count > 0 && lazy_count == 0 {
2788        BlockquoteContinuationStyle::Explicit
2789    } else if lazy_count > 0 && explicit_count == 0 {
2790        BlockquoteContinuationStyle::Lazy
2791    } else if explicit_count >= lazy_count {
2792        BlockquoteContinuationStyle::Explicit
2793    } else {
2794        BlockquoteContinuationStyle::Lazy
2795    }
2796}
2797
2798/// Determine the dominant blockquote prefix for a paragraph.
2799///
2800/// The most frequently occurring explicit prefix wins. Ties are broken by earliest
2801/// first appearance. Falls back to `fallback` when no explicit lines are present.
2802pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2803    let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2804
2805    for (idx, line) in lines.iter().enumerate() {
2806        let Some(prefix) = line.prefix.as_ref() else {
2807            continue;
2808        };
2809        counts
2810            .entry(prefix.clone())
2811            .and_modify(|entry| entry.0 += 1)
2812            .or_insert((1, idx));
2813    }
2814
2815    counts
2816        .into_iter()
2817        .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2818            count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2819        })
2820        .map_or_else(|| fallback.to_string(), |(prefix, _)| prefix)
2821}
2822
2823/// Whether a reflowed blockquote content line must carry an explicit prefix.
2824///
2825/// Lines that would start a new block structure (headings, fences, lists, etc.)
2826/// cannot safely use lazy continuation syntax.
2827pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2828    let trimmed = content_line.trim_start();
2829    trimmed.starts_with('>')
2830        || trimmed.starts_with('#')
2831        || trimmed.starts_with("```")
2832        || trimmed.starts_with("~~~")
2833        || is_unordered_list_marker(trimmed)
2834        || is_numbered_list_item(trimmed)
2835        || is_horizontal_rule(trimmed)
2836        || is_definition_list_item(trimmed)
2837        || (trimmed.starts_with('[') && trimmed.contains("]:"))
2838        || trimmed.starts_with(":::")
2839        || (trimmed.starts_with('<')
2840            && !trimmed.starts_with("<http")
2841            && !trimmed.starts_with("<https")
2842            && !trimmed.starts_with("<mailto:"))
2843}
2844
2845/// Reflow blockquote content lines and apply continuation style.
2846///
2847/// Segments separated by hard breaks are reflowed independently. The output lines
2848/// receive blockquote prefixes according to `continuation_style`: the first line and
2849/// any line that would start a new block structure always get an explicit prefix;
2850/// other lines follow the detected style.
2851///
2852/// Returns the styled, reflowed lines (without a trailing newline).
2853pub fn reflow_blockquote_content(
2854    lines: &[BlockquoteLineData],
2855    explicit_prefix: &str,
2856    continuation_style: BlockquoteContinuationStyle,
2857    options: &ReflowOptions,
2858) -> Vec<String> {
2859    let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2860    let segments = split_into_segments_strs(&content_strs);
2861    let mut reflowed_content_lines: Vec<String> = Vec::new();
2862
2863    for segment in segments {
2864        let hard_break_type = segment.last().and_then(|&line| {
2865            let line = line.strip_suffix('\r').unwrap_or(line);
2866            if line.ends_with('\\') {
2867                Some("\\")
2868            } else if line.ends_with("  ") {
2869                Some("  ")
2870            } else {
2871                None
2872            }
2873        });
2874
2875        let pieces: Vec<&str> = segment
2876            .iter()
2877            .map(|&line| {
2878                if let Some(l) = line.strip_suffix('\\') {
2879                    l.trim_end()
2880                } else if let Some(l) = line.strip_suffix("  ") {
2881                    l.trim_end()
2882                } else {
2883                    line.trim_end()
2884                }
2885            })
2886            .collect();
2887
2888        let segment_text = pieces.join(" ");
2889        let segment_text = segment_text.trim();
2890        if segment_text.is_empty() {
2891            continue;
2892        }
2893
2894        let mut reflowed = reflow_line(segment_text, options);
2895        if let Some(break_marker) = hard_break_type
2896            && !reflowed.is_empty()
2897        {
2898            let last_idx = reflowed.len() - 1;
2899            if !has_hard_break(&reflowed[last_idx]) {
2900                reflowed[last_idx].push_str(break_marker);
2901            }
2902        }
2903        reflowed_content_lines.extend(reflowed);
2904    }
2905
2906    let mut styled_lines: Vec<String> = Vec::new();
2907    for (idx, line) in reflowed_content_lines.iter().enumerate() {
2908        let force_explicit = idx == 0
2909            || continuation_style == BlockquoteContinuationStyle::Explicit
2910            || should_force_explicit_blockquote_line(line);
2911        if force_explicit {
2912            styled_lines.push(format!("{explicit_prefix}{line}"));
2913        } else {
2914            styled_lines.push(line.clone());
2915        }
2916    }
2917
2918    styled_lines
2919}
2920
2921fn is_blockquote_content_boundary(content: &str) -> bool {
2922    let trimmed = content.trim();
2923    trimmed.is_empty()
2924        || is_block_boundary(trimmed)
2925        || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2926        || trimmed.starts_with(":::")
2927        || crate::utils::is_template_directive_only(content)
2928        || is_standalone_attr_list(content)
2929        || is_snippet_block_delimiter(content)
2930}
2931
2932fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2933    let mut segments = Vec::new();
2934    let mut current = Vec::new();
2935
2936    for &line in lines {
2937        current.push(line);
2938        if has_hard_break(line) {
2939            segments.push(current);
2940            current = Vec::new();
2941        }
2942    }
2943
2944    if !current.is_empty() {
2945        segments.push(current);
2946    }
2947
2948    segments
2949}
2950
2951fn reflow_blockquote_paragraph_at_line(
2952    content: &str,
2953    lines: &[&str],
2954    target_idx: usize,
2955    options: &ReflowOptions,
2956) -> Option<ParagraphReflow> {
2957    let mut anchor_idx = target_idx;
2958    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
2959        parsed.nesting_level
2960    } else {
2961        let mut found = None;
2962        let mut idx = target_idx;
2963        loop {
2964            if lines[idx].trim().is_empty() {
2965                break;
2966            }
2967            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
2968                found = Some((idx, parsed.nesting_level));
2969                break;
2970            }
2971            if idx == 0 {
2972                break;
2973            }
2974            idx -= 1;
2975        }
2976        let (idx, level) = found?;
2977        anchor_idx = idx;
2978        level
2979    };
2980
2981    // Expand backward to capture prior quote content at the same nesting level.
2982    let mut para_start = anchor_idx;
2983    while para_start > 0 {
2984        let prev_idx = para_start - 1;
2985        let prev_line = lines[prev_idx];
2986
2987        if prev_line.trim().is_empty() {
2988            break;
2989        }
2990
2991        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
2992            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
2993                break;
2994            }
2995            para_start = prev_idx;
2996            continue;
2997        }
2998
2999        let prev_lazy = prev_line.trim_start();
3000        if is_blockquote_content_boundary(prev_lazy) {
3001            break;
3002        }
3003        para_start = prev_idx;
3004    }
3005
3006    // Lazy continuation cannot precede the first explicit marker.
3007    while para_start < lines.len() {
3008        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
3009            para_start += 1;
3010            continue;
3011        };
3012        target_level = parsed.nesting_level;
3013        break;
3014    }
3015
3016    if para_start >= lines.len() || para_start > target_idx {
3017        return None;
3018    }
3019
3020    // Collect explicit lines at target level and lazy continuation lines.
3021    // Each entry is (original_line_idx, BlockquoteLineData).
3022    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
3023    let mut idx = para_start;
3024    while idx < lines.len() {
3025        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
3026            break;
3027        }
3028
3029        let line = lines[idx];
3030        if line.trim().is_empty() {
3031            break;
3032        }
3033
3034        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
3035            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
3036                break;
3037            }
3038            collected.push((
3039                idx,
3040                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
3041            ));
3042            idx += 1;
3043            continue;
3044        }
3045
3046        let lazy_content = line.trim_start();
3047        if is_blockquote_content_boundary(lazy_content) {
3048            break;
3049        }
3050
3051        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
3052        idx += 1;
3053    }
3054
3055    if collected.is_empty() {
3056        return None;
3057    }
3058
3059    let para_end = collected[collected.len() - 1].0;
3060    if target_idx < para_start || target_idx > para_end {
3061        return None;
3062    }
3063
3064    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();
3065
3066    let fallback_prefix = line_data
3067        .iter()
3068        .find_map(|d| d.prefix.clone())
3069        .unwrap_or_else(|| "> ".to_string());
3070    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
3071    let continuation_style = blockquote_continuation_style(&line_data);
3072
3073    let adjusted_line_length = options
3074        .line_length
3075        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
3076        .max(1);
3077
3078    let adjusted_options = ReflowOptions {
3079        line_length: adjusted_line_length,
3080        ..options.clone()
3081    };
3082
3083    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);
3084
3085    if styled_lines.is_empty() {
3086        return None;
3087    }
3088
3089    // Calculate byte offsets.
3090    let mut start_byte = 0;
3091    for line in lines.iter().take(para_start) {
3092        start_byte += line.len() + 1;
3093    }
3094
3095    let mut end_byte = start_byte;
3096    for line in lines.iter().take(para_end + 1).skip(para_start) {
3097        end_byte += line.len() + 1;
3098    }
3099
3100    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3101    if !includes_trailing_newline {
3102        end_byte -= 1;
3103    }
3104
3105    let reflowed_joined = styled_lines.join("\n");
3106    let reflowed_text = if includes_trailing_newline {
3107        if reflowed_joined.ends_with('\n') {
3108            reflowed_joined
3109        } else {
3110            format!("{reflowed_joined}\n")
3111        }
3112    } else if reflowed_joined.ends_with('\n') {
3113        reflowed_joined.trim_end_matches('\n').to_string()
3114    } else {
3115        reflowed_joined
3116    };
3117
3118    Some(ParagraphReflow {
3119        start_byte,
3120        end_byte,
3121        reflowed_text,
3122    })
3123}
3124
3125/// Reflow a single paragraph at the specified line number
3126///
3127/// This function finds the paragraph containing the given line number,
3128/// reflows it according to the specified line length, and returns
3129/// information about the paragraph location and its reflowed text.
3130///
3131/// # Arguments
3132///
3133/// * `content` - The full document content
3134/// * `line_number` - The 1-based line number within the paragraph to reflow
3135/// * `line_length` - The target line length for reflowing
3136///
3137/// # Returns
3138///
3139/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
3140/// or `None` if the line number is out of bounds or the content at that
3141/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
3142pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
3143    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
3144}
3145
3146/// Reflow a paragraph at the given line with a specific length mode.
3147pub fn reflow_paragraph_at_line_with_mode(
3148    content: &str,
3149    line_number: usize,
3150    line_length: usize,
3151    length_mode: ReflowLengthMode,
3152) -> Option<ParagraphReflow> {
3153    let options = ReflowOptions {
3154        line_length,
3155        length_mode,
3156        ..Default::default()
3157    };
3158    reflow_paragraph_at_line_with_options(content, line_number, &options)
3159}
3160
3161/// Reflow a paragraph at the given line using the provided options.
3162///
3163/// This is the canonical implementation used by both the rule's fix mode and the
3164/// LSP "Reflow paragraph" action. Passing a fully configured `ReflowOptions` allows
3165/// the LSP action to respect user-configured reflow mode, abbreviations, etc.
3166///
3167/// # Returns
3168///
3169/// Returns `Some(ParagraphReflow)` with byte offsets and reflowed text, or `None`
3170/// if the line is out of bounds or sits inside a non-reflow-able construct.
3171pub fn reflow_paragraph_at_line_with_options(
3172    content: &str,
3173    line_number: usize,
3174    options: &ReflowOptions,
3175) -> Option<ParagraphReflow> {
3176    if line_number == 0 {
3177        return None;
3178    }
3179
3180    let lines: Vec<&str> = content.lines().collect();
3181
3182    // Check if line number is valid (1-based)
3183    if line_number > lines.len() {
3184        return None;
3185    }
3186
3187    let target_idx = line_number - 1; // Convert to 0-based
3188    let target_line = lines[target_idx];
3189    let trimmed = target_line.trim();
3190
3191    // Handle blockquote paragraphs (including lazy continuation lines) with
3192    // style-preserving output.
3193    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
3194        return Some(blockquote_reflow);
3195    }
3196
3197    // Don't reflow special blocks
3198    if is_paragraph_boundary(trimmed, target_line) {
3199        return None;
3200    }
3201
3202    // Find paragraph start - scan backward until blank line or special block
3203    let mut para_start = target_idx;
3204    while para_start > 0 {
3205        let prev_idx = para_start - 1;
3206        let prev_line = lines[prev_idx];
3207        let prev_trimmed = prev_line.trim();
3208
3209        // Stop at blank line or special blocks
3210        if is_paragraph_boundary(prev_trimmed, prev_line) {
3211            break;
3212        }
3213
3214        para_start = prev_idx;
3215    }
3216
3217    // Find paragraph end - scan forward until blank line or special block
3218    let mut para_end = target_idx;
3219    while para_end + 1 < lines.len() {
3220        let next_idx = para_end + 1;
3221        let next_line = lines[next_idx];
3222        let next_trimmed = next_line.trim();
3223
3224        // Stop at blank line or special blocks
3225        if is_paragraph_boundary(next_trimmed, next_line) {
3226            break;
3227        }
3228
3229        para_end = next_idx;
3230    }
3231
3232    // Extract paragraph lines
3233    let paragraph_lines = &lines[para_start..=para_end];
3234
3235    // Calculate byte offsets
3236    let mut start_byte = 0;
3237    for line in lines.iter().take(para_start) {
3238        start_byte += line.len() + 1; // +1 for newline
3239    }
3240
3241    let mut end_byte = start_byte;
3242    for line in paragraph_lines {
3243        end_byte += line.len() + 1; // +1 for newline
3244    }
3245
3246    // Track whether the byte range includes a trailing newline
3247    // (it doesn't if this is the last line and the file doesn't end with newline)
3248    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3249
3250    // Adjust end_byte if the last line doesn't have a newline
3251    if !includes_trailing_newline {
3252        end_byte -= 1;
3253    }
3254
3255    // Join paragraph lines and reflow
3256    let paragraph_text = paragraph_lines.join("\n");
3257
3258    // Reflow the paragraph using reflow_markdown to handle it properly
3259    let reflowed = reflow_markdown(&paragraph_text, options);
3260
3261    // Ensure reflowed text matches whether the byte range includes a trailing newline
3262    // This is critical: if the range includes a newline, the replacement must too,
3263    // otherwise the next line will get appended to the reflowed paragraph
3264    let reflowed_text = if includes_trailing_newline {
3265        // Range includes newline - ensure reflowed text has one
3266        if reflowed.ends_with('\n') {
3267            reflowed
3268        } else {
3269            format!("{reflowed}\n")
3270        }
3271    } else {
3272        // Range doesn't include newline - ensure reflowed text doesn't have one
3273        if reflowed.ends_with('\n') {
3274            reflowed.trim_end_matches('\n').to_string()
3275        } else {
3276            reflowed
3277        }
3278    };
3279
3280    Some(ParagraphReflow {
3281        start_byte,
3282        end_byte,
3283        reflowed_text,
3284    })
3285}
3286
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `text_ends_with_abbreviation()` against the built-in list.
    ///
    /// Kept inline with the implementation; per the note this replaces, public
    /// API and integration tests live in tests/utils/text_reflow_test.rs.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Built-in abbreviations (titles and i.e./e.g.) ending in a period.
        let recognized = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognized {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected an abbreviation ending in {text:?}"
            );
        }

        let unrecognized = [
            // Not in the built-in list (etc doesn't always have a period)
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            // Question mark / exclamation instead of a period
            "Dr?",
            "Mr!",
            "paradigms?",
            // No punctuation, and the empty string
            "word",
            "",
        ];
        for text in unrecognized {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected no abbreviation ending in {text:?}"
            );
        }
    }

    #[test]
    fn test_is_unordered_list_marker() {
        // Marker followed by content, and lone markers.
        for text in ["- item", "* item", "+ item", "-", "*", "+"] {
            assert!(is_unordered_list_marker(text), "expected a list marker in {text:?}");
        }

        let not_markers = [
            // Horizontal rules
            "---",
            "***",
            "- - -",
            "* * *",
            // Emphasis, missing space after marker, empty, plain text, heading
            "*emphasis*",
            "-word",
            "",
            "text",
            "# heading",
        ];
        for text in not_markers {
            assert!(!is_unordered_list_marker(text), "expected no list marker in {text:?}");
        }
    }

    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            "",                           // empty line
            "# Heading",                  // ATX heading
            "## Level 2",                 // ATX heading
            "```rust",                    // code fence
            "~~~",                        // tilde code fence
            "> quote",                    // blockquote
            "| cell |",                   // table
            "[link]: http://example.com", // reference def
            "---",                        // horizontal rule
            "***",                        // horizontal rule
            "- item",                     // unordered list
            "* item",                     // unordered list
            "+ item",                     // unordered list
            "1. item",                    // ordered list
            "10. item",                   // ordered list
            ": definition",               // definition list
            ":::",                        // div marker
            "::::: {.callout-note}",      // div marker with attrs
        ];
        for text in boundaries {
            assert!(is_block_boundary(text), "expected a block boundary for {text:?}");
        }

        // Paragraph continuations: plain text, emphasis, and an inline link.
        for text in ["regular text", "*emphasis*", "[link](url)", "some words here"] {
            assert!(!is_block_boundary(text), "expected no block boundary for {text:?}");
        }
    }

    /// A definition list item directly after a single-line paragraph is a block
    /// boundary and must not be merged into that paragraph.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let result = reflow_markdown("Term\n: Definition of the term", &options);

        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );

        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines, ["Term", ": Definition of the term"]);
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Each pair is (trimmed line, raw line).
        let boundaries = [
            // Core block boundary checks are inherited
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code blocks (≥4 spaces or tab)
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows, with and without a leading pipe
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"),
        ];
        for (trimmed, raw) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, raw),
                "expected a paragraph boundary for {raw:?}"
            );
        }

        // Plain text, and a 2-space indent (which is not code).
        for (trimmed, raw) in [("regular text", "regular text"), ("text", "  text")] {
            assert!(
                !is_paragraph_boundary(trimmed, raw),
                "expected no paragraph boundary for {raw:?}"
            );
        }
    }

    /// Div markers (:::) are paragraph boundaries for `reflow_paragraph_at_line`,
    /// so asking to reflow the marker line itself yields nothing.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the div marker.
        assert!(
            reflow_paragraph_at_line(content, 3, 80).is_none(),
            "Div marker line should not be reflowed"
        );
    }
}
rumdl_lib/utils/text_reflow.rs

rumdl_lib/utils/
text_reflow.rs