Skip to main content

rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::calculate_indentation_width_default;
7use crate::utils::is_definition_list_item;
8use crate::utils::mkdocs_attr_list::{ATTR_LIST_PATTERN, is_standalone_attr_list};
9use crate::utils::mkdocs_snippets::is_snippet_block_delimiter;
10use crate::utils::regex_cache::{
11    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
12    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, LINKED_IMAGE_INLINE_INLINE,
13    LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF, REF_IMAGE_REGEX, REF_LINK_REGEX,
14    SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
15};
16use crate::utils::sentence_utils::{
17    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
18    text_ends_with_abbreviation,
19};
20use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
21use std::collections::HashSet;
22use unicode_width::UnicodeWidthStr;
23
/// Length calculation mode for reflow.
///
/// Selects how the length of a string is measured when it is compared
/// against the configured line length.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s).
    ///
    /// Note: this is *not* a grapheme-cluster count; a character built from
    /// several code points (e.g. a base letter plus combining mark) counts
    /// once per code point.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes of the UTF-8 encoding
    Bytes,
}
35
36/// Calculate the display length of a string based on the length mode
37fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
38    match mode {
39        ReflowLengthMode::Chars => s.chars().count(),
40        ReflowLengthMode::Visual => s.width(),
41        ReflowLengthMode::Bytes => s.len(),
42    }
43}
44
45/// Options for reflowing text
46#[derive(Clone)]
47pub struct ReflowOptions {
48    /// Target line length
49    pub line_length: usize,
50    /// Whether to break on sentence boundaries when possible
51    pub break_on_sentences: bool,
52    /// Whether to preserve existing line breaks in paragraphs
53    pub preserve_breaks: bool,
54    /// Whether to enforce one sentence per line
55    pub sentence_per_line: bool,
56    /// Whether to use semantic line breaks (cascading split strategy)
57    pub semantic_line_breaks: bool,
58    /// Custom abbreviations for sentence detection
59    /// Periods are optional - both "Dr" and "Dr." work the same
60    /// Custom abbreviations are always added to the built-in defaults
61    pub abbreviations: Option<Vec<String>>,
62    /// How to measure string length for line-length comparisons
63    pub length_mode: ReflowLengthMode,
64    /// Whether to treat {#id .class key="value"} as atomic (unsplittable) elements.
65    /// Enabled for MkDocs and Kramdown flavors.
66    pub attr_lists: bool,
67    /// Whether to require uppercase after periods for sentence detection.
68    /// When true (default), only "word. Capital" is a sentence boundary.
69    /// When false, "word. lowercase" is also treated as a sentence boundary.
70    /// Does not affect ! and ? which are always treated as sentence boundaries.
71    pub require_sentence_capital: bool,
72    /// Cap list continuation indent to this value when set.
73    /// Used by mkdocs flavor where continuation is always 4 spaces
74    /// regardless of checkbox markers.
75    pub max_list_continuation_indent: Option<usize>,
76}
77
78impl Default for ReflowOptions {
79    fn default() -> Self {
80        Self {
81            line_length: 80,
82            break_on_sentences: true,
83            preserve_breaks: false,
84            sentence_per_line: false,
85            semantic_line_breaks: false,
86            abbreviations: None,
87            length_mode: ReflowLengthMode::default(),
88            attr_lists: false,
89            require_sentence_capital: true,
90            max_list_continuation_indent: None,
91        }
92    }
93}
94
/// Build a per-character mask that is `true` for every position belonging to
/// an inline code span, delimiters included.
///
/// A span opens with a run of N backticks and closes at the next run of
/// exactly N backticks (any N). An opening run with no matching close is
/// treated as literal text; scanning then resumes right after that run.
///
/// The mask is indexed by `char` position, not by byte offset.
fn compute_inline_code_mask(text: &str) -> Vec<bool> {
    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    let mut mask = vec![false; total];

    // Length of the backtick run beginning at `from`.
    let run_len = |from: usize| chars[from..].iter().take_while(|&&ch| ch == '`').count();

    let mut cursor = 0;
    while cursor < total {
        if chars[cursor] != '`' {
            cursor += 1;
            continue;
        }

        let open_start = cursor;
        let open_len = run_len(open_start);
        let after_open = open_start + open_len;

        // Look for a closing run of the same length; runs of a different
        // length are skipped over as part of the candidate content.
        let mut scan = after_open;
        let mut span_end = None;
        while scan < total {
            if chars[scan] != '`' {
                scan += 1;
                continue;
            }
            let close_len = run_len(scan);
            scan += close_len;
            if close_len == open_len {
                span_end = Some(scan);
                break;
            }
        }

        match span_end {
            Some(end) => {
                // Opening delimiter, content and closing delimiter are all marked.
                for flag in &mut mask[open_start..end] {
                    *flag = true;
                }
                cursor = end;
            }
            // Unclosed: the backticks are literal, continue after them.
            None => cursor = after_open,
        }
    }

    mask
}
155
156/// Detect if a character position is a sentence boundary
157/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
158/// Supports both ASCII punctuation (. ! ?) and CJK punctuation (。 ! ?)
159fn is_sentence_boundary(
160    text: &str,
161    pos: usize,
162    abbreviations: &HashSet<String>,
163    require_sentence_capital: bool,
164) -> bool {
165    let chars: Vec<char> = text.chars().collect();
166
167    if pos + 1 >= chars.len() {
168        return false;
169    }
170
171    let c = chars[pos];
172    let next_char = chars[pos + 1];
173
174    // Check for CJK sentence-ending punctuation (。, !, ?)
175    // CJK punctuation doesn't require space or uppercase after it
176    if is_cjk_sentence_ending(c) {
177        // Skip any trailing emphasis/strikethrough markers
178        let mut after_punct_pos = pos + 1;
179        while after_punct_pos < chars.len()
180            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
181        {
182            after_punct_pos += 1;
183        }
184
185        // Skip whitespace
186        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
187            after_punct_pos += 1;
188        }
189
190        // Check if we have more content (any non-whitespace)
191        if after_punct_pos >= chars.len() {
192            return false;
193        }
194
195        // Skip leading emphasis/strikethrough markers
196        while after_punct_pos < chars.len()
197            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
198        {
199            after_punct_pos += 1;
200        }
201
202        if after_punct_pos >= chars.len() {
203            return false;
204        }
205
206        // For CJK, we accept any character as the start of the next sentence
207        // (no uppercase requirement, since CJK doesn't have case)
208        return true;
209    }
210
211    // Check for ASCII sentence-ending punctuation
212    if c != '.' && c != '!' && c != '?' {
213        return false;
214    }
215
216    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
217    let (_space_pos, after_space_pos) = if next_char == ' ' {
218        // Normal case: punctuation followed by space
219        (pos + 1, pos + 2)
220    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
221        // Sentence ends with quote - check what follows the quote
222        if chars[pos + 2] == ' ' {
223            // Just quote followed by space: 'sentence." '
224            (pos + 2, pos + 3)
225        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
226            // Quote followed by emphasis: 'sentence."* '
227            (pos + 3, pos + 4)
228        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
229            && pos + 4 < chars.len()
230            && chars[pos + 3] == chars[pos + 2]
231            && chars[pos + 4] == ' '
232        {
233            // Quote followed by bold: 'sentence."** '
234            (pos + 4, pos + 5)
235        } else {
236            return false;
237        }
238    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
239        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
240        (pos + 2, pos + 3)
241    } else if (next_char == '*' || next_char == '_')
242        && pos + 3 < chars.len()
243        && chars[pos + 2] == next_char
244        && chars[pos + 3] == ' '
245    {
246        // Sentence ends with bold: "sentence.** " or "sentence.__ "
247        (pos + 3, pos + 4)
248    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
249        // Sentence ends with strikethrough: "sentence.~~ "
250        (pos + 3, pos + 4)
251    } else {
252        return false;
253    };
254
255    // Skip all whitespace after the space to find the start of the next sentence
256    let mut next_char_pos = after_space_pos;
257    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
258        next_char_pos += 1;
259    }
260
261    // Check if we reached the end of the string
262    if next_char_pos >= chars.len() {
263        return false;
264    }
265
266    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
267    let mut first_letter_pos = next_char_pos;
268    while first_letter_pos < chars.len()
269        && (chars[first_letter_pos] == '*'
270            || chars[first_letter_pos] == '_'
271            || chars[first_letter_pos] == '~'
272            || is_opening_quote(chars[first_letter_pos]))
273    {
274        first_letter_pos += 1;
275    }
276
277    // Check if we reached the end after skipping emphasis
278    if first_letter_pos >= chars.len() {
279        return false;
280    }
281
282    let first_char = chars[first_letter_pos];
283
284    // For ! and ?, sentence boundaries are unambiguous — no uppercase requirement
285    if c == '!' || c == '?' {
286        return true;
287    }
288
289    // Period-specific checks: periods are ambiguous (abbreviations, decimals, initials)
290    // so we apply additional guards before accepting a sentence boundary.
291
292    if pos > 0 {
293        // Check for common abbreviations
294        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
295        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
296            return false;
297        }
298
299        // Check for decimal numbers (e.g., "3.14 is pi")
300        if chars[pos - 1].is_numeric() && first_char.is_ascii_digit() {
301            return false;
302        }
303
304        // Check for single-letter initials (e.g., "J. K. Rowling")
305        // A single uppercase letter before the period preceded by whitespace or start
306        // is likely an initial, not a sentence ending.
307        if chars[pos - 1].is_ascii_uppercase() && (pos == 1 || (pos >= 2 && chars[pos - 2].is_whitespace())) {
308            return false;
309        }
310    }
311
312    // In strict mode, require uppercase or CJK to start the next sentence after a period.
313    // In relaxed mode, accept any alphanumeric character.
314    if require_sentence_capital && !first_char.is_uppercase() && !is_cjk_char(first_char) {
315        return false;
316    }
317
318    true
319}
320
321/// Split text into sentences
322pub fn split_into_sentences(text: &str) -> Vec<String> {
323    split_into_sentences_custom(text, &None)
324}
325
326/// Split text into sentences with custom abbreviations
327pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
328    let abbreviations = get_abbreviations(custom_abbreviations);
329    split_into_sentences_with_set(text, &abbreviations, true)
330}
331
332/// Internal function to split text into sentences with a pre-computed abbreviations set
333/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
334fn split_into_sentences_with_set(
335    text: &str,
336    abbreviations: &HashSet<String>,
337    require_sentence_capital: bool,
338) -> Vec<String> {
339    // Pre-compute which character positions are inside inline code spans
340    let in_code = compute_inline_code_mask(text);
341
342    let mut sentences = Vec::new();
343    let mut current_sentence = String::new();
344    let mut chars = text.chars().peekable();
345    let mut pos = 0;
346
347    while let Some(c) = chars.next() {
348        current_sentence.push(c);
349
350        if !in_code[pos] && is_sentence_boundary(text, pos, abbreviations, require_sentence_capital) {
351            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
352            while let Some(&next) = chars.peek() {
353                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
354                    current_sentence.push(chars.next().unwrap());
355                    pos += 1;
356                } else {
357                    break;
358                }
359            }
360
361            // Consume the space after the sentence
362            if chars.peek() == Some(&' ') {
363                chars.next();
364                pos += 1;
365            }
366
367            sentences.push(current_sentence.trim().to_string());
368            current_sentence.clear();
369        }
370
371        pos += 1;
372    }
373
374    // Add any remaining text as the last sentence
375    if !current_sentence.trim().is_empty() {
376        sentences.push(current_sentence.trim().to_string());
377    }
378    sentences
379}
380
/// Check if a line is a horizontal rule (---, ___, ***).
///
/// The line must begin with `-`, `_` or `*`, consist solely of that same
/// character plus optional spaces, and contain at least three of it.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the whole line must use.
    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '_' | '*')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
409
/// Check if a line is a numbered list item (e.g., "1. ", "10. ").
///
/// The line must start with one or more ASCII digits immediately followed by
/// a period and a space (consistent with list marker extraction). "2019."
/// alone is NOT treated as a list item to avoid false positives.
///
/// Only ASCII digits count: other Unicode numeric characters (such as `½` or
/// Arabic-Indic digits) do not form Markdown list numbers.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    // ASCII digits are single bytes, so `digit_count` is a valid byte offset.
    digit_count > 0 && line.as_bytes()[digit_count..].starts_with(b". ")
}
433
434/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
435fn is_unordered_list_marker(s: &str) -> bool {
436    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
437        && !is_horizontal_rule(s)
438        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
439}
440
441/// Shared structural checks for block boundary detection.
442/// Checks elements that only depend on the trimmed line content.
443fn is_block_boundary_core(trimmed: &str) -> bool {
444    trimmed.is_empty()
445        || trimmed.starts_with('#')
446        || trimmed.starts_with("```")
447        || trimmed.starts_with("~~~")
448        || trimmed.starts_with('>')
449        || (trimmed.starts_with('[') && trimmed.contains("]:"))
450        || is_horizontal_rule(trimmed)
451        || is_unordered_list_marker(trimmed)
452        || is_numbered_list_item(trimmed)
453        || is_definition_list_item(trimmed)
454        || trimmed.starts_with(":::")
455}
456
457/// Check if a trimmed line starts a new structural block element.
458/// Used for paragraph boundary detection in `reflow_markdown()`.
459fn is_block_boundary(trimmed: &str) -> bool {
460    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
461}
462
463/// Check if a line starts a new structural block for paragraph boundary detection
464/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
465/// (≥4 spaces) and table row detection via `is_potential_table_row`.
466fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
467    is_block_boundary_core(trimmed)
468        || calculate_indentation_width_default(line) >= 4
469        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
470}
471
/// Report whether `line` ends with a Markdown hard line break.
///
/// CommonMark recognizes two spellings, both accepted here:
/// 1. two (or more) trailing spaces
/// 2. a trailing backslash
///
/// A single trailing `\r` (CRLF input) is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let body = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    ["  ", "\\"].iter().any(|&marker| body.ends_with(marker))
}
481
/// Report whether the last character of `text` is `.`, `!` or `?`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
486
/// Strip trailing whitespace from `s` but keep a hard-break marker.
///
/// Rules, applied after removing one trailing `\r` (CRLF input):
/// - a line ending in a backslash (mdformat-style hard break) is returned unchanged;
/// - a line ending in two or more spaces is normalized to its content plus
///   exactly two spaces, or to the empty string if it is all whitespace;
/// - any other line simply has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();

    // No space-style hard break, or nothing but whitespace: the trimmed text is the answer.
    if !line.ends_with("  ") || content.is_empty() {
        return content.to_owned();
    }

    // Traditional hard break: re-attach exactly two spaces.
    let mut kept = String::with_capacity(content.len() + 2);
    kept.push_str(content);
    kept.push_str("  ");
    kept
}
517
518/// Parse markdown elements using the appropriate parser based on options.
519fn parse_elements(text: &str, options: &ReflowOptions) -> Vec<Element> {
520    if options.attr_lists {
521        parse_markdown_elements_with_attr_lists(text)
522    } else {
523        parse_markdown_elements(text)
524    }
525}
526
527pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
528    // For sentence-per-line mode, always process regardless of length
529    if options.sentence_per_line {
530        let elements = parse_elements(line, options);
531        return reflow_elements_sentence_per_line(&elements, &options.abbreviations, options.require_sentence_capital);
532    }
533
534    // For semantic line breaks mode, use cascading split strategy
535    if options.semantic_line_breaks {
536        let elements = parse_elements(line, options);
537        return reflow_elements_semantic(&elements, options);
538    }
539
540    // Quick check: if line is already short enough or no wrapping requested, return as-is
541    // line_length = 0 means no wrapping (unlimited line length)
542    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
543        return vec![line.to_string()];
544    }
545
546    // Parse the markdown to identify elements
547    let elements = parse_elements(line, options);
548
549    // Reflow the elements into lines
550    reflow_elements(&elements, options)
551}
552
/// Where the image inside a linked-image (badge) structure comes from.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// URL given inline, as in `![alt](url)`
    Inline(String),
    /// Reference label, as in `![alt][ref]`
    Reference(String),
}
561
/// Where the link wrapping a linked-image (badge) structure points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// URL given inline, as in `](url)`
    Inline(String),
    /// Reference label, as in `][ref]`
    Reference(String),
}
570
571/// Represents a piece of content in the markdown
572#[derive(Debug, Clone)]
573enum Element {
574    /// Plain text that can be wrapped
575    Text(String),
576    /// A complete markdown inline link [text](url)
577    Link { text: String, url: String },
578    /// A complete markdown reference link [text][ref]
579    ReferenceLink { text: String, reference: String },
580    /// A complete markdown empty reference link [text][]
581    EmptyReferenceLink { text: String },
582    /// A complete markdown shortcut reference link [ref]
583    ShortcutReference { reference: String },
584    /// A complete markdown inline image ![alt](url)
585    InlineImage { alt: String, url: String },
586    /// A complete markdown reference image ![alt][ref]
587    ReferenceImage { alt: String, reference: String },
588    /// A complete markdown empty reference image ![alt][]
589    EmptyReferenceImage { alt: String },
590    /// A clickable image badge in any of 4 forms:
591    /// - [![alt](img-url)](link-url)
592    /// - [![alt][img-ref]](link-url)
593    /// - [![alt](img-url)][link-ref]
594    /// - [![alt][img-ref]][link-ref]
595    LinkedImage {
596        alt: String,
597        img_source: LinkedImageSource,
598        link_target: LinkedImageTarget,
599    },
600    /// Footnote reference [^note]
601    FootnoteReference { note: String },
602    /// Strikethrough text ~~text~~
603    Strikethrough(String),
604    /// Wiki-style link [[wiki]] or [[wiki|text]]
605    WikiLink(String),
606    /// Inline math $math$
607    InlineMath(String),
608    /// Display math $$math$$
609    DisplayMath(String),
610    /// Emoji shortcode :emoji:
611    EmojiShortcode(String),
612    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
613    Autolink(String),
614    /// HTML tag <tag> or </tag> or <tag/>
615    HtmlTag(String),
616    /// HTML entity &nbsp; or &#123;
617    HtmlEntity(String),
618    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
619    HugoShortcode(String),
620    /// MkDocs/kramdown attribute list {#id .class key="value"}
621    AttrList(String),
622    /// Inline code `code`
623    Code(String),
624    /// Bold text **text** or __text__
625    Bold {
626        content: String,
627        /// True if underscore markers (__), false for asterisks (**)
628        underscore: bool,
629    },
630    /// Italic text *text* or _text_
631    Italic {
632        content: String,
633        /// True if underscore marker (_), false for asterisk (*)
634        underscore: bool,
635    },
636}
637
638impl std::fmt::Display for Element {
639    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
640        match self {
641            Element::Text(s) => write!(f, "{s}"),
642            Element::Link { text, url } => write!(f, "[{text}]({url})"),
643            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
644            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
645            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
646            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
647            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
648            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
649            Element::LinkedImage {
650                alt,
651                img_source,
652                link_target,
653            } => {
654                // Build the image part: ![alt](url) or ![alt][ref]
655                let img_part = match img_source {
656                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
657                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
658                };
659                // Build the link part: (url) or [ref]
660                match link_target {
661                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
662                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
663                }
664            }
665            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
666            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
667            Element::WikiLink(s) => write!(f, "[[{s}]]"),
668            Element::InlineMath(s) => write!(f, "${s}$"),
669            Element::DisplayMath(s) => write!(f, "$${s}$$"),
670            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
671            Element::Autolink(s) => write!(f, "{s}"),
672            Element::HtmlTag(s) => write!(f, "{s}"),
673            Element::HtmlEntity(s) => write!(f, "{s}"),
674            Element::HugoShortcode(s) => write!(f, "{s}"),
675            Element::AttrList(s) => write!(f, "{s}"),
676            Element::Code(s) => write!(f, "`{s}`"),
677            Element::Bold { content, underscore } => {
678                if *underscore {
679                    write!(f, "__{content}__")
680                } else {
681                    write!(f, "**{content}**")
682                }
683            }
684            Element::Italic { content, underscore } => {
685                if *underscore {
686                    write!(f, "_{content}_")
687                } else {
688                    write!(f, "*{content}*")
689                }
690            }
691        }
692    }
693}
694
695impl Element {
696    /// Calculate the display width of this element using the given length mode.
697    /// This formats the element and computes its width, correctly handling
698    /// visual width for CJK characters and other wide glyphs.
699    fn display_width(&self, mode: ReflowLengthMode) -> usize {
700        let formatted = format!("{self}");
701        display_len(&formatted, mode)
702    }
703}
704
/// A formatting span (emphasis, strong, or strikethrough) located by pulldown-cmark.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset of the first opening marker
    start: usize,
    /// Byte offset just past the last closing marker
    end: usize,
    /// Text between the opening and closing markers
    content: String,
    /// `true` for strong (bold) emphasis
    is_strong: bool,
    /// `true` for strikethrough (`~~text~~`)
    is_strikethrough: bool,
    /// `true` when the source used underscore markers (emphasis/strong only)
    uses_underscore: bool,
}
721
722/// Extract emphasis and strikethrough spans from text using pulldown-cmark
723///
724/// This provides CommonMark-compliant emphasis parsing, correctly handling:
725/// - Nested emphasis like `*text **bold** more*`
726/// - Left/right flanking delimiter rules
727/// - Underscore vs asterisk markers
728/// - GFM strikethrough (~~text~~)
729///
730/// Returns spans sorted by start position.
731fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
732    let mut spans = Vec::new();
733    let mut options = Options::empty();
734    options.insert(Options::ENABLE_STRIKETHROUGH);
735
736    // Stacks to track nested formatting with their start positions
737    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
738    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
739    let mut strikethrough_stack: Vec<usize> = Vec::new();
740
741    let parser = Parser::new_ext(text, options).into_offset_iter();
742
743    for (event, range) in parser {
744        match event {
745            Event::Start(Tag::Emphasis) => {
746                // Check if this uses underscore by looking at the original text
747                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
748                emphasis_stack.push((range.start, uses_underscore));
749            }
750            Event::End(TagEnd::Emphasis) => {
751                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
752                    // Extract content between the markers (1 char marker on each side)
753                    let content_start = start_byte + 1;
754                    let content_end = range.end - 1;
755                    if content_end > content_start
756                        && let Some(content) = text.get(content_start..content_end)
757                    {
758                        spans.push(EmphasisSpan {
759                            start: start_byte,
760                            end: range.end,
761                            content: content.to_string(),
762                            is_strong: false,
763                            is_strikethrough: false,
764                            uses_underscore,
765                        });
766                    }
767                }
768            }
769            Event::Start(Tag::Strong) => {
770                // Check if this uses underscore by looking at the original text
771                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
772                strong_stack.push((range.start, uses_underscore));
773            }
774            Event::End(TagEnd::Strong) => {
775                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
776                    // Extract content between the markers (2 char marker on each side)
777                    let content_start = start_byte + 2;
778                    let content_end = range.end - 2;
779                    if content_end > content_start
780                        && let Some(content) = text.get(content_start..content_end)
781                    {
782                        spans.push(EmphasisSpan {
783                            start: start_byte,
784                            end: range.end,
785                            content: content.to_string(),
786                            is_strong: true,
787                            is_strikethrough: false,
788                            uses_underscore,
789                        });
790                    }
791                }
792            }
793            Event::Start(Tag::Strikethrough) => {
794                strikethrough_stack.push(range.start);
795            }
796            Event::End(TagEnd::Strikethrough) => {
797                if let Some(start_byte) = strikethrough_stack.pop() {
798                    // Extract content between the ~~ markers (2 char marker on each side)
799                    let content_start = start_byte + 2;
800                    let content_end = range.end - 2;
801                    if content_end > content_start
802                        && let Some(content) = text.get(content_start..content_end)
803                    {
804                        spans.push(EmphasisSpan {
805                            start: start_byte,
806                            end: range.end,
807                            content: content.to_string(),
808                            is_strong: false,
809                            is_strikethrough: true,
810                            uses_underscore: false,
811                        });
812                    }
813                }
814            }
815            _ => {}
816        }
817    }
818
819    // Sort by start position
820    spans.sort_by_key(|s| s.start);
821    spans
822}
823
824/// Parse markdown elements from text preserving the raw syntax
825///
826/// Detection order is critical:
827/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
828/// 2. Inline images ![alt](url) - before links to handle ! prefix
829/// 3. Reference images ![alt][ref] - before reference links
830/// 4. Inline links [text](url) - before reference links
831/// 5. Reference links [text][ref] - before shortcut references
832/// 6. Shortcut reference links [ref] - detected last to avoid false positives
833/// 7. Other elements (code, bold, italic, etc.) - processed normally
834fn parse_markdown_elements(text: &str) -> Vec<Element> {
835    parse_markdown_elements_inner(text, false)
836}
837
838fn parse_markdown_elements_with_attr_lists(text: &str) -> Vec<Element> {
839    parse_markdown_elements_inner(text, true)
840}
841
842fn parse_markdown_elements_inner(text: &str, attr_lists: bool) -> Vec<Element> {
843    let mut elements = Vec::new();
844    let mut remaining = text;
845
846    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
847    let emphasis_spans = extract_emphasis_spans(text);
848
849    while !remaining.is_empty() {
850        // Calculate current byte offset in original text
851        let current_offset = text.len() - remaining.len();
852        // Find the earliest occurrence of any markdown pattern
853        // Store (start, end, pattern_name) to unify standard Regex and FancyRegex match results
854        let mut earliest_match: Option<(usize, usize, &str)> = None;
855
856        // Check for linked images FIRST (all 4 variants)
857        // Quick literal check: only run expensive regexes if we might have a linked image
858        // Pattern starts with "[!" so check for that first
859        if remaining.contains("[!") {
860            // Pattern 1: [![alt](img)](link) - inline image in inline link
861            if let Some(m) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
862                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
863            {
864                earliest_match = Some((m.start(), m.end(), "linked_image_ii"));
865            }
866
867            // Pattern 2: [![alt][ref]](link) - reference image in inline link
868            if let Some(m) = LINKED_IMAGE_REF_INLINE.find(remaining)
869                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
870            {
871                earliest_match = Some((m.start(), m.end(), "linked_image_ri"));
872            }
873
874            // Pattern 3: [![alt](img)][ref] - inline image in reference link
875            if let Some(m) = LINKED_IMAGE_INLINE_REF.find(remaining)
876                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
877            {
878                earliest_match = Some((m.start(), m.end(), "linked_image_ir"));
879            }
880
881            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
882            if let Some(m) = LINKED_IMAGE_REF_REF.find(remaining)
883                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
884            {
885                earliest_match = Some((m.start(), m.end(), "linked_image_rr"));
886            }
887        }
888
889        // Check for images (they start with ! so should be detected before links)
890        // Inline images - ![alt](url)
891        if let Some(m) = INLINE_IMAGE_REGEX.find(remaining)
892            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
893        {
894            earliest_match = Some((m.start(), m.end(), "inline_image"));
895        }
896
897        // Reference images - ![alt][ref]
898        if let Some(m) = REF_IMAGE_REGEX.find(remaining)
899            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
900        {
901            earliest_match = Some((m.start(), m.end(), "ref_image"));
902        }
903
904        // Check for footnote references - [^note]
905        if let Some(m) = FOOTNOTE_REF_REGEX.find(remaining)
906            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
907        {
908            earliest_match = Some((m.start(), m.end(), "footnote_ref"));
909        }
910
911        // Check for inline links - [text](url)
912        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
913            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
914        {
915            earliest_match = Some((m.start(), m.end(), "inline_link"));
916        }
917
918        // Check for reference links - [text][ref]
919        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
920            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
921        {
922            earliest_match = Some((m.start(), m.end(), "ref_link"));
923        }
924
925        // Check for shortcut reference links - [ref]
926        // Only check if we haven't found an earlier pattern that would conflict
927        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
928            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
929        {
930            earliest_match = Some((m.start(), m.end(), "shortcut_ref"));
931        }
932
933        // Check for wiki-style links - [[wiki]]
934        if let Some(m) = WIKI_LINK_REGEX.find(remaining)
935            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
936        {
937            earliest_match = Some((m.start(), m.end(), "wiki_link"));
938        }
939
940        // Check for display math first (before inline) - $$math$$
941        if let Some(m) = DISPLAY_MATH_REGEX.find(remaining)
942            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
943        {
944            earliest_match = Some((m.start(), m.end(), "display_math"));
945        }
946
947        // Check for inline math - $math$
948        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
949            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
950        {
951            earliest_match = Some((m.start(), m.end(), "inline_math"));
952        }
953
954        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
955
956        // Check for emoji shortcodes - :emoji:
957        if let Some(m) = EMOJI_SHORTCODE_REGEX.find(remaining)
958            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
959        {
960            earliest_match = Some((m.start(), m.end(), "emoji"));
961        }
962
963        // Check for HTML entities - &nbsp; etc
964        if let Some(m) = HTML_ENTITY_REGEX.find(remaining)
965            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
966        {
967            earliest_match = Some((m.start(), m.end(), "html_entity"));
968        }
969
970        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
971        // Must be checked before other patterns to avoid false sentence breaks
972        if let Some(m) = HUGO_SHORTCODE_REGEX.find(remaining)
973            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
974        {
975            earliest_match = Some((m.start(), m.end(), "hugo_shortcode"));
976        }
977
978        // Check for HTML tags - <tag> </tag> <tag/>
979        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
980        if let Some(m) = HTML_TAG_PATTERN.find(remaining)
981            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
982        {
983            // Check if this is an autolink (starts with protocol or mailto:)
984            let matched_text = &remaining[m.start()..m.end()];
985            let is_url_autolink = matched_text.starts_with("<http://")
986                || matched_text.starts_with("<https://")
987                || matched_text.starts_with("<mailto:")
988                || matched_text.starts_with("<ftp://")
989                || matched_text.starts_with("<ftps://");
990
991            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
992            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
993            let is_email_autolink = {
994                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
995                EMAIL_PATTERN.is_match(content)
996            };
997
998            if is_url_autolink || is_email_autolink {
999                earliest_match = Some((m.start(), m.end(), "autolink"));
1000            } else {
1001                earliest_match = Some((m.start(), m.end(), "html_tag"));
1002            }
1003        }
1004
1005        // Find earliest non-link special characters
1006        let mut next_special = remaining.len();
1007        let mut special_type = "";
1008        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
1009        let mut attr_list_len: usize = 0;
1010
1011        // Check for code spans (not handled by pulldown-cmark in this context)
1012        if let Some(pos) = remaining.find('`')
1013            && pos < next_special
1014        {
1015            next_special = pos;
1016            special_type = "code";
1017        }
1018
1019        // Check for MkDocs/kramdown attr lists - {#id .class key="value"}
1020        if attr_lists
1021            && let Some(pos) = remaining.find('{')
1022            && pos < next_special
1023            && let Some(m) = ATTR_LIST_PATTERN.find(&remaining[pos..])
1024            && m.start() == 0
1025        {
1026            next_special = pos;
1027            special_type = "attr_list";
1028            attr_list_len = m.end();
1029        }
1030
1031        // Check for emphasis using pulldown-cmark's pre-extracted spans
1032        // Find the earliest emphasis span that starts within remaining text
1033        for span in &emphasis_spans {
1034            if span.start >= current_offset && span.start < current_offset + remaining.len() {
1035                let pos_in_remaining = span.start - current_offset;
1036                if pos_in_remaining < next_special {
1037                    next_special = pos_in_remaining;
1038                    special_type = "pulldown_emphasis";
1039                    pulldown_emphasis = Some(span);
1040                }
1041                break; // Spans are sorted by start position, so first match is earliest
1042            }
1043        }
1044
1045        // Determine which pattern to process first
1046        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
1047            pos < next_special
1048        } else {
1049            false
1050        };
1051
1052        if should_process_markdown_link {
1053            let (pos, match_end, pattern_type) = earliest_match.unwrap();
1054
1055            // Add any text before the match
1056            if pos > 0 {
1057                elements.push(Element::Text(remaining[..pos].to_string()));
1058            }
1059
1060            // Process the matched pattern
1061            match pattern_type {
1062                // Pattern 1: [![alt](img)](link) - inline image in inline link
1063                "linked_image_ii" => {
1064                    if let Some(caps) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
1065                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1066                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1067                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1068                        elements.push(Element::LinkedImage {
1069                            alt: alt.to_string(),
1070                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1071                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1072                        });
1073                        remaining = &remaining[match_end..];
1074                    } else {
1075                        elements.push(Element::Text("[".to_string()));
1076                        remaining = &remaining[1..];
1077                    }
1078                }
1079                // Pattern 2: [![alt][ref]](link) - reference image in inline link
1080                "linked_image_ri" => {
1081                    if let Some(caps) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
1082                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1083                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1084                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1085                        elements.push(Element::LinkedImage {
1086                            alt: alt.to_string(),
1087                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1088                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
1089                        });
1090                        remaining = &remaining[match_end..];
1091                    } else {
1092                        elements.push(Element::Text("[".to_string()));
1093                        remaining = &remaining[1..];
1094                    }
1095                }
1096                // Pattern 3: [![alt](img)][ref] - inline image in reference link
1097                "linked_image_ir" => {
1098                    if let Some(caps) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
1099                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1100                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1101                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1102                        elements.push(Element::LinkedImage {
1103                            alt: alt.to_string(),
1104                            img_source: LinkedImageSource::Inline(img_url.to_string()),
1105                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1106                        });
1107                        remaining = &remaining[match_end..];
1108                    } else {
1109                        elements.push(Element::Text("[".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
1114                "linked_image_rr" => {
1115                    if let Some(caps) = LINKED_IMAGE_REF_REF.captures(remaining) {
1116                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1117                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1118                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
1119                        elements.push(Element::LinkedImage {
1120                            alt: alt.to_string(),
1121                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
1122                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
1123                        });
1124                        remaining = &remaining[match_end..];
1125                    } else {
1126                        elements.push(Element::Text("[".to_string()));
1127                        remaining = &remaining[1..];
1128                    }
1129                }
1130                "inline_image" => {
1131                    if let Some(caps) = INLINE_IMAGE_REGEX.captures(remaining) {
1132                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1133                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1134                        elements.push(Element::InlineImage {
1135                            alt: alt.to_string(),
1136                            url: url.to_string(),
1137                        });
1138                        remaining = &remaining[match_end..];
1139                    } else {
1140                        elements.push(Element::Text("!".to_string()));
1141                        remaining = &remaining[1..];
1142                    }
1143                }
1144                "ref_image" => {
1145                    if let Some(caps) = REF_IMAGE_REGEX.captures(remaining) {
1146                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1147                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1148
1149                        if reference.is_empty() {
1150                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1151                        } else {
1152                            elements.push(Element::ReferenceImage {
1153                                alt: alt.to_string(),
1154                                reference: reference.to_string(),
1155                            });
1156                        }
1157                        remaining = &remaining[match_end..];
1158                    } else {
1159                        elements.push(Element::Text("!".to_string()));
1160                        remaining = &remaining[1..];
1161                    }
1162                }
1163                "footnote_ref" => {
1164                    if let Some(caps) = FOOTNOTE_REF_REGEX.captures(remaining) {
1165                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1166                        elements.push(Element::FootnoteReference { note: note.to_string() });
1167                        remaining = &remaining[match_end..];
1168                    } else {
1169                        elements.push(Element::Text("[".to_string()));
1170                        remaining = &remaining[1..];
1171                    }
1172                }
1173                "inline_link" => {
1174                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1175                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1176                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1177                        elements.push(Element::Link {
1178                            text: text.to_string(),
1179                            url: url.to_string(),
1180                        });
1181                        remaining = &remaining[match_end..];
1182                    } else {
1183                        // Fallback - shouldn't happen
1184                        elements.push(Element::Text("[".to_string()));
1185                        remaining = &remaining[1..];
1186                    }
1187                }
1188                "ref_link" => {
1189                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1190                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1191                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1192
1193                        if reference.is_empty() {
1194                            // Empty reference link [text][]
1195                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1196                        } else {
1197                            // Regular reference link [text][ref]
1198                            elements.push(Element::ReferenceLink {
1199                                text: text.to_string(),
1200                                reference: reference.to_string(),
1201                            });
1202                        }
1203                        remaining = &remaining[match_end..];
1204                    } else {
1205                        // Fallback - shouldn't happen
1206                        elements.push(Element::Text("[".to_string()));
1207                        remaining = &remaining[1..];
1208                    }
1209                }
1210                "shortcut_ref" => {
1211                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1212                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1213                        elements.push(Element::ShortcutReference {
1214                            reference: reference.to_string(),
1215                        });
1216                        remaining = &remaining[match_end..];
1217                    } else {
1218                        // Fallback - shouldn't happen
1219                        elements.push(Element::Text("[".to_string()));
1220                        remaining = &remaining[1..];
1221                    }
1222                }
1223                "wiki_link" => {
1224                    if let Some(caps) = WIKI_LINK_REGEX.captures(remaining) {
1225                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1226                        elements.push(Element::WikiLink(content.to_string()));
1227                        remaining = &remaining[match_end..];
1228                    } else {
1229                        elements.push(Element::Text("[[".to_string()));
1230                        remaining = &remaining[2..];
1231                    }
1232                }
1233                "display_math" => {
1234                    if let Some(caps) = DISPLAY_MATH_REGEX.captures(remaining) {
1235                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1236                        elements.push(Element::DisplayMath(math.to_string()));
1237                        remaining = &remaining[match_end..];
1238                    } else {
1239                        elements.push(Element::Text("$$".to_string()));
1240                        remaining = &remaining[2..];
1241                    }
1242                }
1243                "inline_math" => {
1244                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1245                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1246                        elements.push(Element::InlineMath(math.to_string()));
1247                        remaining = &remaining[match_end..];
1248                    } else {
1249                        elements.push(Element::Text("$".to_string()));
1250                        remaining = &remaining[1..];
1251                    }
1252                }
1253                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1254                "emoji" => {
1255                    if let Some(caps) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1256                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1257                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1258                        remaining = &remaining[match_end..];
1259                    } else {
1260                        elements.push(Element::Text(":".to_string()));
1261                        remaining = &remaining[1..];
1262                    }
1263                }
1264                "html_entity" => {
1265                    // HTML entities are captured whole
1266                    elements.push(Element::HtmlEntity(remaining[pos..match_end].to_string()));
1267                    remaining = &remaining[match_end..];
1268                }
1269                "hugo_shortcode" => {
1270                    // Hugo shortcodes are atomic elements - preserve them exactly
1271                    elements.push(Element::HugoShortcode(remaining[pos..match_end].to_string()));
1272                    remaining = &remaining[match_end..];
1273                }
1274                "autolink" => {
1275                    // Autolinks are atomic elements - preserve them exactly
1276                    elements.push(Element::Autolink(remaining[pos..match_end].to_string()));
1277                    remaining = &remaining[match_end..];
1278                }
1279                "html_tag" => {
1280                    // HTML tags are captured whole
1281                    elements.push(Element::HtmlTag(remaining[pos..match_end].to_string()));
1282                    remaining = &remaining[match_end..];
1283                }
1284                _ => {
1285                    // Unknown pattern, treat as text
1286                    elements.push(Element::Text("[".to_string()));
1287                    remaining = &remaining[1..];
1288                }
1289            }
1290        } else {
1291            // Process non-link special characters
1292
1293            // Add any text before the special character
1294            if next_special > 0 && next_special < remaining.len() {
1295                elements.push(Element::Text(remaining[..next_special].to_string()));
1296                remaining = &remaining[next_special..];
1297            }
1298
1299            // Process the special element
1300            match special_type {
1301                "code" => {
1302                    // Find end of code
1303                    if let Some(code_end) = remaining[1..].find('`') {
1304                        let code = &remaining[1..1 + code_end];
1305                        elements.push(Element::Code(code.to_string()));
1306                        remaining = &remaining[1 + code_end + 1..];
1307                    } else {
1308                        // No closing backtick, treat as text
1309                        elements.push(Element::Text(remaining.to_string()));
1310                        break;
1311                    }
1312                }
1313                "attr_list" => {
1314                    elements.push(Element::AttrList(remaining[..attr_list_len].to_string()));
1315                    remaining = &remaining[attr_list_len..];
1316                }
1317                "pulldown_emphasis" => {
1318                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1319                    if let Some(span) = pulldown_emphasis {
1320                        let span_len = span.end - span.start;
1321                        if span.is_strikethrough {
1322                            elements.push(Element::Strikethrough(span.content.clone()));
1323                        } else if span.is_strong {
1324                            elements.push(Element::Bold {
1325                                content: span.content.clone(),
1326                                underscore: span.uses_underscore,
1327                            });
1328                        } else {
1329                            elements.push(Element::Italic {
1330                                content: span.content.clone(),
1331                                underscore: span.uses_underscore,
1332                            });
1333                        }
1334                        remaining = &remaining[span_len..];
1335                    } else {
1336                        // Fallback - shouldn't happen
1337                        elements.push(Element::Text(remaining[..1].to_string()));
1338                        remaining = &remaining[1..];
1339                    }
1340                }
1341                _ => {
1342                    // No special elements found, add all remaining text
1343                    elements.push(Element::Text(remaining.to_string()));
1344                    break;
1345                }
1346            }
1347        }
1348    }
1349
1350    elements
1351}
1352
1353/// Reflow elements for sentence-per-line mode
1354fn reflow_elements_sentence_per_line(
1355    elements: &[Element],
1356    custom_abbreviations: &Option<Vec<String>>,
1357    require_sentence_capital: bool,
1358) -> Vec<String> {
1359    let abbreviations = get_abbreviations(custom_abbreviations);
1360    let mut lines = Vec::new();
1361    let mut current_line = String::new();
1362
1363    for (idx, element) in elements.iter().enumerate() {
1364        let element_str = format!("{element}");
1365
1366        // For text elements, split into sentences
1367        if let Element::Text(text) = element {
1368            // Simply append text - it already has correct spacing from tokenization
1369            let combined = format!("{current_line}{text}");
1370            // Use the pre-computed abbreviations set to avoid redundant computation
1371            let sentences = split_into_sentences_with_set(&combined, &abbreviations, require_sentence_capital);
1372
1373            if sentences.len() > 1 {
1374                // We found sentence boundaries
1375                for (i, sentence) in sentences.iter().enumerate() {
1376                    if i == 0 {
1377                        // First sentence might continue from previous elements
1378                        // But check if it ends with an abbreviation
1379                        let trimmed = sentence.trim();
1380
1381                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1382                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1383                            current_line = sentence.to_string();
1384                        } else {
1385                            // Normal case - emit the first sentence
1386                            lines.push(sentence.to_string());
1387                            current_line.clear();
1388                        }
1389                    } else if i == sentences.len() - 1 {
1390                        // Last sentence: check if it's complete or incomplete
1391                        let trimmed = sentence.trim();
1392                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1393
1394                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1395                            // Complete sentence - emit it immediately
1396                            lines.push(sentence.to_string());
1397                            current_line.clear();
1398                        } else {
1399                            // Incomplete sentence - save for next iteration
1400                            current_line = sentence.to_string();
1401                        }
1402                    } else {
1403                        // Complete sentences in the middle
1404                        lines.push(sentence.to_string());
1405                    }
1406                }
1407            } else {
1408                // Single sentence - check if it's complete
1409                let trimmed = combined.trim();
1410
1411                // If the combined result is only whitespace, don't accumulate it.
1412                // This prevents leading spaces on subsequent elements when lines
1413                // are joined with spaces during reflow iteration.
1414                if trimmed.is_empty() {
1415                    continue;
1416                }
1417
1418                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1419
1420                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1421                    // Complete single sentence - emit it
1422                    lines.push(trimmed.to_string());
1423                    current_line.clear();
1424                } else {
1425                    // Incomplete sentence - continue accumulating
1426                    current_line = combined;
1427                }
1428            }
1429        } else if let Element::Italic { content, underscore } = element {
1430            // Handle italic elements - may contain multiple sentences that need continuation
1431            let marker = if *underscore { "_" } else { "*" };
1432            handle_emphasis_sentence_split(
1433                content,
1434                marker,
1435                &abbreviations,
1436                require_sentence_capital,
1437                &mut current_line,
1438                &mut lines,
1439            );
1440        } else if let Element::Bold { content, underscore } = element {
1441            // Handle bold elements - may contain multiple sentences that need continuation
1442            let marker = if *underscore { "__" } else { "**" };
1443            handle_emphasis_sentence_split(
1444                content,
1445                marker,
1446                &abbreviations,
1447                require_sentence_capital,
1448                &mut current_line,
1449                &mut lines,
1450            );
1451        } else if let Element::Strikethrough(content) = element {
1452            // Handle strikethrough elements - may contain multiple sentences that need continuation
1453            handle_emphasis_sentence_split(
1454                content,
1455                "~~",
1456                &abbreviations,
1457                require_sentence_capital,
1458                &mut current_line,
1459                &mut lines,
1460            );
1461        } else {
1462            // Non-text, non-emphasis elements (Code, Links, etc.)
1463            // Check if this element is adjacent to the preceding text (no space between)
1464            let is_adjacent = if idx > 0 {
1465                match &elements[idx - 1] {
1466                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1467                    _ => true,
1468                }
1469            } else {
1470                false
1471            };
1472
1473            // Add space before element if needed, but not for adjacent elements
1474            if !is_adjacent
1475                && !current_line.is_empty()
1476                && !current_line.ends_with(' ')
1477                && !current_line.ends_with('(')
1478                && !current_line.ends_with('[')
1479            {
1480                current_line.push(' ');
1481            }
1482            current_line.push_str(&element_str);
1483        }
1484    }
1485
1486    // Add any remaining content
1487    if !current_line.is_empty() {
1488        lines.push(current_line.trim().to_string());
1489    }
1490    lines
1491}
1492
1493/// Handle splitting emphasis content at sentence boundaries while preserving markers
1494fn handle_emphasis_sentence_split(
1495    content: &str,
1496    marker: &str,
1497    abbreviations: &HashSet<String>,
1498    require_sentence_capital: bool,
1499    current_line: &mut String,
1500    lines: &mut Vec<String>,
1501) {
1502    // Split the emphasis content into sentences
1503    let sentences = split_into_sentences_with_set(content, abbreviations, require_sentence_capital);
1504
1505    if sentences.len() <= 1 {
1506        // Single sentence or no boundaries - treat as atomic
1507        if !current_line.is_empty()
1508            && !current_line.ends_with(' ')
1509            && !current_line.ends_with('(')
1510            && !current_line.ends_with('[')
1511        {
1512            current_line.push(' ');
1513        }
1514        current_line.push_str(marker);
1515        current_line.push_str(content);
1516        current_line.push_str(marker);
1517
1518        // Check if the emphasis content ends with sentence punctuation - if so, emit
1519        let trimmed = content.trim();
1520        let ends_with_punct = ends_with_sentence_punct(trimmed);
1521        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1522            lines.push(current_line.clone());
1523            current_line.clear();
1524        }
1525    } else {
1526        // Multiple sentences - each gets its own emphasis markers
1527        for (i, sentence) in sentences.iter().enumerate() {
1528            let trimmed = sentence.trim();
1529            if trimmed.is_empty() {
1530                continue;
1531            }
1532
1533            if i == 0 {
1534                // First sentence: combine with current_line and emit
1535                if !current_line.is_empty()
1536                    && !current_line.ends_with(' ')
1537                    && !current_line.ends_with('(')
1538                    && !current_line.ends_with('[')
1539                {
1540                    current_line.push(' ');
1541                }
1542                current_line.push_str(marker);
1543                current_line.push_str(trimmed);
1544                current_line.push_str(marker);
1545
1546                // Check if this is a complete sentence
1547                let ends_with_punct = ends_with_sentence_punct(trimmed);
1548                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1549                    lines.push(current_line.clone());
1550                    current_line.clear();
1551                }
1552            } else if i == sentences.len() - 1 {
1553                // Last sentence: check if complete
1554                let ends_with_punct = ends_with_sentence_punct(trimmed);
1555
1556                let mut line = String::new();
1557                line.push_str(marker);
1558                line.push_str(trimmed);
1559                line.push_str(marker);
1560
1561                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1562                    lines.push(line);
1563                } else {
1564                    // Incomplete - keep in current_line for potential continuation
1565                    *current_line = line;
1566                }
1567            } else {
1568                // Middle sentences: emit with markers
1569                let mut line = String::new();
1570                line.push_str(marker);
1571                line.push_str(trimmed);
1572                line.push_str(marker);
1573                lines.push(line);
1574            }
1575        }
1576    }
1577}
1578
/// English break-words used for semantic line break splitting.
/// These are conjunctions, relative pronouns, and conjunctive adverbs
/// (e.g. "however", "therefore") where a line break reads naturally.
///
/// Every entry is lowercase ASCII. `split_at_break_word` looks each one up in
/// a lowercased copy of the line and only accepts an occurrence that is
/// followed by a space and is either preceded by a space or at the very start
/// of the text; the break is placed *before* the word.
const BREAK_WORDS: &[&str] = &[
    "and",
    "or",
    "but",
    "nor",
    "yet",
    "so",
    "for",
    "which",
    "that",
    "because",
    "when",
    "if",
    "while",
    "where",
    "although",
    "though",
    "unless",
    "since",
    "after",
    "before",
    "until",
    "as",
    "once",
    "whether",
    "however",
    "therefore",
    "moreover",
    "furthermore",
    "nevertheless",
    "whereas",
];
1614
/// Returns `true` when `c` is one of the clause-level punctuation marks that
/// semantic line breaking may split after: comma, semicolon, colon, or
/// em dash (U+2014).
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1619
1620/// Compute element spans for a flat text representation of elements.
1621/// Returns Vec of (start, end) byte offsets for non-Text elements,
1622/// so we can check that a split position doesn't fall inside them.
1623fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1624    let mut spans = Vec::new();
1625    let mut offset = 0;
1626    for element in elements {
1627        let rendered = format!("{element}");
1628        let len = rendered.len();
1629        if !matches!(element, Element::Text(_)) {
1630            spans.push((offset, offset + len));
1631        }
1632        offset += len;
1633    }
1634    spans
1635}
1636
/// Report whether byte position `pos` lies strictly inside one of `spans`.
///
/// Both ends are exclusive: a position equal to a span's start or end is
/// treated as being on the boundary, not inside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(span_start, span_end) in spans {
        if span_start < pos && pos < span_end {
            return true;
        }
    }
    false
}
1641
/// Minimum fraction of line_length that the first part of a split must occupy.
/// Prevents awkwardly short first lines like "A," or "Note:" on their own.
///
/// Callers turn this into an absolute threshold with
/// `((line_length as f64) * MIN_SPLIT_RATIO) as usize`, so the threshold is
/// truncated toward zero. `reflow_elements_semantic` reuses the same threshold
/// to decide which short lines are candidates for merging into the previous line.
const MIN_SPLIT_RATIO: f64 = 0.3;
1645
1646/// Split a line at the latest clause punctuation that keeps the first part
1647/// within `line_length`. Returns None if no valid split point exists or if
1648/// the split would create an unreasonably short first line.
1649fn split_at_clause_punctuation(
1650    text: &str,
1651    line_length: usize,
1652    element_spans: &[(usize, usize)],
1653    length_mode: ReflowLengthMode,
1654) -> Option<(String, String)> {
1655    let chars: Vec<char> = text.chars().collect();
1656    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1657
1658    // Find the char index where accumulated display width exceeds line_length
1659    let mut width_acc = 0;
1660    let mut search_end_char = 0;
1661    for (idx, &c) in chars.iter().enumerate() {
1662        let c_width = display_len(&c.to_string(), length_mode);
1663        if width_acc + c_width > line_length {
1664            break;
1665        }
1666        width_acc += c_width;
1667        search_end_char = idx + 1;
1668    }
1669
1670    let mut best_pos = None;
1671    for i in (0..search_end_char).rev() {
1672        if is_clause_punctuation(chars[i]) {
1673            // Convert char position to byte position for element span check
1674            let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1675            if !is_inside_element(byte_pos, element_spans) {
1676                best_pos = Some(i);
1677                break;
1678            }
1679        }
1680    }
1681
1682    let pos = best_pos?;
1683
1684    // Reject splits that create very short first lines
1685    let first: String = chars[..=pos].iter().collect();
1686    let first_display_len = display_len(&first, length_mode);
1687    if first_display_len < min_first_len {
1688        return None;
1689    }
1690
1691    // Split after the punctuation character
1692    let rest: String = chars[pos + 1..].iter().collect();
1693    let rest = rest.trim_start().to_string();
1694
1695    if rest.is_empty() {
1696        return None;
1697    }
1698
1699    Some((first, rest))
1700}
1701
1702/// Split a line before the latest break-word that keeps the first part
1703/// within `line_length`. Returns None if no valid split point exists or if
1704/// the split would create an unreasonably short first line.
1705fn split_at_break_word(
1706    text: &str,
1707    line_length: usize,
1708    element_spans: &[(usize, usize)],
1709    length_mode: ReflowLengthMode,
1710) -> Option<(String, String)> {
1711    let lower = text.to_lowercase();
1712    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1713    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1714
1715    for &word in BREAK_WORDS {
1716        let mut search_start = 0;
1717        while let Some(pos) = lower[search_start..].find(word) {
1718            let abs_pos = search_start + pos;
1719
1720            // Verify it's a word boundary: preceded by space, followed by space
1721            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1722            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1723
1724            if preceded_by_space && followed_by_space {
1725                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1726                let first_part = text[..abs_pos].trim_end();
1727                let first_part_len = display_len(first_part, length_mode);
1728
1729                if first_part_len >= min_first_len
1730                    && first_part_len <= line_length
1731                    && !is_inside_element(abs_pos, element_spans)
1732                {
1733                    // Prefer the latest valid split point
1734                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1735                        best_split = Some((abs_pos, word.len()));
1736                    }
1737                }
1738            }
1739
1740            search_start = abs_pos + word.len();
1741        }
1742    }
1743
1744    let (byte_start, _word_len) = best_split?;
1745
1746    let first = text[..byte_start].trim_end().to_string();
1747    let rest = text[byte_start..].to_string();
1748
1749    if first.is_empty() || rest.trim().is_empty() {
1750        return None;
1751    }
1752
1753    Some((first, rest))
1754}
1755
1756/// Recursively cascade-split a line that exceeds line_length.
1757/// Tries clause punctuation first, then break-words, then word wrap.
1758fn cascade_split_line(
1759    text: &str,
1760    line_length: usize,
1761    abbreviations: &Option<Vec<String>>,
1762    length_mode: ReflowLengthMode,
1763    attr_lists: bool,
1764) -> Vec<String> {
1765    if line_length == 0 || display_len(text, length_mode) <= line_length {
1766        return vec![text.to_string()];
1767    }
1768
1769    let elements = parse_markdown_elements_inner(text, attr_lists);
1770    let element_spans = compute_element_spans(&elements);
1771
1772    // Try clause punctuation split
1773    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1774        let mut result = vec![first];
1775        result.extend(cascade_split_line(
1776            &rest,
1777            line_length,
1778            abbreviations,
1779            length_mode,
1780            attr_lists,
1781        ));
1782        return result;
1783    }
1784
1785    // Try break-word split
1786    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1787        let mut result = vec![first];
1788        result.extend(cascade_split_line(
1789            &rest,
1790            line_length,
1791            abbreviations,
1792            length_mode,
1793            attr_lists,
1794        ));
1795        return result;
1796    }
1797
1798    // Fallback: word wrap using existing reflow_elements
1799    let options = ReflowOptions {
1800        line_length,
1801        break_on_sentences: false,
1802        preserve_breaks: false,
1803        sentence_per_line: false,
1804        semantic_line_breaks: false,
1805        abbreviations: abbreviations.clone(),
1806        length_mode,
1807        attr_lists,
1808        require_sentence_capital: true,
1809        max_list_continuation_indent: None,
1810    };
1811    reflow_elements(&elements, &options)
1812}
1813
1814/// Reflow elements using semantic line breaks strategy:
1815/// 1. Split at sentence boundaries (always)
1816/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
1817fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1818    // Step 1: Split into sentences using existing sentence-per-line logic
1819    let sentence_lines =
1820        reflow_elements_sentence_per_line(elements, &options.abbreviations, options.require_sentence_capital);
1821
1822    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
1823    // When line_length is 0 (unlimited), skip cascading — sentence splits only
1824    if options.line_length == 0 {
1825        return sentence_lines;
1826    }
1827
1828    let length_mode = options.length_mode;
1829    let mut result = Vec::new();
1830    for line in sentence_lines {
1831        if display_len(&line, length_mode) <= options.line_length {
1832            result.push(line);
1833        } else {
1834            result.extend(cascade_split_line(
1835                &line,
1836                options.line_length,
1837                &options.abbreviations,
1838                length_mode,
1839                options.attr_lists,
1840            ));
1841        }
1842    }
1843
1844    // Step 3: Merge very short trailing lines back into the previous line.
1845    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
1846    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1847    let mut merged: Vec<String> = Vec::with_capacity(result.len());
1848    for line in result {
1849        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1850            // Don't merge across sentence boundaries — sentence splits are intentional
1851            let prev_ends_at_sentence = {
1852                let trimmed = merged.last().unwrap().trim_end();
1853                trimmed
1854                    .chars()
1855                    .rev()
1856                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1857                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1858            };
1859
1860            if !prev_ends_at_sentence {
1861                let prev = merged.last_mut().unwrap();
1862                let combined = format!("{prev} {line}");
1863                // Only merge if the combined line fits within the limit
1864                if display_len(&combined, length_mode) <= options.line_length {
1865                    *prev = combined;
1866                    continue;
1867                }
1868            }
1869        }
1870        merged.push(line);
1871    }
1872    merged
1873}
1874
/// Find the byte offset of the last space in `line` that is safe to split at,
/// i.e. one that is not inside any rendered non-Text element.
///
/// `element_spans` holds the (start, end) byte ranges of non-Text elements in
/// `line`. Both bounds are treated as exclusive (`start < pos < end`) because
/// element delimiters (e.g., `[`, `]`, `(`, `)`, `<`, `>`, `` ` ``) are never
/// spaces, so only interior positions need protection.
///
/// Returns `None` when `line` has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let inside_any_span = |at: usize| element_spans.iter().any(|&(lo, hi)| lo < at && at < hi);
    line.rmatch_indices(' ')
        .map(|(at, _)| at)
        .find(|&at| !inside_any_span(at))
}
1888
1889/// Reflow elements into lines that fit within the line length
1890fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1891    let mut lines = Vec::new();
1892    let mut current_line = String::new();
1893    let mut current_length = 0;
1894    // Track byte spans of non-Text elements in current_line for safe splitting
1895    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1896    let length_mode = options.length_mode;
1897
1898    for (idx, element) in elements.iter().enumerate() {
1899        let element_str = format!("{element}");
1900        let element_len = element.display_width(length_mode);
1901
1902        // Determine adjacency from the original elements, not from current_line.
1903        // Elements are adjacent when there's no whitespace between them in the source:
1904        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
1905        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
1906        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
1907        let is_adjacent_to_prev = if idx > 0 {
1908            match (&elements[idx - 1], element) {
1909                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1910                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1911                _ => true,
1912            }
1913        } else {
1914            false
1915        };
1916
1917        // For text elements that might need breaking
1918        if let Element::Text(text) = element {
1919            // Check if original text had leading whitespace
1920            let has_leading_space = text.starts_with(char::is_whitespace);
1921            // If this is a text element, always process it word by word
1922            let words: Vec<&str> = text.split_whitespace().collect();
1923
1924            for (i, word) in words.iter().enumerate() {
1925                let word_len = display_len(word, length_mode);
1926                // Check if this "word" is just punctuation that should stay attached
1927                let is_trailing_punct = word
1928                    .chars()
1929                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1930
1931                // First word of text adjacent to preceding non-text element
1932                // must stay attached (e.g., shortcode followed by punctuation or text)
1933                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1934
1935                if is_first_adjacent {
1936                    // Attach directly without space, preventing line break
1937                    if current_length + word_len > options.line_length && current_length > 0 {
1938                        // Would exceed — break before the adjacent group
1939                        // Use element-aware space search to avoid splitting inside links/code/etc.
1940                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
1941                            let before = current_line[..last_space].trim_end().to_string();
1942                            let after = current_line[last_space + 1..].to_string();
1943                            lines.push(before);
1944                            current_line = format!("{after}{word}");
1945                            current_length = display_len(&current_line, length_mode);
1946                            current_line_element_spans.clear();
1947                        } else {
1948                            current_line.push_str(word);
1949                            current_length += word_len;
1950                        }
1951                    } else {
1952                        current_line.push_str(word);
1953                        current_length += word_len;
1954                    }
1955                } else if current_length > 0
1956                    && current_length + 1 + word_len > options.line_length
1957                    && !is_trailing_punct
1958                {
1959                    // Start a new line (but never for trailing punctuation)
1960                    lines.push(current_line.trim().to_string());
1961                    current_line = word.to_string();
1962                    current_length = word_len;
1963                    current_line_element_spans.clear();
1964                } else {
1965                    // Add word to current line
1966                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
1967                    // AND this isn't trailing punctuation (which attaches directly)
1968                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1969                        current_line.push(' ');
1970                        current_length += 1;
1971                    }
1972                    current_line.push_str(word);
1973                    current_length += word_len;
1974                }
1975            }
1976        } else if matches!(
1977            element,
1978            Element::Italic { .. } | Element::Bold { .. } | Element::Strikethrough(_)
1979        ) && element_len > options.line_length
1980        {
1981            // Italic, bold, and strikethrough with content longer than line_length need word wrapping.
1982            // Split content word-by-word, attach the opening marker to the first word
1983            // and the closing marker to the last word.
1984            let (content, marker): (&str, &str) = match element {
1985                Element::Italic { content, underscore } => (content.as_str(), if *underscore { "_" } else { "*" }),
1986                Element::Bold { content, underscore } => (content.as_str(), if *underscore { "__" } else { "**" }),
1987                Element::Strikethrough(content) => (content.as_str(), "~~"),
1988                _ => unreachable!(),
1989            };
1990
1991            let words: Vec<&str> = content.split_whitespace().collect();
1992            let n = words.len();
1993
1994            if n == 0 {
1995                // Empty span — treat as atomic
1996                let full = format!("{marker}{marker}");
1997                let full_len = display_len(&full, length_mode);
1998                if !is_adjacent_to_prev && current_length > 0 {
1999                    current_line.push(' ');
2000                    current_length += 1;
2001                }
2002                current_line.push_str(&full);
2003                current_length += full_len;
2004            } else {
2005                for (i, word) in words.iter().enumerate() {
2006                    let is_first = i == 0;
2007                    let is_last = i == n - 1;
2008                    let word_str: String = match (is_first, is_last) {
2009                        (true, true) => format!("{marker}{word}{marker}"),
2010                        (true, false) => format!("{marker}{word}"),
2011                        (false, true) => format!("{word}{marker}"),
2012                        (false, false) => word.to_string(),
2013                    };
2014                    let word_len = display_len(&word_str, length_mode);
2015
2016                    let needs_space = if is_first {
2017                        !is_adjacent_to_prev && current_length > 0
2018                    } else {
2019                        current_length > 0
2020                    };
2021
2022                    if needs_space && current_length + 1 + word_len > options.line_length {
2023                        lines.push(current_line.trim_end().to_string());
2024                        current_line = word_str;
2025                        current_length = word_len;
2026                        current_line_element_spans.clear();
2027                    } else {
2028                        if needs_space {
2029                            current_line.push(' ');
2030                            current_length += 1;
2031                        }
2032                        current_line.push_str(&word_str);
2033                        current_length += word_len;
2034                    }
2035                }
2036            }
2037        } else {
2038            // For non-text elements (code, links, references), treat as atomic units
2039            // These should never be broken across lines
2040
2041            if is_adjacent_to_prev {
2042                // Adjacent to preceding text — attach directly without space
2043                if current_length + element_len > options.line_length {
2044                    // Would exceed limit — break before the adjacent word group
2045                    // Use element-aware space search to avoid splitting inside links/code/etc.
2046                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
2047                        let before = current_line[..last_space].trim_end().to_string();
2048                        let after = current_line[last_space + 1..].to_string();
2049                        lines.push(before);
2050                        current_line = format!("{after}{element_str}");
2051                        current_length = display_len(&current_line, length_mode);
2052                        current_line_element_spans.clear();
2053                        // Record the element span in the new current_line
2054                        let start = after.len();
2055                        current_line_element_spans.push((start, start + element_str.len()));
2056                    } else {
2057                        // No safe space to break at — accept the long line
2058                        let start = current_line.len();
2059                        current_line.push_str(&element_str);
2060                        current_length += element_len;
2061                        current_line_element_spans.push((start, current_line.len()));
2062                    }
2063                } else {
2064                    let start = current_line.len();
2065                    current_line.push_str(&element_str);
2066                    current_length += element_len;
2067                    current_line_element_spans.push((start, current_line.len()));
2068                }
2069            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
2070                // Not adjacent, would exceed — start new line
2071                lines.push(current_line.trim().to_string());
2072                current_line = element_str.clone();
2073                current_length = element_len;
2074                current_line_element_spans.clear();
2075                current_line_element_spans.push((0, element_str.len()));
2076            } else {
2077                // Not adjacent, fits — add with space
2078                let ends_with_opener =
2079                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
2080                if current_length > 0 && !ends_with_opener {
2081                    current_line.push(' ');
2082                    current_length += 1;
2083                }
2084                let start = current_line.len();
2085                current_line.push_str(&element_str);
2086                current_length += element_len;
2087                current_line_element_spans.push((start, current_line.len()));
2088            }
2089        }
2090    }
2091
2092    // Don't forget the last line
2093    if !current_line.is_empty() {
2094        lines.push(current_line.trim_end().to_string());
2095    }
2096
2097    lines
2098}
2099
2100/// Reflow markdown content preserving structure
2101pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
2102    let lines: Vec<&str> = content.lines().collect();
2103    let mut result = Vec::new();
2104    let mut i = 0;
2105
2106    while i < lines.len() {
2107        let line = lines[i];
2108        let trimmed = line.trim();
2109
2110        // Preserve empty lines
2111        if trimmed.is_empty() {
2112            result.push(String::new());
2113            i += 1;
2114            continue;
2115        }
2116
2117        // Preserve headings as-is
2118        if trimmed.starts_with('#') {
2119            result.push(line.to_string());
2120            i += 1;
2121            continue;
2122        }
2123
2124        // Preserve Quarto/Pandoc div markers (:::) as-is
2125        if trimmed.starts_with(":::") {
2126            result.push(line.to_string());
2127            i += 1;
2128            continue;
2129        }
2130
2131        // Preserve fenced code blocks
2132        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2133            result.push(line.to_string());
2134            i += 1;
2135            // Copy lines until closing fence
2136            while i < lines.len() {
2137                result.push(lines[i].to_string());
2138                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
2139                    i += 1;
2140                    break;
2141                }
2142                i += 1;
2143            }
2144            continue;
2145        }
2146
2147        // Preserve indented code blocks (4+ columns accounting for tab expansion)
2148        if calculate_indentation_width_default(line) >= 4 {
2149            // Collect all consecutive indented lines
2150            result.push(line.to_string());
2151            i += 1;
2152            while i < lines.len() {
2153                let next_line = lines[i];
2154                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
2155                if calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
2156                    result.push(next_line.to_string());
2157                    i += 1;
2158                } else {
2159                    break;
2160                }
2161            }
2162            continue;
2163        }
2164
2165        // Preserve block quotes (but reflow their content)
2166        if trimmed.starts_with('>') {
2167            // find() returns byte position which is correct for str slicing
2168            // The unwrap is safe because we already verified trimmed starts with '>'
2169            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
2170            let quote_prefix = line[0..gt_pos + 1].to_string();
2171            let quote_content = &line[quote_prefix.len()..].trim_start();
2172
2173            let reflowed = reflow_line(quote_content, options);
2174            for reflowed_line in reflowed.iter() {
2175                result.push(format!("{quote_prefix} {reflowed_line}"));
2176            }
2177            i += 1;
2178            continue;
2179        }
2180
2181        // Preserve horizontal rules first (before checking for lists)
2182        if is_horizontal_rule(trimmed) {
2183            result.push(line.to_string());
2184            i += 1;
2185            continue;
2186        }
2187
2188        // Preserve lists (but not horizontal rules)
2189        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
2190            // Find the list marker and preserve indentation
2191            let indent = line.len() - line.trim_start().len();
2192            let indent_str = " ".repeat(indent);
2193
2194            // For numbered lists, find the period and the space after it
2195            // For bullet lists, find the marker and the space after it
2196            let mut marker_end = indent;
2197            let mut content_start = indent;
2198
2199            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
2200                // Numbered list: find the period
2201                if let Some(period_pos) = line[indent..].find('.') {
2202                    marker_end = indent + period_pos + 1; // Include the period
2203                    content_start = marker_end;
2204                    // Skip any spaces after the period to find content start
2205                    // Use byte-based check since content_start is a byte index
2206                    // This is safe because space is ASCII (single byte)
2207                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2208                        content_start += 1;
2209                    }
2210                }
2211            } else {
2212                // Bullet list: marker is single character
2213                marker_end = indent + 1; // Just the marker character
2214                content_start = marker_end;
2215                // Skip any spaces after the marker
2216                // Use byte-based check since content_start is a byte index
2217                // This is safe because space is ASCII (single byte)
2218                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
2219                    content_start += 1;
2220                }
2221            }
2222
2223            // Minimum indent for continuation lines (based on list marker, before checkbox)
2224            let min_continuation_indent = content_start;
2225
2226            // Detect checkbox/task list markers: [ ], [x], [X]
2227            // GFM task lists work with both unordered and ordered lists
2228            let rest = &line[content_start..];
2229            if rest.starts_with("[ ] ") || rest.starts_with("[x] ") || rest.starts_with("[X] ") {
2230                marker_end = content_start + 3; // Include the checkbox `[ ]`
2231                content_start += 4; // Skip past `[ ] `
2232            }
2233
2234            let marker = &line[indent..marker_end];
2235
2236            // Collect all content for this list item (including continuation lines)
2237            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
2238            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
2239            i += 1;
2240
2241            // Collect continuation lines (indented lines that are part of this list item)
2242            // Use the base marker indent (not checkbox-extended) for collection,
2243            // since users may indent continuations to the bullet level, not the checkbox level
2244            while i < lines.len() {
2245                let next_line = lines[i];
2246                let next_trimmed = next_line.trim();
2247
2248                // Stop if we hit an empty line or another list item or special block
2249                if is_block_boundary(next_trimmed) {
2250                    break;
2251                }
2252
2253                // Check if this line is indented (continuation of list item)
2254                let next_indent = next_line.len() - next_line.trim_start().len();
2255                if next_indent >= min_continuation_indent {
2256                    // This is a continuation line - add its content
2257                    // Preserve hard breaks while trimming excessive whitespace
2258                    let trimmed_start = next_line.trim_start();
2259                    list_content.push(trim_preserving_hard_break(trimmed_start));
2260                    i += 1;
2261                } else {
2262                    // Not indented enough, not part of this list item
2263                    break;
2264                }
2265            }
2266
2267            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2268            // Hard breaks should prevent joining with the next line
2269            let combined_content = if options.preserve_breaks {
2270                list_content[0].clone()
2271            } else {
2272                // Check if any lines have hard breaks - if so, preserve the structure
2273                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2274                if has_hard_breaks {
2275                    // Don't join lines with hard breaks - keep them separate with newlines
2276                    list_content.join("\n")
2277                } else {
2278                    // No hard breaks, safe to join with spaces
2279                    list_content.join(" ")
2280                }
2281            };
2282
2283            // Calculate the proper indentation for continuation lines
2284            let trimmed_marker = marker;
2285            let continuation_spaces = if let Some(max_indent) = options.max_list_continuation_indent {
2286                // Cap the relative indent (past the nesting level) to max_indent,
2287                // then add back the nesting indent so nested items stay correct
2288                indent + (content_start - indent).min(max_indent)
2289            } else {
2290                content_start
2291            };
2292
2293            // Adjust line length to account for list marker and space
2294            let prefix_length = indent + trimmed_marker.len() + 1;
2295
2296            // Create adjusted options with reduced line length
2297            let adjusted_options = ReflowOptions {
2298                line_length: options.line_length.saturating_sub(prefix_length),
2299                ..options.clone()
2300            };
2301
2302            let reflowed = reflow_line(&combined_content, &adjusted_options);
2303            for (j, reflowed_line) in reflowed.iter().enumerate() {
2304                if j == 0 {
2305                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2306                } else {
2307                    // Continuation lines aligned with text after marker
2308                    let continuation_indent = " ".repeat(continuation_spaces);
2309                    result.push(format!("{continuation_indent}{reflowed_line}"));
2310                }
2311            }
2312            continue;
2313        }
2314
2315        // Preserve tables
2316        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2317            result.push(line.to_string());
2318            i += 1;
2319            continue;
2320        }
2321
2322        // Preserve reference definitions
2323        if trimmed.starts_with('[') && line.contains("]:") {
2324            result.push(line.to_string());
2325            i += 1;
2326            continue;
2327        }
2328
2329        // Preserve definition list items (extended markdown)
2330        if is_definition_list_item(trimmed) {
2331            result.push(line.to_string());
2332            i += 1;
2333            continue;
2334        }
2335
2336        // Check if this is a single line that doesn't need processing
2337        let mut is_single_line_paragraph = true;
2338        if i + 1 < lines.len() {
2339            let next_trimmed = lines[i + 1].trim();
2340            // Check if next line continues this paragraph
2341            if !is_block_boundary(next_trimmed) {
2342                is_single_line_paragraph = false;
2343            }
2344        }
2345
2346        // If it's a single line that fits, just add it as-is
2347        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2348            result.push(line.to_string());
2349            i += 1;
2350            continue;
2351        }
2352
2353        // For regular paragraphs, collect consecutive lines
2354        let mut paragraph_parts = Vec::new();
2355        let mut current_part = vec![line];
2356        i += 1;
2357
2358        // If preserve_breaks is true, treat each line separately
2359        if options.preserve_breaks {
2360            // Don't collect consecutive lines - just reflow this single line
2361            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2362                Some("\\")
2363            } else if line.ends_with("  ") {
2364                Some("  ")
2365            } else {
2366                None
2367            };
2368            let reflowed = reflow_line(line, options);
2369
2370            // Preserve hard breaks (two trailing spaces or backslash)
2371            if let Some(break_marker) = hard_break_type {
2372                if !reflowed.is_empty() {
2373                    let mut reflowed_with_break = reflowed;
2374                    let last_idx = reflowed_with_break.len() - 1;
2375                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2376                        reflowed_with_break[last_idx].push_str(break_marker);
2377                    }
2378                    result.extend(reflowed_with_break);
2379                }
2380            } else {
2381                result.extend(reflowed);
2382            }
2383        } else {
2384            // Original behavior: collect consecutive lines into a paragraph
2385            while i < lines.len() {
2386                let prev_line = if !current_part.is_empty() {
2387                    current_part.last().unwrap()
2388                } else {
2389                    ""
2390                };
2391                let next_line = lines[i];
2392                let next_trimmed = next_line.trim();
2393
2394                // Stop at empty lines or special blocks
2395                if is_block_boundary(next_trimmed) {
2396                    break;
2397                }
2398
2399                // Check if previous line ends with hard break (two spaces or backslash)
2400                // or is a complete sentence in sentence_per_line mode
2401                let prev_trimmed = prev_line.trim();
2402                let abbreviations = get_abbreviations(&options.abbreviations);
2403                let ends_with_sentence = (prev_trimmed.ends_with('.')
2404                    || prev_trimmed.ends_with('!')
2405                    || prev_trimmed.ends_with('?')
2406                    || prev_trimmed.ends_with(".*")
2407                    || prev_trimmed.ends_with("!*")
2408                    || prev_trimmed.ends_with("?*")
2409                    || prev_trimmed.ends_with("._")
2410                    || prev_trimmed.ends_with("!_")
2411                    || prev_trimmed.ends_with("?_")
2412                    // Quote-terminated sentences (straight and curly quotes)
2413                    || prev_trimmed.ends_with(".\"")
2414                    || prev_trimmed.ends_with("!\"")
2415                    || prev_trimmed.ends_with("?\"")
2416                    || prev_trimmed.ends_with(".'")
2417                    || prev_trimmed.ends_with("!'")
2418                    || prev_trimmed.ends_with("?'")
2419                    || prev_trimmed.ends_with(".\u{201D}")
2420                    || prev_trimmed.ends_with("!\u{201D}")
2421                    || prev_trimmed.ends_with("?\u{201D}")
2422                    || prev_trimmed.ends_with(".\u{2019}")
2423                    || prev_trimmed.ends_with("!\u{2019}")
2424                    || prev_trimmed.ends_with("?\u{2019}"))
2425                    && !text_ends_with_abbreviation(
2426                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2427                        &abbreviations,
2428                    );
2429
2430                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2431                    // Start a new part after hard break or complete sentence
2432                    paragraph_parts.push(current_part.join(" "));
2433                    current_part = vec![next_line];
2434                } else {
2435                    current_part.push(next_line);
2436                }
2437                i += 1;
2438            }
2439
2440            // Add the last part
2441            if !current_part.is_empty() {
2442                if current_part.len() == 1 {
2443                    // Single line, don't add trailing space
2444                    paragraph_parts.push(current_part[0].to_string());
2445                } else {
2446                    paragraph_parts.push(current_part.join(" "));
2447                }
2448            }
2449
2450            // Reflow each part separately, preserving hard breaks
2451            for (j, part) in paragraph_parts.iter().enumerate() {
2452                let reflowed = reflow_line(part, options);
2453                result.extend(reflowed);
2454
2455                // Preserve hard break by ensuring last line of part ends with hard break marker
2456                // Use two spaces as the default hard break format for reflows
2457                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2458                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2459                    let last_idx = result.len() - 1;
2460                    if !has_hard_break(&result[last_idx]) {
2461                        result[last_idx].push_str("  ");
2462                    }
2463                }
2464            }
2465        }
2466    }
2467
2468    // Preserve trailing newline if the original content had one
2469    let result_text = result.join("\n");
2470    if content.ends_with('\n') && !result_text.ends_with('\n') {
2471        format!("{result_text}\n")
2472    } else {
2473        result_text
2474    }
2475}
2476
/// Information about a reflowed paragraph
///
/// Describes a replacement edit: the bytes `start_byte..end_byte` of the original
/// content are to be substituted with `reflowed_text`. Derives `PartialEq`/`Eq` so
/// results can be compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Ending byte offset of the paragraph in the original content
    pub end_byte: usize,
    /// The reflowed text for this paragraph
    pub reflowed_text: String,
}
2487
/// A collected blockquote line used for style-preserving reflow.
///
/// The invariant `is_explicit == true` iff `prefix.is_some()` is enforced by the
/// constructors. Use [`BlockquoteLineData::explicit`] or [`BlockquoteLineData::lazy`]
/// rather than constructing the struct directly.
///
/// Derives `PartialEq`/`Eq` so collected lines can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockquoteLineData {
    /// Trimmed content without the `> ` prefix.
    pub(crate) content: String,
    /// Whether this line carries an explicit blockquote marker.
    pub(crate) is_explicit: bool,
    /// Full blockquote prefix (e.g. `"> "`, `"> > "`). `None` for lazy continuation lines.
    pub(crate) prefix: Option<String>,
}

impl BlockquoteLineData {
    /// Create an explicit (marker-bearing) blockquote line.
    ///
    /// `content` is the text after the marker; `prefix` is the marker text that
    /// preceded it. The result has `is_explicit == true` and `prefix == Some(prefix)`.
    pub fn explicit(content: String, prefix: String) -> Self {
        Self {
            content,
            is_explicit: true,
            prefix: Some(prefix),
        }
    }

    /// Create a lazy continuation line (no blockquote marker).
    ///
    /// The result has `is_explicit == false` and `prefix == None`.
    pub fn lazy(content: String) -> Self {
        Self {
            content,
            is_explicit: false,
            prefix: None,
        }
    }
}
2522
/// Style for blockquote continuation lines after reflow.
///
/// Selects whether reflowed lines after the first one are written with or without
/// the blockquote prefix.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockquoteContinuationStyle {
    /// Every reflowed line is written with the blockquote prefix.
    Explicit,
    /// Continuation lines are written without a prefix (lazy continuation), except
    /// for the first line and lines that would otherwise start a new block.
    Lazy,
}
2529
2530/// Determine the continuation style for a blockquote paragraph from its collected lines.
2531///
2532/// The first line is always explicit (it carries the marker), so only continuation
2533/// lines (index 1+) are counted. Ties resolve to `Explicit`.
2534///
2535/// When the slice has only one element (no continuation lines to inspect), both
2536/// counts are zero and the tie-breaking rule returns `Explicit`.
2537pub fn blockquote_continuation_style(lines: &[BlockquoteLineData]) -> BlockquoteContinuationStyle {
2538    let mut explicit_count = 0usize;
2539    let mut lazy_count = 0usize;
2540
2541    for line in lines.iter().skip(1) {
2542        if line.is_explicit {
2543            explicit_count += 1;
2544        } else {
2545            lazy_count += 1;
2546        }
2547    }
2548
2549    if explicit_count > 0 && lazy_count == 0 {
2550        BlockquoteContinuationStyle::Explicit
2551    } else if lazy_count > 0 && explicit_count == 0 {
2552        BlockquoteContinuationStyle::Lazy
2553    } else if explicit_count >= lazy_count {
2554        BlockquoteContinuationStyle::Explicit
2555    } else {
2556        BlockquoteContinuationStyle::Lazy
2557    }
2558}
2559
2560/// Determine the dominant blockquote prefix for a paragraph.
2561///
2562/// The most frequently occurring explicit prefix wins. Ties are broken by earliest
2563/// first appearance. Falls back to `fallback` when no explicit lines are present.
2564pub fn dominant_blockquote_prefix(lines: &[BlockquoteLineData], fallback: &str) -> String {
2565    let mut counts: std::collections::HashMap<String, (usize, usize)> = std::collections::HashMap::new();
2566
2567    for (idx, line) in lines.iter().enumerate() {
2568        let Some(prefix) = line.prefix.as_ref() else {
2569            continue;
2570        };
2571        counts
2572            .entry(prefix.clone())
2573            .and_modify(|entry| entry.0 += 1)
2574            .or_insert((1, idx));
2575    }
2576
2577    counts
2578        .into_iter()
2579        .max_by(|(_, (count_a, first_idx_a)), (_, (count_b, first_idx_b))| {
2580            count_a.cmp(count_b).then_with(|| first_idx_b.cmp(first_idx_a))
2581        })
2582        .map(|(prefix, _)| prefix)
2583        .unwrap_or_else(|| fallback.to_string())
2584}
2585
2586/// Whether a reflowed blockquote content line must carry an explicit prefix.
2587///
2588/// Lines that would start a new block structure (headings, fences, lists, etc.)
2589/// cannot safely use lazy continuation syntax.
2590pub(crate) fn should_force_explicit_blockquote_line(content_line: &str) -> bool {
2591    let trimmed = content_line.trim_start();
2592    trimmed.starts_with('>')
2593        || trimmed.starts_with('#')
2594        || trimmed.starts_with("```")
2595        || trimmed.starts_with("~~~")
2596        || is_unordered_list_marker(trimmed)
2597        || is_numbered_list_item(trimmed)
2598        || is_horizontal_rule(trimmed)
2599        || is_definition_list_item(trimmed)
2600        || (trimmed.starts_with('[') && trimmed.contains("]:"))
2601        || trimmed.starts_with(":::")
2602        || (trimmed.starts_with('<')
2603            && !trimmed.starts_with("<http")
2604            && !trimmed.starts_with("<https")
2605            && !trimmed.starts_with("<mailto:"))
2606}
2607
2608/// Reflow blockquote content lines and apply continuation style.
2609///
2610/// Segments separated by hard breaks are reflowed independently. The output lines
2611/// receive blockquote prefixes according to `continuation_style`: the first line and
2612/// any line that would start a new block structure always get an explicit prefix;
2613/// other lines follow the detected style.
2614///
2615/// Returns the styled, reflowed lines (without a trailing newline).
2616pub fn reflow_blockquote_content(
2617    lines: &[BlockquoteLineData],
2618    explicit_prefix: &str,
2619    continuation_style: BlockquoteContinuationStyle,
2620    options: &ReflowOptions,
2621) -> Vec<String> {
2622    let content_strs: Vec<&str> = lines.iter().map(|l| l.content.as_str()).collect();
2623    let segments = split_into_segments_strs(&content_strs);
2624    let mut reflowed_content_lines: Vec<String> = Vec::new();
2625
2626    for segment in segments {
2627        let hard_break_type = segment.last().and_then(|&line| {
2628            let line = line.strip_suffix('\r').unwrap_or(line);
2629            if line.ends_with('\\') {
2630                Some("\\")
2631            } else if line.ends_with("  ") {
2632                Some("  ")
2633            } else {
2634                None
2635            }
2636        });
2637
2638        let pieces: Vec<&str> = segment
2639            .iter()
2640            .map(|&line| {
2641                if let Some(l) = line.strip_suffix('\\') {
2642                    l.trim_end()
2643                } else if let Some(l) = line.strip_suffix("  ") {
2644                    l.trim_end()
2645                } else {
2646                    line.trim_end()
2647                }
2648            })
2649            .collect();
2650
2651        let segment_text = pieces.join(" ");
2652        let segment_text = segment_text.trim();
2653        if segment_text.is_empty() {
2654            continue;
2655        }
2656
2657        let mut reflowed = reflow_line(segment_text, options);
2658        if let Some(break_marker) = hard_break_type
2659            && !reflowed.is_empty()
2660        {
2661            let last_idx = reflowed.len() - 1;
2662            if !has_hard_break(&reflowed[last_idx]) {
2663                reflowed[last_idx].push_str(break_marker);
2664            }
2665        }
2666        reflowed_content_lines.extend(reflowed);
2667    }
2668
2669    let mut styled_lines: Vec<String> = Vec::new();
2670    for (idx, line) in reflowed_content_lines.iter().enumerate() {
2671        let force_explicit = idx == 0
2672            || continuation_style == BlockquoteContinuationStyle::Explicit
2673            || should_force_explicit_blockquote_line(line);
2674        if force_explicit {
2675            styled_lines.push(format!("{explicit_prefix}{line}"));
2676        } else {
2677            styled_lines.push(line.clone());
2678        }
2679    }
2680
2681    styled_lines
2682}
2683
2684fn is_blockquote_content_boundary(content: &str) -> bool {
2685    let trimmed = content.trim();
2686    trimmed.is_empty()
2687        || is_block_boundary(trimmed)
2688        || crate::utils::table_utils::TableUtils::is_potential_table_row(content)
2689        || trimmed.starts_with(":::")
2690        || crate::utils::is_template_directive_only(content)
2691        || is_standalone_attr_list(content)
2692        || is_snippet_block_delimiter(content)
2693}
2694
2695fn split_into_segments_strs<'a>(lines: &[&'a str]) -> Vec<Vec<&'a str>> {
2696    let mut segments = Vec::new();
2697    let mut current = Vec::new();
2698
2699    for &line in lines {
2700        current.push(line);
2701        if has_hard_break(line) {
2702            segments.push(current);
2703            current = Vec::new();
2704        }
2705    }
2706
2707    if !current.is_empty() {
2708        segments.push(current);
2709    }
2710
2711    segments
2712}
2713
/// Reflow the blockquote paragraph that contains line `target_idx`.
///
/// `lines` is the document split into lines and `target_idx` is a 0-based index into
/// it (it is used directly as `lines[target_idx]`, so it must be in bounds). The
/// paragraph consists of consecutive explicit blockquote lines at one nesting level
/// plus any lazy continuation lines, bounded by blank lines, content that counts as a
/// block boundary, a change of nesting level, or a hard break.
///
/// Returns `None` when no explicit blockquote marker can be found for the target
/// line, when the collected paragraph does not cover `target_idx`, or when reflowing
/// yields no lines. Otherwise returns the byte range of the paragraph within
/// `content` and its replacement text.
///
/// NOTE(review): the byte offsets add `len + 1` per line, i.e. they assume every line
/// in `lines` is followed by a single one-byte separator in `content` — confirm
/// callers never pass CRLF content split with the `\r` removed.
fn reflow_blockquote_paragraph_at_line(
    content: &str,
    lines: &[&str],
    target_idx: usize,
    options: &ReflowOptions,
) -> Option<ParagraphReflow> {
    // Determine the nesting level to work at. If the target line has no marker of its
    // own (lazy continuation), walk upwards to the nearest explicit blockquote line,
    // giving up at a blank line or the top of the document.
    let mut anchor_idx = target_idx;
    let mut target_level = if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[target_idx]) {
        parsed.nesting_level
    } else {
        let mut found = None;
        let mut idx = target_idx;
        loop {
            if lines[idx].trim().is_empty() {
                break;
            }
            if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[idx]) {
                found = Some((idx, parsed.nesting_level));
                break;
            }
            if idx == 0 {
                break;
            }
            idx -= 1;
        }
        // No explicit marker above the target: this is not a blockquote paragraph
        let (idx, level) = found?;
        anchor_idx = idx;
        level
    };

    // Expand backward to capture prior quote content at the same nesting level.
    let mut para_start = anchor_idx;
    while para_start > 0 {
        let prev_idx = para_start - 1;
        let prev_line = lines[prev_idx];

        if prev_line.trim().is_empty() {
            break;
        }

        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(prev_line) {
            // Explicit line: must be at the same level and must not be a boundary
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            para_start = prev_idx;
            continue;
        }

        // Line without a marker: tentatively accept it as a lazy continuation
        let prev_lazy = prev_line.trim_start();
        if is_blockquote_content_boundary(prev_lazy) {
            break;
        }
        para_start = prev_idx;
    }

    // Lazy continuation cannot precede the first explicit marker.
    // Move the start forward to the first line that actually carries a marker and
    // adopt that line's nesting level.
    while para_start < lines.len() {
        let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(lines[para_start]) else {
            para_start += 1;
            continue;
        };
        target_level = parsed.nesting_level;
        break;
    }

    // Either no marker line exists or the paragraph would begin after the target
    if para_start >= lines.len() || para_start > target_idx {
        return None;
    }

    // Collect explicit lines at target level and lazy continuation lines.
    // Each entry is (original_line_idx, BlockquoteLineData).
    let mut collected: Vec<(usize, BlockquoteLineData)> = Vec::new();
    let mut idx = para_start;
    while idx < lines.len() {
        // A hard break on the previously collected line ends the paragraph
        if !collected.is_empty() && has_hard_break(&collected[collected.len() - 1].1.content) {
            break;
        }

        let line = lines[idx];
        if line.trim().is_empty() {
            break;
        }

        if let Some(parsed) = crate::utils::blockquote::parse_blockquote_prefix(line) {
            if parsed.nesting_level != target_level || is_blockquote_content_boundary(parsed.content) {
                break;
            }
            collected.push((
                idx,
                BlockquoteLineData::explicit(trim_preserving_hard_break(parsed.content), parsed.prefix.to_string()),
            ));
            idx += 1;
            continue;
        }

        // No marker: lazy continuation, unless its content is a boundary
        let lazy_content = line.trim_start();
        if is_blockquote_content_boundary(lazy_content) {
            break;
        }

        collected.push((idx, BlockquoteLineData::lazy(trim_preserving_hard_break(lazy_content))));
        idx += 1;
    }

    if collected.is_empty() {
        return None;
    }

    // The collected range must actually contain the requested line
    let para_end = collected[collected.len() - 1].0;
    if target_idx < para_start || target_idx > para_end {
        return None;
    }

    let line_data: Vec<BlockquoteLineData> = collected.iter().map(|(_, d)| d.clone()).collect();

    // Prefix to emit: the dominant explicit prefix, falling back to the first
    // explicit prefix seen, or "> " if there is none.
    let fallback_prefix = line_data
        .iter()
        .find_map(|d| d.prefix.clone())
        .unwrap_or_else(|| "> ".to_string());
    let explicit_prefix = dominant_blockquote_prefix(&line_data, &fallback_prefix);
    let continuation_style = blockquote_continuation_style(&line_data);

    // Leave room for the prefix on each line; the result is clamped to at least 1
    let adjusted_line_length = options
        .line_length
        .saturating_sub(display_len(&explicit_prefix, options.length_mode))
        .max(1);

    let adjusted_options = ReflowOptions {
        line_length: adjusted_line_length,
        ..options.clone()
    };

    let styled_lines = reflow_blockquote_content(&line_data, &explicit_prefix, continuation_style, &adjusted_options);

    if styled_lines.is_empty() {
        return None;
    }

    // Calculate byte offsets.
    // Each preceding line contributes its length plus one byte for its separator.
    let mut start_byte = 0;
    for line in lines.iter().take(para_start) {
        start_byte += line.len() + 1;
    }

    let mut end_byte = start_byte;
    for line in lines.iter().take(para_end + 1).skip(para_start) {
        end_byte += line.len() + 1;
    }

    // The last line of the document has a separator only if content ends with '\n';
    // otherwise undo the extra byte counted above.
    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
    if !includes_trailing_newline {
        end_byte -= 1;
    }

    // Make the replacement end with a newline exactly when the replaced range does
    let reflowed_joined = styled_lines.join("\n");
    let reflowed_text = if includes_trailing_newline {
        if reflowed_joined.ends_with('\n') {
            reflowed_joined
        } else {
            format!("{reflowed_joined}\n")
        }
    } else if reflowed_joined.ends_with('\n') {
        reflowed_joined.trim_end_matches('\n').to_string()
    } else {
        reflowed_joined
    };

    Some(ParagraphReflow {
        start_byte,
        end_byte,
        reflowed_text,
    })
}
2887
2888/// Reflow a single paragraph at the specified line number
2889///
2890/// This function finds the paragraph containing the given line number,
2891/// reflows it according to the specified line length, and returns
2892/// information about the paragraph location and its reflowed text.
2893///
2894/// # Arguments
2895///
2896/// * `content` - The full document content
2897/// * `line_number` - The 1-based line number within the paragraph to reflow
2898/// * `line_length` - The target line length for reflowing
2899///
2900/// # Returns
2901///
2902/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
2903/// or `None` if the line number is out of bounds or the content at that
2904/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
2905pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2906    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2907}
2908
2909/// Reflow a paragraph at the given line with a specific length mode.
2910pub fn reflow_paragraph_at_line_with_mode(
2911    content: &str,
2912    line_number: usize,
2913    line_length: usize,
2914    length_mode: ReflowLengthMode,
2915) -> Option<ParagraphReflow> {
2916    let options = ReflowOptions {
2917        line_length,
2918        length_mode,
2919        ..Default::default()
2920    };
2921    reflow_paragraph_at_line_with_options(content, line_number, &options)
2922}
2923
2924/// Reflow a paragraph at the given line using the provided options.
2925///
2926/// This is the canonical implementation used by both the rule's fix mode and the
2927/// LSP "Reflow paragraph" action. Passing a fully configured `ReflowOptions` allows
2928/// the LSP action to respect user-configured reflow mode, abbreviations, etc.
2929///
2930/// # Returns
2931///
2932/// Returns `Some(ParagraphReflow)` with byte offsets and reflowed text, or `None`
2933/// if the line is out of bounds or sits inside a non-reflow-able construct.
2934pub fn reflow_paragraph_at_line_with_options(
2935    content: &str,
2936    line_number: usize,
2937    options: &ReflowOptions,
2938) -> Option<ParagraphReflow> {
2939    if line_number == 0 {
2940        return None;
2941    }
2942
2943    let lines: Vec<&str> = content.lines().collect();
2944
2945    // Check if line number is valid (1-based)
2946    if line_number > lines.len() {
2947        return None;
2948    }
2949
2950    let target_idx = line_number - 1; // Convert to 0-based
2951    let target_line = lines[target_idx];
2952    let trimmed = target_line.trim();
2953
2954    // Handle blockquote paragraphs (including lazy continuation lines) with
2955    // style-preserving output.
2956    if let Some(blockquote_reflow) = reflow_blockquote_paragraph_at_line(content, &lines, target_idx, options) {
2957        return Some(blockquote_reflow);
2958    }
2959
2960    // Don't reflow special blocks
2961    if is_paragraph_boundary(trimmed, target_line) {
2962        return None;
2963    }
2964
2965    // Find paragraph start - scan backward until blank line or special block
2966    let mut para_start = target_idx;
2967    while para_start > 0 {
2968        let prev_idx = para_start - 1;
2969        let prev_line = lines[prev_idx];
2970        let prev_trimmed = prev_line.trim();
2971
2972        // Stop at blank line or special blocks
2973        if is_paragraph_boundary(prev_trimmed, prev_line) {
2974            break;
2975        }
2976
2977        para_start = prev_idx;
2978    }
2979
2980    // Find paragraph end - scan forward until blank line or special block
2981    let mut para_end = target_idx;
2982    while para_end + 1 < lines.len() {
2983        let next_idx = para_end + 1;
2984        let next_line = lines[next_idx];
2985        let next_trimmed = next_line.trim();
2986
2987        // Stop at blank line or special blocks
2988        if is_paragraph_boundary(next_trimmed, next_line) {
2989            break;
2990        }
2991
2992        para_end = next_idx;
2993    }
2994
2995    // Extract paragraph lines
2996    let paragraph_lines = &lines[para_start..=para_end];
2997
2998    // Calculate byte offsets
2999    let mut start_byte = 0;
3000    for line in lines.iter().take(para_start) {
3001        start_byte += line.len() + 1; // +1 for newline
3002    }
3003
3004    let mut end_byte = start_byte;
3005    for line in paragraph_lines.iter() {
3006        end_byte += line.len() + 1; // +1 for newline
3007    }
3008
3009    // Track whether the byte range includes a trailing newline
3010    // (it doesn't if this is the last line and the file doesn't end with newline)
3011    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
3012
3013    // Adjust end_byte if the last line doesn't have a newline
3014    if !includes_trailing_newline {
3015        end_byte -= 1;
3016    }
3017
3018    // Join paragraph lines and reflow
3019    let paragraph_text = paragraph_lines.join("\n");
3020
3021    // Reflow the paragraph using reflow_markdown to handle it properly
3022    let reflowed = reflow_markdown(&paragraph_text, options);
3023
3024    // Ensure reflowed text matches whether the byte range includes a trailing newline
3025    // This is critical: if the range includes a newline, the replacement must too,
3026    // otherwise the next line will get appended to the reflowed paragraph
3027    let reflowed_text = if includes_trailing_newline {
3028        // Range includes newline - ensure reflowed text has one
3029        if reflowed.ends_with('\n') {
3030            reflowed
3031        } else {
3032            format!("{reflowed}\n")
3033        }
3034    } else {
3035        // Range doesn't include newline - ensure reflowed text doesn't have one
3036        if reflowed.ends_with('\n') {
3037            reflowed.trim_end_matches('\n').to_string()
3038        } else {
3039            reflowed
3040        }
3041    };
3042
3043    Some(ParagraphReflow {
3044        start_byte,
3045        end_byte,
3046        reflowed_text,
3047    })
3048}
3049
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation()` against the built-in abbreviation list.
    ///
    /// Helper-level tests like this one stay inline; per the original note, the
    /// public API and integration tests are in tests/utils/text_reflow_test.rs
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Built-in abbreviations (titles and i.e./e.g.) must be recognized.
        let recognized = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognized {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} to end with an abbreviation"
            );
        }

        // Everything else must be rejected.
        let rejected = [
            "etc.", // NOT in built-in list (etc doesn't always have period)
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",        // question mark, not period
            "Mr!",        // exclamation, not period
            "paradigms?", // question mark
            "word",       // no punctuation
            "",           // empty string
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected {text:?} not to end with an abbreviation"
            );
        }
    }

    /// Unordered list markers are recognized; rules, emphasis and plain text are not.
    #[test]
    fn test_is_unordered_list_marker() {
        let markers = [
            "- item", "* item", "+ item", //
            "-", // lone marker
            "*", "+",
        ];
        for line in markers {
            assert!(is_unordered_list_marker(line), "expected {line:?} to be a list marker");
        }

        let non_markers = [
            "---",        // horizontal rule
            "***",        // horizontal rule
            "- - -",      // horizontal rule
            "* * *",      // horizontal rule
            "*emphasis*", // emphasis, not list
            "-word",      // no space after marker
            "",           // empty
            "text",       // plain text
            "# heading",  // heading
        ];
        for line in non_markers {
            assert!(
                !is_unordered_list_marker(line),
                "expected {line:?} not to be a list marker"
            );
        }
    }

    /// Lines that start a new block are boundaries; ordinary inline text is not.
    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            "",                           // empty line
            "# Heading",                  // ATX heading
            "## Level 2",                 // ATX heading
            "```rust",                    // code fence
            "~~~",                        // tilde code fence
            "> quote",                    // blockquote
            "| cell |",                   // table
            "[link]: http://example.com", // reference def
            "---",                        // horizontal rule
            "***",                        // horizontal rule
            "- item",                     // unordered list
            "* item",                     // unordered list
            "+ item",                     // unordered list
            "1. item",                    // ordered list
            "10. item",                   // ordered list
            ": definition",               // definition list
            ":::",                        // div marker
            "::::: {.callout-note}",      // div marker with attrs
        ];
        for line in boundaries {
            assert!(is_block_boundary(line), "expected {line:?} to be a block boundary");
        }

        // Paragraph continuation lines
        let continuations = [
            "regular text",
            "*emphasis*",  // emphasis, not list
            "[link](url)", // inline link, not reference def
            "some words here",
        ];
        for line in continuations {
            assert!(
                !is_block_boundary(line),
                "expected {line:?} not to be a block boundary"
            );
        }
    }

    /// A definition list item following a single-line paragraph must be kept
    /// on its own line instead of being merged into the paragraph.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let input = "Term\n: Definition of the term";
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };

        let result = reflow_markdown(input, &options);

        // The definition list marker should survive the reflow
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );

        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// Paragraph boundaries: block boundaries plus indented code and table rows.
    #[test]
    fn test_is_paragraph_boundary() {
        // Each case is (trimmed line, original line).
        let boundaries = [
            // Core block boundary checks are inherited
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code blocks (≥4 spaces or tab)
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows via is_potential_table_row
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"), // pipe-delimited without leading pipe
        ];
        for (trimmed, line) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, line),
                "expected {line:?} to be a paragraph boundary"
            );
        }

        let non_boundaries = [
            ("regular text", "regular text"),
            ("text", "  text"), // 2-space indent is not code
        ];
        for (trimmed, line) in non_boundaries {
            assert!(
                !is_paragraph_boundary(trimmed, line),
                "expected {line:?} not to be a paragraph boundary"
            );
        }
    }

    /// Div markers (:::) are paragraph boundaries for `reflow_paragraph_at_line`,
    /// so asking to reflow the marker line itself yields nothing.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";

        // Line 3 is the div marker — should not be reflowed
        assert!(
            reflow_paragraph_at_line(content, 3, 80).is_none(),
            "Div marker line should not be reflowed"
        );
    }
}