Skip to main content

rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11    LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12    REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16    text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20use unicode_width::UnicodeWidthStr;
21
/// Length calculation mode for reflow.
///
/// Selects how the length of a string is measured when it is compared against
/// the configured line length.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s).
    ///
    /// This is not a grapheme-cluster count: a user-perceived character built
    /// from several code points (e.g. a base letter plus a combining mark)
    /// counts once per code point.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes
    Bytes,
}
33
34/// Calculate the display length of a string based on the length mode
35fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
36    match mode {
37        ReflowLengthMode::Chars => s.chars().count(),
38        ReflowLengthMode::Visual => s.width(),
39        ReflowLengthMode::Bytes => s.len(),
40    }
41}
42
/// Options for reflowing text.
///
/// `reflow_line()` consults `sentence_per_line` first, then
/// `semantic_line_breaks`, and only when both are off wraps at `line_length`.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length. `0` means no wrapping (unlimited line length).
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line.
    /// When set, `reflow_line()` processes the line regardless of its length.
    pub sentence_per_line: bool,
    /// Whether to use semantic line breaks (cascading split strategy)
    pub semantic_line_breaks: bool,
    /// Custom abbreviations for sentence detection
    /// Periods are optional - both "Dr" and "Dr." work the same
    /// Custom abbreviations are always added to the built-in defaults
    pub abbreviations: Option<Vec<String>>,
    /// How to measure string length for line-length comparisons
    pub length_mode: ReflowLengthMode,
}
63
64impl Default for ReflowOptions {
65    fn default() -> Self {
66        Self {
67            line_length: 80,
68            break_on_sentences: true,
69            preserve_breaks: false,
70            sentence_per_line: false,
71            semantic_line_breaks: false,
72            abbreviations: None,
73            length_mode: ReflowLengthMode::default(),
74        }
75    }
76}
77
/// Detect if a character position is a sentence boundary
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
/// Supports both ASCII punctuation (. ! ?) and CJK sentence-ending punctuation
/// (whatever `is_cjk_sentence_ending` accepts).
///
/// # Parameters
/// - `text`: the full text being scanned.
/// - `pos`: index of the candidate punctuation, counted in `char`s (not bytes).
/// - `abbreviations`: set handed to `text_ends_with_abbreviation` to reject
///   periods that terminate an abbreviation.
///
/// # Returns
/// `true` when the character at `pos` ends a sentence AND more content follows.
/// The last character of `text` (or any `pos` beyond it) is never a boundary.
///
/// NOTE(review): the whole text is collected into a `Vec<char>` on every call,
/// so calling this once per character makes the scan quadratic in text length.
fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
    let chars: Vec<char> = text.chars().collect();

    // Need at least one character after `pos`; this also guards the indexing below.
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // Check for CJK sentence-ending punctuation
    // CJK punctuation doesn't require space or uppercase after it
    if is_cjk_sentence_ending(c) {
        // Skip any trailing emphasis/strikethrough markers
        let mut after_punct_pos = pos + 1;
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip whitespace
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Check if we have more content (any non-whitespace)
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip leading emphasis/strikethrough markers
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Only markers remained after the punctuation: nothing starts a new sentence
        if after_punct_pos >= chars.len() {
            return false;
        }

        // For CJK, we accept any character as the start of the next sentence
        // (no uppercase requirement, since CJK doesn't have case)
        return true;
    }

    // Check for ASCII sentence-ending punctuation
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space
    // Each arm yields (index of that space, index just past it); only the second is used below.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // Normal case: punctuation followed by space
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        // Sentence ends with quote - check what follows the quote
        if chars[pos + 2] == ' ' {
            // Just quote followed by space: 'sentence." '
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Quote followed by emphasis: 'sentence."* '
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Quote followed by bold: 'sentence."** '
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Sentence ends with bold: "sentence.** " or "sentence.__ "
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Sentence ends with strikethrough: "sentence.~~ "
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip all whitespace after the space to find the start of the next sentence
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Check if we reached the end of the string
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    // Check if we reached the end after skipping emphasis
    if first_letter_pos >= chars.len() {
        return false;
    }

    // First character of next sentence must be uppercase or CJK
    let first_char = chars[first_letter_pos];
    if !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    // Look back to check for common abbreviations (only applies to periods)
    if pos > 0 && c == '.' {
        // Convert char index to byte offset for string slicing
        // (sum of the UTF-8 lengths of every char up to and including `pos`)
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // Check for decimal numbers (e.g., "3.14")
        // Make sure to check if first_letter_pos is within bounds
        // NOTE(review): `first_char` already passed the uppercase-or-CJK test above,
        // so this only fires when that same character is also numeric.
        if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
            return false;
        }
    }
    true
}
221
/// Split text into sentences
///
/// Convenience wrapper around `split_into_sentences_custom` that passes no
/// custom abbreviations.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    split_into_sentences_custom(text, &None)
}
226
227/// Split text into sentences with custom abbreviations
228pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
229    let abbreviations = get_abbreviations(custom_abbreviations);
230    split_into_sentences_with_set(text, &abbreviations)
231}
232
233/// Internal function to split text into sentences with a pre-computed abbreviations set
234/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
235fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
236    let mut sentences = Vec::new();
237    let mut current_sentence = String::new();
238    let mut chars = text.chars().peekable();
239    let mut pos = 0;
240
241    while let Some(c) = chars.next() {
242        current_sentence.push(c);
243
244        if is_sentence_boundary(text, pos, abbreviations) {
245            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
246            while let Some(&next) = chars.peek() {
247                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
248                    current_sentence.push(chars.next().unwrap());
249                    pos += 1;
250                } else {
251                    break;
252                }
253            }
254
255            // Consume the space after the sentence
256            if chars.peek() == Some(&' ') {
257                chars.next();
258                pos += 1;
259            }
260
261            sentences.push(current_sentence.trim().to_string());
262            current_sentence.clear();
263        }
264
265        pos += 1;
266    }
267
268    // Add any remaining text as the last sentence
269    if !current_sentence.trim().is_empty() {
270        sentences.push(current_sentence.trim().to_string());
271    }
272    sentences
273}
274
/// Check if a line is a horizontal rule (---, ___, ***)
///
/// The line must start with `-`, `_` or `*`, contain only that same marker
/// plus optional spaces or tabs, and hold at least three markers. Tabs are
/// accepted alongside spaces because CommonMark allows either between and
/// after the markers of a thematic break.
///
/// Leading indentation is not skipped: callers are expected to pass a line
/// whose first character is the marker.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character fixes which marker the whole line must use.
    let marker = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for c in line.chars() {
        if c == marker {
            marker_count += 1;
        } else if c != ' ' && c != '\t' {
            // Any other character (including a different marker) disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
303
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// Returns `true` when the line starts with one or more ASCII digits that are
/// immediately followed by a period and a space.
///
/// Only ASCII `0`-`9` count as digits: other Unicode numerics (superscripts,
/// fractions, non-Latin digits) do not form Markdown list markers.
/// "2019." without a trailing space is NOT treated as a list item, to avoid
/// false positives and to stay consistent with list marker extraction.
fn is_numbered_list_item(line: &str) -> bool {
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();

    // Slicing at `digit_count` is safe: every byte before it is ASCII.
    digit_count > 0 && line[digit_count..].starts_with(". ")
}
327
328/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
329fn is_unordered_list_marker(s: &str) -> bool {
330    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
331        && !is_horizontal_rule(s)
332        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
333}
334
335/// Shared structural checks for block boundary detection.
336/// Checks elements that only depend on the trimmed line content.
337fn is_block_boundary_core(trimmed: &str) -> bool {
338    trimmed.is_empty()
339        || trimmed.starts_with('#')
340        || trimmed.starts_with("```")
341        || trimmed.starts_with("~~~")
342        || trimmed.starts_with('>')
343        || (trimmed.starts_with('[') && trimmed.contains("]:"))
344        || is_horizontal_rule(trimmed)
345        || is_unordered_list_marker(trimmed)
346        || is_numbered_list_item(trimmed)
347        || is_definition_list_item(trimmed)
348        || trimmed.starts_with(":::")
349}
350
351/// Check if a trimmed line starts a new structural block element.
352/// Used for paragraph boundary detection in `reflow_markdown()`.
353fn is_block_boundary(trimmed: &str) -> bool {
354    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
355}
356
357/// Check if a line starts a new structural block for paragraph boundary detection
358/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
359/// (≥4 spaces) and table row detection via `is_potential_table_row`.
360fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
361    is_block_boundary_core(trimmed)
362        || ElementCache::calculate_indentation_width_default(line) >= 4
363        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
364}
365
/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// CommonMark supports two formats for hard line breaks:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
///
/// A trailing `\r` (from CRLF line endings) is ignored before checking.
///
/// Only an unescaped trailing backslash counts: in `foo\\` the two backslashes
/// form an escaped literal backslash, so the line does not end in a hard break.
/// The run of trailing backslashes must therefore have odd length.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);

    if line.ends_with("  ") {
        return true;
    }

    // Backslashes pair up left to right as escapes; an odd count leaves one
    // unpaired backslash directly before the line ending.
    let trailing_backslashes = line.bytes().rev().take_while(|&b| b == b'\\').count();
    trailing_backslashes % 2 == 1
}
375
/// Report whether the final character of `text` is `.`, `!` or `?`.
///
/// Trailing whitespace is not skipped; an empty string yields `false`.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
380
/// Strip trailing whitespace from a line without destroying a hard break.
///
/// Rules, applied after removing one trailing `\r` (CRLF input):
/// - a line ending in a backslash is returned untouched;
/// - a line ending in two or more spaces is reduced to its content plus
///   exactly two spaces (or to an empty string if it has no content);
/// - any other line simply loses its trailing whitespace.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash-style hard break: keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_string();
    }

    let content = line.trim_end();
    let has_space_break = line.ends_with("  ");

    // No hard break to keep, or nothing but whitespace on the line.
    if !has_space_break || content.is_empty() {
        return content.to_string();
    }

    // Normalize the space-style hard break to exactly two spaces.
    format!("{content}  ")
}
411
412pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
413    // For sentence-per-line mode, always process regardless of length
414    if options.sentence_per_line {
415        let elements = parse_markdown_elements(line);
416        return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
417    }
418
419    // For semantic line breaks mode, use cascading split strategy
420    if options.semantic_line_breaks {
421        let elements = parse_markdown_elements(line);
422        return reflow_elements_semantic(&elements, options);
423    }
424
425    // Quick check: if line is already short enough or no wrapping requested, return as-is
426    // line_length = 0 means no wrapping (unlimited line length)
427    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
428        return vec![line.to_string()];
429    }
430
431    // Parse the markdown to identify elements
432    let elements = parse_markdown_elements(line);
433
434    // Reflow the elements into lines
435    reflow_elements(&elements, options)
436}
437
/// Image source in a linked image structure
///
/// Describes how the inner image of a linked image points at its picture.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: ![alt](url)
    /// Holds the text rendered between the parentheses.
    Inline(String),
    /// Reference image: ![alt][ref]
    /// Holds the text rendered between the second pair of brackets.
    Reference(String),
}
446
/// Link target in a linked image structure
///
/// Describes where the outer link of a linked image points.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: ](url)
    /// Holds the text rendered between the parentheses.
    Inline(String),
    /// Reference link: ][ref]
    /// Holds the text rendered between the brackets.
    Reference(String),
}
455
/// Represents a piece of content in the markdown
///
/// The `Display` impl renders each variant back to Markdown. Most variants
/// store only their inner content and `Display` re-adds the delimiters;
/// `Text`, `Autolink`, `HtmlTag`, `HtmlEntity` and `HugoShortcode` are written
/// out verbatim, so they hold the complete source text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// A complete markdown inline link [text](url)
    Link { text: String, url: String },
    /// A complete markdown reference link [text][ref]
    ReferenceLink { text: String, reference: String },
    /// A complete markdown empty reference link [text][]
    EmptyReferenceLink { text: String },
    /// A complete markdown shortcut reference link [ref]
    ShortcutReference { reference: String },
    /// A complete markdown inline image ![alt](url)
    InlineImage { alt: String, url: String },
    /// A complete markdown reference image ![alt][ref]
    ReferenceImage { alt: String, reference: String },
    /// A complete markdown empty reference image ![alt][]
    EmptyReferenceImage { alt: String },
    /// A clickable image badge in any of 4 forms:
    /// - [![alt](img-url)](link-url)
    /// - [![alt][img-ref]](link-url)
    /// - [![alt](img-url)][link-ref]
    /// - [![alt][img-ref]][link-ref]
    LinkedImage {
        alt: String,
        img_source: LinkedImageSource,
        link_target: LinkedImageTarget,
    },
    /// Footnote reference [^note] (`note` excludes the `[^` and `]`)
    FootnoteReference { note: String },
    /// Strikethrough text ~~text~~ (content without the `~~` markers)
    Strikethrough(String),
    /// Wiki-style link [[wiki]] or [[wiki|text]] (content without the brackets)
    WikiLink(String),
    /// Inline math $math$ (content without the `$` delimiters)
    InlineMath(String),
    /// Display math $$math$$ (content without the `$$` delimiters)
    DisplayMath(String),
    /// Emoji shortcode :emoji: (name without the colons)
    EmojiShortcode(String),
    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
    Autolink(String),
    /// HTML tag <tag> or </tag> or <tag/>
    HtmlTag(String),
    /// HTML entity &nbsp; or &#123;
    HtmlEntity(String),
    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
    HugoShortcode(String),
    /// Inline code `code` (content without the backticks)
    Code(String),
    /// Bold text **text** or __text__
    Bold {
        content: String,
        /// True if underscore markers (__), false for asterisks (**)
        underscore: bool,
    },
    /// Italic text *text* or _text_
    Italic {
        content: String,
        /// True if underscore marker (_), false for asterisk (*)
        underscore: bool,
    },
}
520
521impl std::fmt::Display for Element {
522    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
523        match self {
524            Element::Text(s) => write!(f, "{s}"),
525            Element::Link { text, url } => write!(f, "[{text}]({url})"),
526            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
527            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
528            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
529            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
530            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
531            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
532            Element::LinkedImage {
533                alt,
534                img_source,
535                link_target,
536            } => {
537                // Build the image part: ![alt](url) or ![alt][ref]
538                let img_part = match img_source {
539                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
540                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
541                };
542                // Build the link part: (url) or [ref]
543                match link_target {
544                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
545                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
546                }
547            }
548            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
549            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
550            Element::WikiLink(s) => write!(f, "[[{s}]]"),
551            Element::InlineMath(s) => write!(f, "${s}$"),
552            Element::DisplayMath(s) => write!(f, "$${s}$$"),
553            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
554            Element::Autolink(s) => write!(f, "{s}"),
555            Element::HtmlTag(s) => write!(f, "{s}"),
556            Element::HtmlEntity(s) => write!(f, "{s}"),
557            Element::HugoShortcode(s) => write!(f, "{s}"),
558            Element::Code(s) => write!(f, "`{s}`"),
559            Element::Bold { content, underscore } => {
560                if *underscore {
561                    write!(f, "__{content}__")
562                } else {
563                    write!(f, "**{content}**")
564                }
565            }
566            Element::Italic { content, underscore } => {
567                if *underscore {
568                    write!(f, "_{content}_")
569                } else {
570                    write!(f, "*{content}*")
571                }
572            }
573        }
574    }
575}
576
577impl Element {
578    /// Calculate the display width of this element using the given length mode.
579    /// This formats the element and computes its width, correctly handling
580    /// visual width for CJK characters and other wide glyphs.
581    fn display_width(&self, mode: ReflowLengthMode) -> usize {
582        let formatted = format!("{self}");
583        display_len(&formatted, mode)
584    }
585}
586
/// An emphasis or formatting span parsed by pulldown-cmark
///
/// Offsets are byte offsets into the text that was passed to
/// `extract_emphasis_spans`.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the emphasis starts (including markers)
    start: usize,
    /// Byte offset where the emphasis ends (after closing markers)
    end: usize,
    /// The content inside the emphasis markers
    content: String,
    /// Whether this is strong (bold) emphasis
    is_strong: bool,
    /// Whether this is strikethrough (~~text~~)
    is_strikethrough: bool,
    /// Whether the original used underscore markers (for emphasis only;
    /// always `false` for strikethrough spans)
    uses_underscore: bool,
}
603
604/// Extract emphasis and strikethrough spans from text using pulldown-cmark
605///
606/// This provides CommonMark-compliant emphasis parsing, correctly handling:
607/// - Nested emphasis like `*text **bold** more*`
608/// - Left/right flanking delimiter rules
609/// - Underscore vs asterisk markers
610/// - GFM strikethrough (~~text~~)
611///
612/// Returns spans sorted by start position.
613fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
614    let mut spans = Vec::new();
615    let mut options = Options::empty();
616    options.insert(Options::ENABLE_STRIKETHROUGH);
617
618    // Stacks to track nested formatting with their start positions
619    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
620    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
621    let mut strikethrough_stack: Vec<usize> = Vec::new();
622
623    let parser = Parser::new_ext(text, options).into_offset_iter();
624
625    for (event, range) in parser {
626        match event {
627            Event::Start(Tag::Emphasis) => {
628                // Check if this uses underscore by looking at the original text
629                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
630                emphasis_stack.push((range.start, uses_underscore));
631            }
632            Event::End(TagEnd::Emphasis) => {
633                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
634                    // Extract content between the markers (1 char marker on each side)
635                    let content_start = start_byte + 1;
636                    let content_end = range.end - 1;
637                    if content_end > content_start
638                        && let Some(content) = text.get(content_start..content_end)
639                    {
640                        spans.push(EmphasisSpan {
641                            start: start_byte,
642                            end: range.end,
643                            content: content.to_string(),
644                            is_strong: false,
645                            is_strikethrough: false,
646                            uses_underscore,
647                        });
648                    }
649                }
650            }
651            Event::Start(Tag::Strong) => {
652                // Check if this uses underscore by looking at the original text
653                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
654                strong_stack.push((range.start, uses_underscore));
655            }
656            Event::End(TagEnd::Strong) => {
657                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
658                    // Extract content between the markers (2 char marker on each side)
659                    let content_start = start_byte + 2;
660                    let content_end = range.end - 2;
661                    if content_end > content_start
662                        && let Some(content) = text.get(content_start..content_end)
663                    {
664                        spans.push(EmphasisSpan {
665                            start: start_byte,
666                            end: range.end,
667                            content: content.to_string(),
668                            is_strong: true,
669                            is_strikethrough: false,
670                            uses_underscore,
671                        });
672                    }
673                }
674            }
675            Event::Start(Tag::Strikethrough) => {
676                strikethrough_stack.push(range.start);
677            }
678            Event::End(TagEnd::Strikethrough) => {
679                if let Some(start_byte) = strikethrough_stack.pop() {
680                    // Extract content between the ~~ markers (2 char marker on each side)
681                    let content_start = start_byte + 2;
682                    let content_end = range.end - 2;
683                    if content_end > content_start
684                        && let Some(content) = text.get(content_start..content_end)
685                    {
686                        spans.push(EmphasisSpan {
687                            start: start_byte,
688                            end: range.end,
689                            content: content.to_string(),
690                            is_strong: false,
691                            is_strikethrough: true,
692                            uses_underscore: false,
693                        });
694                    }
695                }
696            }
697            _ => {}
698        }
699    }
700
701    // Sort by start position
702    spans.sort_by_key(|s| s.start);
703    spans
704}
705
706/// Parse markdown elements from text preserving the raw syntax
707///
708/// Detection order is critical:
709/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
710/// 2. Inline images ![alt](url) - before links to handle ! prefix
711/// 3. Reference images ![alt][ref] - before reference links
712/// 4. Inline links [text](url) - before reference links
713/// 5. Reference links [text][ref] - before shortcut references
714/// 6. Shortcut reference links [ref] - detected last to avoid false positives
715/// 7. Other elements (code, bold, italic, etc.) - processed normally
716fn parse_markdown_elements(text: &str) -> Vec<Element> {
717    let mut elements = Vec::new();
718    let mut remaining = text;
719
720    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
721    let emphasis_spans = extract_emphasis_spans(text);
722
723    while !remaining.is_empty() {
724        // Calculate current byte offset in original text
725        let current_offset = text.len() - remaining.len();
726        // Find the earliest occurrence of any markdown pattern
727        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
728
729        // Check for linked images FIRST (all 4 variants)
730        // Quick literal check: only run expensive regexes if we might have a linked image
731        // Pattern starts with "[!" so check for that first
732        if remaining.contains("[!") {
733            // Pattern 1: [![alt](img)](link) - inline image in inline link
734            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
735                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
736            {
737                earliest_match = Some((m.start(), "linked_image_ii", m));
738            }
739
740            // Pattern 2: [![alt][ref]](link) - reference image in inline link
741            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
742                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
743            {
744                earliest_match = Some((m.start(), "linked_image_ri", m));
745            }
746
747            // Pattern 3: [![alt](img)][ref] - inline image in reference link
748            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
749                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
750            {
751                earliest_match = Some((m.start(), "linked_image_ir", m));
752            }
753
754            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
755            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
756                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
757            {
758                earliest_match = Some((m.start(), "linked_image_rr", m));
759            }
760        }
761
762        // Check for images (they start with ! so should be detected before links)
763        // Inline images - ![alt](url)
764        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
765            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
766        {
767            earliest_match = Some((m.start(), "inline_image", m));
768        }
769
770        // Reference images - ![alt][ref]
771        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
772            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
773        {
774            earliest_match = Some((m.start(), "ref_image", m));
775        }
776
777        // Check for footnote references - [^note]
778        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
779            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
780        {
781            earliest_match = Some((m.start(), "footnote_ref", m));
782        }
783
784        // Check for inline links - [text](url)
785        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
786            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
787        {
788            earliest_match = Some((m.start(), "inline_link", m));
789        }
790
791        // Check for reference links - [text][ref]
792        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
793            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
794        {
795            earliest_match = Some((m.start(), "ref_link", m));
796        }
797
798        // Check for shortcut reference links - [ref]
799        // Only check if we haven't found an earlier pattern that would conflict
800        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
801            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
802        {
803            earliest_match = Some((m.start(), "shortcut_ref", m));
804        }
805
806        // Check for wiki-style links - [[wiki]]
807        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
808            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
809        {
810            earliest_match = Some((m.start(), "wiki_link", m));
811        }
812
813        // Check for display math first (before inline) - $$math$$
814        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
815            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
816        {
817            earliest_match = Some((m.start(), "display_math", m));
818        }
819
820        // Check for inline math - $math$
821        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
822            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
823        {
824            earliest_match = Some((m.start(), "inline_math", m));
825        }
826
827        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
828
829        // Check for emoji shortcodes - :emoji:
830        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
831            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
832        {
833            earliest_match = Some((m.start(), "emoji", m));
834        }
835
836        // Check for HTML entities - &nbsp; etc
837        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
838            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
839        {
840            earliest_match = Some((m.start(), "html_entity", m));
841        }
842
843        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
844        // Must be checked before other patterns to avoid false sentence breaks
845        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
846            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
847        {
848            earliest_match = Some((m.start(), "hugo_shortcode", m));
849        }
850
851        // Check for HTML tags - <tag> </tag> <tag/>
852        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
853        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
854            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
855        {
856            // Check if this is an autolink (starts with protocol or mailto:)
857            let matched_text = &remaining[m.start()..m.end()];
858            let is_url_autolink = matched_text.starts_with("<http://")
859                || matched_text.starts_with("<https://")
860                || matched_text.starts_with("<mailto:")
861                || matched_text.starts_with("<ftp://")
862                || matched_text.starts_with("<ftps://");
863
864            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
865            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
866            let is_email_autolink = {
867                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
868                EMAIL_PATTERN.is_match(content)
869            };
870
871            if is_url_autolink || is_email_autolink {
872                earliest_match = Some((m.start(), "autolink", m));
873            } else {
874                earliest_match = Some((m.start(), "html_tag", m));
875            }
876        }
877
878        // Find earliest non-link special characters
879        let mut next_special = remaining.len();
880        let mut special_type = "";
881        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
882
883        // Check for code spans (not handled by pulldown-cmark in this context)
884        if let Some(pos) = remaining.find('`')
885            && pos < next_special
886        {
887            next_special = pos;
888            special_type = "code";
889        }
890
891        // Check for emphasis using pulldown-cmark's pre-extracted spans
892        // Find the earliest emphasis span that starts within remaining text
893        for span in &emphasis_spans {
894            if span.start >= current_offset && span.start < current_offset + remaining.len() {
895                let pos_in_remaining = span.start - current_offset;
896                if pos_in_remaining < next_special {
897                    next_special = pos_in_remaining;
898                    special_type = "pulldown_emphasis";
899                    pulldown_emphasis = Some(span);
900                }
901                break; // Spans are sorted by start position, so first match is earliest
902            }
903        }
904
905        // Determine which pattern to process first
906        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
907            pos < next_special
908        } else {
909            false
910        };
911
912        if should_process_markdown_link {
913            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
914
915            // Add any text before the match
916            if pos > 0 {
917                elements.push(Element::Text(remaining[..pos].to_string()));
918            }
919
920            // Process the matched pattern
921            match pattern_type {
922                // Pattern 1: [![alt](img)](link) - inline image in inline link
923                "linked_image_ii" => {
924                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
925                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
926                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
927                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
928                        elements.push(Element::LinkedImage {
929                            alt: alt.to_string(),
930                            img_source: LinkedImageSource::Inline(img_url.to_string()),
931                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
932                        });
933                        remaining = &remaining[match_obj.end()..];
934                    } else {
935                        elements.push(Element::Text("[".to_string()));
936                        remaining = &remaining[1..];
937                    }
938                }
939                // Pattern 2: [![alt][ref]](link) - reference image in inline link
940                "linked_image_ri" => {
941                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
942                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
943                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
944                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
945                        elements.push(Element::LinkedImage {
946                            alt: alt.to_string(),
947                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
948                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
949                        });
950                        remaining = &remaining[match_obj.end()..];
951                    } else {
952                        elements.push(Element::Text("[".to_string()));
953                        remaining = &remaining[1..];
954                    }
955                }
956                // Pattern 3: [![alt](img)][ref] - inline image in reference link
957                "linked_image_ir" => {
958                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
959                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
960                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
961                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
962                        elements.push(Element::LinkedImage {
963                            alt: alt.to_string(),
964                            img_source: LinkedImageSource::Inline(img_url.to_string()),
965                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
966                        });
967                        remaining = &remaining[match_obj.end()..];
968                    } else {
969                        elements.push(Element::Text("[".to_string()));
970                        remaining = &remaining[1..];
971                    }
972                }
973                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
974                "linked_image_rr" => {
975                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
976                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
977                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
978                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
979                        elements.push(Element::LinkedImage {
980                            alt: alt.to_string(),
981                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
982                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
983                        });
984                        remaining = &remaining[match_obj.end()..];
985                    } else {
986                        elements.push(Element::Text("[".to_string()));
987                        remaining = &remaining[1..];
988                    }
989                }
990                "inline_image" => {
991                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
992                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
993                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
994                        elements.push(Element::InlineImage {
995                            alt: alt.to_string(),
996                            url: url.to_string(),
997                        });
998                        remaining = &remaining[match_obj.end()..];
999                    } else {
1000                        elements.push(Element::Text("!".to_string()));
1001                        remaining = &remaining[1..];
1002                    }
1003                }
1004                "ref_image" => {
1005                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1006                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1007                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1008
1009                        if reference.is_empty() {
1010                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1011                        } else {
1012                            elements.push(Element::ReferenceImage {
1013                                alt: alt.to_string(),
1014                                reference: reference.to_string(),
1015                            });
1016                        }
1017                        remaining = &remaining[match_obj.end()..];
1018                    } else {
1019                        elements.push(Element::Text("!".to_string()));
1020                        remaining = &remaining[1..];
1021                    }
1022                }
1023                "footnote_ref" => {
1024                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1025                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1026                        elements.push(Element::FootnoteReference { note: note.to_string() });
1027                        remaining = &remaining[match_obj.end()..];
1028                    } else {
1029                        elements.push(Element::Text("[".to_string()));
1030                        remaining = &remaining[1..];
1031                    }
1032                }
1033                "inline_link" => {
1034                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1035                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1036                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1037                        elements.push(Element::Link {
1038                            text: text.to_string(),
1039                            url: url.to_string(),
1040                        });
1041                        remaining = &remaining[match_obj.end()..];
1042                    } else {
1043                        // Fallback - shouldn't happen
1044                        elements.push(Element::Text("[".to_string()));
1045                        remaining = &remaining[1..];
1046                    }
1047                }
1048                "ref_link" => {
1049                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1050                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1051                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1052
1053                        if reference.is_empty() {
1054                            // Empty reference link [text][]
1055                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1056                        } else {
1057                            // Regular reference link [text][ref]
1058                            elements.push(Element::ReferenceLink {
1059                                text: text.to_string(),
1060                                reference: reference.to_string(),
1061                            });
1062                        }
1063                        remaining = &remaining[match_obj.end()..];
1064                    } else {
1065                        // Fallback - shouldn't happen
1066                        elements.push(Element::Text("[".to_string()));
1067                        remaining = &remaining[1..];
1068                    }
1069                }
1070                "shortcut_ref" => {
1071                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1072                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1073                        elements.push(Element::ShortcutReference {
1074                            reference: reference.to_string(),
1075                        });
1076                        remaining = &remaining[match_obj.end()..];
1077                    } else {
1078                        // Fallback - shouldn't happen
1079                        elements.push(Element::Text("[".to_string()));
1080                        remaining = &remaining[1..];
1081                    }
1082                }
1083                "wiki_link" => {
1084                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1085                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1086                        elements.push(Element::WikiLink(content.to_string()));
1087                        remaining = &remaining[match_obj.end()..];
1088                    } else {
1089                        elements.push(Element::Text("[[".to_string()));
1090                        remaining = &remaining[2..];
1091                    }
1092                }
1093                "display_math" => {
1094                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1095                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1096                        elements.push(Element::DisplayMath(math.to_string()));
1097                        remaining = &remaining[match_obj.end()..];
1098                    } else {
1099                        elements.push(Element::Text("$$".to_string()));
1100                        remaining = &remaining[2..];
1101                    }
1102                }
1103                "inline_math" => {
1104                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1105                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1106                        elements.push(Element::InlineMath(math.to_string()));
1107                        remaining = &remaining[match_obj.end()..];
1108                    } else {
1109                        elements.push(Element::Text("$".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1114                "emoji" => {
1115                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1116                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1117                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1118                        remaining = &remaining[match_obj.end()..];
1119                    } else {
1120                        elements.push(Element::Text(":".to_string()));
1121                        remaining = &remaining[1..];
1122                    }
1123                }
1124                "html_entity" => {
1125                    // HTML entities are captured whole - use as_str() to get just the matched content
1126                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
1127                    remaining = &remaining[match_obj.end()..];
1128                }
1129                "hugo_shortcode" => {
1130                    // Hugo shortcodes are atomic elements - preserve them exactly
1131                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
1132                    remaining = &remaining[match_obj.end()..];
1133                }
1134                "autolink" => {
1135                    // Autolinks are atomic elements - preserve them exactly
1136                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
1137                    remaining = &remaining[match_obj.end()..];
1138                }
1139                "html_tag" => {
1140                    // HTML tags are captured whole - use as_str() to get just the matched content
1141                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
1142                    remaining = &remaining[match_obj.end()..];
1143                }
1144                _ => {
1145                    // Unknown pattern, treat as text
1146                    elements.push(Element::Text("[".to_string()));
1147                    remaining = &remaining[1..];
1148                }
1149            }
1150        } else {
1151            // Process non-link special characters
1152
1153            // Add any text before the special character
1154            if next_special > 0 && next_special < remaining.len() {
1155                elements.push(Element::Text(remaining[..next_special].to_string()));
1156                remaining = &remaining[next_special..];
1157            }
1158
1159            // Process the special element
1160            match special_type {
1161                "code" => {
1162                    // Find end of code
1163                    if let Some(code_end) = remaining[1..].find('`') {
1164                        let code = &remaining[1..1 + code_end];
1165                        elements.push(Element::Code(code.to_string()));
1166                        remaining = &remaining[1 + code_end + 1..];
1167                    } else {
1168                        // No closing backtick, treat as text
1169                        elements.push(Element::Text(remaining.to_string()));
1170                        break;
1171                    }
1172                }
1173                "pulldown_emphasis" => {
1174                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1175                    if let Some(span) = pulldown_emphasis {
1176                        let span_len = span.end - span.start;
1177                        if span.is_strikethrough {
1178                            elements.push(Element::Strikethrough(span.content.clone()));
1179                        } else if span.is_strong {
1180                            elements.push(Element::Bold {
1181                                content: span.content.clone(),
1182                                underscore: span.uses_underscore,
1183                            });
1184                        } else {
1185                            elements.push(Element::Italic {
1186                                content: span.content.clone(),
1187                                underscore: span.uses_underscore,
1188                            });
1189                        }
1190                        remaining = &remaining[span_len..];
1191                    } else {
1192                        // Fallback - shouldn't happen
1193                        elements.push(Element::Text(remaining[..1].to_string()));
1194                        remaining = &remaining[1..];
1195                    }
1196                }
1197                _ => {
1198                    // No special elements found, add all remaining text
1199                    elements.push(Element::Text(remaining.to_string()));
1200                    break;
1201                }
1202            }
1203        }
1204    }
1205
1206    elements
1207}
1208
1209/// Reflow elements for sentence-per-line mode
1210fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1211    let abbreviations = get_abbreviations(custom_abbreviations);
1212    let mut lines = Vec::new();
1213    let mut current_line = String::new();
1214
1215    for (idx, element) in elements.iter().enumerate() {
1216        let element_str = format!("{element}");
1217
1218        // For text elements, split into sentences
1219        if let Element::Text(text) = element {
1220            // Simply append text - it already has correct spacing from tokenization
1221            let combined = format!("{current_line}{text}");
1222            // Use the pre-computed abbreviations set to avoid redundant computation
1223            let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1224
1225            if sentences.len() > 1 {
1226                // We found sentence boundaries
1227                for (i, sentence) in sentences.iter().enumerate() {
1228                    if i == 0 {
1229                        // First sentence might continue from previous elements
1230                        // But check if it ends with an abbreviation
1231                        let trimmed = sentence.trim();
1232
1233                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1234                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1235                            current_line = sentence.to_string();
1236                        } else {
1237                            // Normal case - emit the first sentence
1238                            lines.push(sentence.to_string());
1239                            current_line.clear();
1240                        }
1241                    } else if i == sentences.len() - 1 {
1242                        // Last sentence: check if it's complete or incomplete
1243                        let trimmed = sentence.trim();
1244                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1245
1246                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1247                            // Complete sentence - emit it immediately
1248                            lines.push(sentence.to_string());
1249                            current_line.clear();
1250                        } else {
1251                            // Incomplete sentence - save for next iteration
1252                            current_line = sentence.to_string();
1253                        }
1254                    } else {
1255                        // Complete sentences in the middle
1256                        lines.push(sentence.to_string());
1257                    }
1258                }
1259            } else {
1260                // Single sentence - check if it's complete
1261                let trimmed = combined.trim();
1262
1263                // If the combined result is only whitespace, don't accumulate it.
1264                // This prevents leading spaces on subsequent elements when lines
1265                // are joined with spaces during reflow iteration.
1266                if trimmed.is_empty() {
1267                    continue;
1268                }
1269
1270                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1271
1272                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1273                    // Complete single sentence - emit it
1274                    lines.push(trimmed.to_string());
1275                    current_line.clear();
1276                } else {
1277                    // Incomplete sentence - continue accumulating
1278                    current_line = combined;
1279                }
1280            }
1281        } else if let Element::Italic { content, underscore } = element {
1282            // Handle italic elements - may contain multiple sentences that need continuation
1283            let marker = if *underscore { "_" } else { "*" };
1284            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1285        } else if let Element::Bold { content, underscore } = element {
1286            // Handle bold elements - may contain multiple sentences that need continuation
1287            let marker = if *underscore { "__" } else { "**" };
1288            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1289        } else if let Element::Strikethrough(content) = element {
1290            // Handle strikethrough elements - may contain multiple sentences that need continuation
1291            handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1292        } else {
1293            // Non-text, non-emphasis elements (Code, Links, etc.)
1294            // Check if this element is adjacent to the preceding text (no space between)
1295            let is_adjacent = if idx > 0 {
1296                match &elements[idx - 1] {
1297                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1298                    _ => true,
1299                }
1300            } else {
1301                false
1302            };
1303
1304            // Add space before element if needed, but not for adjacent elements
1305            if !is_adjacent
1306                && !current_line.is_empty()
1307                && !current_line.ends_with(' ')
1308                && !current_line.ends_with('(')
1309                && !current_line.ends_with('[')
1310            {
1311                current_line.push(' ');
1312            }
1313            current_line.push_str(&element_str);
1314        }
1315    }
1316
1317    // Add any remaining content
1318    if !current_line.is_empty() {
1319        lines.push(current_line.trim().to_string());
1320    }
1321    lines
1322}
1323
1324/// Handle splitting emphasis content at sentence boundaries while preserving markers
1325fn handle_emphasis_sentence_split(
1326    content: &str,
1327    marker: &str,
1328    abbreviations: &HashSet<String>,
1329    current_line: &mut String,
1330    lines: &mut Vec<String>,
1331) {
1332    // Split the emphasis content into sentences
1333    let sentences = split_into_sentences_with_set(content, abbreviations);
1334
1335    if sentences.len() <= 1 {
1336        // Single sentence or no boundaries - treat as atomic
1337        if !current_line.is_empty()
1338            && !current_line.ends_with(' ')
1339            && !current_line.ends_with('(')
1340            && !current_line.ends_with('[')
1341        {
1342            current_line.push(' ');
1343        }
1344        current_line.push_str(marker);
1345        current_line.push_str(content);
1346        current_line.push_str(marker);
1347
1348        // Check if the emphasis content ends with sentence punctuation - if so, emit
1349        let trimmed = content.trim();
1350        let ends_with_punct = ends_with_sentence_punct(trimmed);
1351        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1352            lines.push(current_line.clone());
1353            current_line.clear();
1354        }
1355    } else {
1356        // Multiple sentences - each gets its own emphasis markers
1357        for (i, sentence) in sentences.iter().enumerate() {
1358            let trimmed = sentence.trim();
1359            if trimmed.is_empty() {
1360                continue;
1361            }
1362
1363            if i == 0 {
1364                // First sentence: combine with current_line and emit
1365                if !current_line.is_empty()
1366                    && !current_line.ends_with(' ')
1367                    && !current_line.ends_with('(')
1368                    && !current_line.ends_with('[')
1369                {
1370                    current_line.push(' ');
1371                }
1372                current_line.push_str(marker);
1373                current_line.push_str(trimmed);
1374                current_line.push_str(marker);
1375
1376                // Check if this is a complete sentence
1377                let ends_with_punct = ends_with_sentence_punct(trimmed);
1378                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1379                    lines.push(current_line.clone());
1380                    current_line.clear();
1381                }
1382            } else if i == sentences.len() - 1 {
1383                // Last sentence: check if complete
1384                let ends_with_punct = ends_with_sentence_punct(trimmed);
1385
1386                let mut line = String::new();
1387                line.push_str(marker);
1388                line.push_str(trimmed);
1389                line.push_str(marker);
1390
1391                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1392                    lines.push(line);
1393                } else {
1394                    // Incomplete - keep in current_line for potential continuation
1395                    *current_line = line;
1396                }
1397            } else {
1398                // Middle sentences: emit with markers
1399                let mut line = String::new();
1400                line.push_str(marker);
1401                line.push_str(trimmed);
1402                line.push_str(marker);
1403                lines.push(line);
1404            }
1405        }
1406    }
1407}
1408
/// English words before which a semantic line break reads naturally.
///
/// `split_at_break_word` searches for these (lower-case, space-delimited)
/// and places the break immediately *before* the matched word.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns
    "which", "that",
    // Subordinating conjunctions
    "because", "when", "if", "while", "where",
    "although", "though", "unless", "since",
    "after", "before", "until",
    "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1444
/// Returns `true` when `c` is one of the clause-level punctuation marks
/// (comma, semicolon, colon, em dash) used as semantic line break candidates.
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1449
1450/// Compute element spans for a flat text representation of elements.
1451/// Returns Vec of (start, end) byte offsets for non-Text elements,
1452/// so we can check that a split position doesn't fall inside them.
1453fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1454    let mut spans = Vec::new();
1455    let mut offset = 0;
1456    for element in elements {
1457        let rendered = format!("{element}");
1458        let len = rendered.len();
1459        if !matches!(element, Element::Text(_)) {
1460            spans.push((offset, offset + len));
1461        }
1462        offset += len;
1463    }
1464    spans
1465}
1466
/// Returns `true` when byte position `pos` lies strictly inside one of the
/// `(start, end)` spans. Positions equal to a span's start or end are
/// considered outside.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1471
/// Minimum fraction of `line_length` that the first part of a split must occupy.
/// Prevents awkwardly short first lines like "A," or "Note:" on their own.
///
/// The threshold is computed as `(line_length as f64 * MIN_SPLIT_RATIO) as usize`,
/// i.e. truncated toward zero. `reflow_elements_semantic` reuses the same ratio
/// as the width below which a line is merged back into the previous one.
const MIN_SPLIT_RATIO: f64 = 0.3;
1475
1476/// Split a line at the latest clause punctuation that keeps the first part
1477/// within `line_length`. Returns None if no valid split point exists or if
1478/// the split would create an unreasonably short first line.
1479fn split_at_clause_punctuation(
1480    text: &str,
1481    line_length: usize,
1482    element_spans: &[(usize, usize)],
1483    length_mode: ReflowLengthMode,
1484) -> Option<(String, String)> {
1485    let chars: Vec<char> = text.chars().collect();
1486    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1487
1488    // Find the char index where accumulated display width exceeds line_length
1489    let mut width_acc = 0;
1490    let mut search_end_char = 0;
1491    for (idx, &c) in chars.iter().enumerate() {
1492        let c_width = display_len(&c.to_string(), length_mode);
1493        if width_acc + c_width > line_length {
1494            break;
1495        }
1496        width_acc += c_width;
1497        search_end_char = idx + 1;
1498    }
1499
1500    let mut best_pos = None;
1501    for i in (0..search_end_char).rev() {
1502        if is_clause_punctuation(chars[i]) {
1503            // Convert char position to byte position for element span check
1504            let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1505            if !is_inside_element(byte_pos, element_spans) {
1506                best_pos = Some(i);
1507                break;
1508            }
1509        }
1510    }
1511
1512    let pos = best_pos?;
1513
1514    // Reject splits that create very short first lines
1515    let first: String = chars[..=pos].iter().collect();
1516    let first_display_len = display_len(&first, length_mode);
1517    if first_display_len < min_first_len {
1518        return None;
1519    }
1520
1521    // Split after the punctuation character
1522    let rest: String = chars[pos + 1..].iter().collect();
1523    let rest = rest.trim_start().to_string();
1524
1525    if rest.is_empty() {
1526        return None;
1527    }
1528
1529    Some((first, rest))
1530}
1531
1532/// Split a line before the latest break-word that keeps the first part
1533/// within `line_length`. Returns None if no valid split point exists or if
1534/// the split would create an unreasonably short first line.
1535fn split_at_break_word(
1536    text: &str,
1537    line_length: usize,
1538    element_spans: &[(usize, usize)],
1539    length_mode: ReflowLengthMode,
1540) -> Option<(String, String)> {
1541    let lower = text.to_lowercase();
1542    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1543    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1544
1545    for &word in BREAK_WORDS {
1546        let mut search_start = 0;
1547        while let Some(pos) = lower[search_start..].find(word) {
1548            let abs_pos = search_start + pos;
1549
1550            // Verify it's a word boundary: preceded by space, followed by space
1551            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1552            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1553
1554            if preceded_by_space && followed_by_space {
1555                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1556                let first_part = text[..abs_pos].trim_end();
1557                let first_part_len = display_len(first_part, length_mode);
1558
1559                if first_part_len >= min_first_len
1560                    && first_part_len <= line_length
1561                    && !is_inside_element(abs_pos, element_spans)
1562                {
1563                    // Prefer the latest valid split point
1564                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1565                        best_split = Some((abs_pos, word.len()));
1566                    }
1567                }
1568            }
1569
1570            search_start = abs_pos + word.len();
1571        }
1572    }
1573
1574    let (byte_start, _word_len) = best_split?;
1575
1576    let first = text[..byte_start].trim_end().to_string();
1577    let rest = text[byte_start..].to_string();
1578
1579    if first.is_empty() || rest.trim().is_empty() {
1580        return None;
1581    }
1582
1583    Some((first, rest))
1584}
1585
1586/// Recursively cascade-split a line that exceeds line_length.
1587/// Tries clause punctuation first, then break-words, then word wrap.
1588fn cascade_split_line(
1589    text: &str,
1590    line_length: usize,
1591    abbreviations: &Option<Vec<String>>,
1592    length_mode: ReflowLengthMode,
1593) -> Vec<String> {
1594    if line_length == 0 || display_len(text, length_mode) <= line_length {
1595        return vec![text.to_string()];
1596    }
1597
1598    let elements = parse_markdown_elements(text);
1599    let element_spans = compute_element_spans(&elements);
1600
1601    // Try clause punctuation split
1602    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1603        let mut result = vec![first];
1604        result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1605        return result;
1606    }
1607
1608    // Try break-word split
1609    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1610        let mut result = vec![first];
1611        result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1612        return result;
1613    }
1614
1615    // Fallback: word wrap using existing reflow_elements
1616    let options = ReflowOptions {
1617        line_length,
1618        break_on_sentences: false,
1619        preserve_breaks: false,
1620        sentence_per_line: false,
1621        semantic_line_breaks: false,
1622        abbreviations: abbreviations.clone(),
1623        length_mode,
1624    };
1625    reflow_elements(&elements, &options)
1626}
1627
1628/// Reflow elements using semantic line breaks strategy:
1629/// 1. Split at sentence boundaries (always)
1630/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
1631fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1632    // Step 1: Split into sentences using existing sentence-per-line logic
1633    let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1634
1635    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
1636    // When line_length is 0 (unlimited), skip cascading — sentence splits only
1637    if options.line_length == 0 {
1638        return sentence_lines;
1639    }
1640
1641    let length_mode = options.length_mode;
1642    let mut result = Vec::new();
1643    for line in sentence_lines {
1644        if display_len(&line, length_mode) <= options.line_length {
1645            result.push(line);
1646        } else {
1647            result.extend(cascade_split_line(
1648                &line,
1649                options.line_length,
1650                &options.abbreviations,
1651                length_mode,
1652            ));
1653        }
1654    }
1655
1656    // Step 3: Merge very short trailing lines back into the previous line.
1657    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
1658    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1659    let mut merged: Vec<String> = Vec::with_capacity(result.len());
1660    for line in result {
1661        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1662            // Don't merge across sentence boundaries — sentence splits are intentional
1663            let prev_ends_at_sentence = {
1664                let trimmed = merged.last().unwrap().trim_end();
1665                trimmed
1666                    .chars()
1667                    .rev()
1668                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1669                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1670            };
1671
1672            if !prev_ends_at_sentence {
1673                let prev = merged.last_mut().unwrap();
1674                let combined = format!("{prev} {line}");
1675                // Only merge if the combined line fits within the limit
1676                if display_len(&combined, length_mode) <= options.line_length {
1677                    *prev = combined;
1678                    continue;
1679                }
1680            }
1681        }
1682        merged.push(line);
1683    }
1684    merged
1685}
1686
/// Find the byte index of the last space in `line` that is safe to split at,
/// i.e. one that is not inside any rendered non-Text element.
///
/// `element_spans` holds the `(start, end)` byte ranges of the non-Text
/// elements in `line`. Bounds are treated as exclusive on both sides
/// (`start < pos < end`): element delimiters (e.g., `[`, `]`, `(`, `)`, `<`,
/// `>`, `` ` ``) are never spaces, so only interior positions need protection.
///
/// Returns `None` when the line has no such space.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let is_protected = |pos: usize| element_spans.iter().any(|&(start, end)| start < pos && pos < end);
    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| !is_protected(pos))
}
1700
1701/// Reflow elements into lines that fit within the line length
1702fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1703    let mut lines = Vec::new();
1704    let mut current_line = String::new();
1705    let mut current_length = 0;
1706    // Track byte spans of non-Text elements in current_line for safe splitting
1707    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1708    let length_mode = options.length_mode;
1709
1710    for (idx, element) in elements.iter().enumerate() {
1711        let element_str = format!("{element}");
1712        let element_len = element.display_width(length_mode);
1713
1714        // Determine adjacency from the original elements, not from current_line.
1715        // Elements are adjacent when there's no whitespace between them in the source:
1716        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
1717        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
1718        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
1719        let is_adjacent_to_prev = if idx > 0 {
1720            match (&elements[idx - 1], element) {
1721                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1722                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1723                _ => true,
1724            }
1725        } else {
1726            false
1727        };
1728
1729        // For text elements that might need breaking
1730        if let Element::Text(text) = element {
1731            // Check if original text had leading whitespace
1732            let has_leading_space = text.starts_with(char::is_whitespace);
1733            // If this is a text element, always process it word by word
1734            let words: Vec<&str> = text.split_whitespace().collect();
1735
1736            for (i, word) in words.iter().enumerate() {
1737                let word_len = display_len(word, length_mode);
1738                // Check if this "word" is just punctuation that should stay attached
1739                let is_trailing_punct = word
1740                    .chars()
1741                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1742
1743                // First word of text adjacent to preceding non-text element
1744                // must stay attached (e.g., shortcode followed by punctuation or text)
1745                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1746
1747                if is_first_adjacent {
1748                    // Attach directly without space, preventing line break
1749                    if current_length + word_len > options.line_length && current_length > 0 {
1750                        // Would exceed — break before the adjacent group
1751                        // Use element-aware space search to avoid splitting inside links/code/etc.
1752                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
1753                            let before = current_line[..last_space].trim_end().to_string();
1754                            let after = current_line[last_space + 1..].to_string();
1755                            lines.push(before);
1756                            current_line = format!("{after}{word}");
1757                            current_length = display_len(&current_line, length_mode);
1758                            current_line_element_spans.clear();
1759                        } else {
1760                            current_line.push_str(word);
1761                            current_length += word_len;
1762                        }
1763                    } else {
1764                        current_line.push_str(word);
1765                        current_length += word_len;
1766                    }
1767                } else if current_length > 0
1768                    && current_length + 1 + word_len > options.line_length
1769                    && !is_trailing_punct
1770                {
1771                    // Start a new line (but never for trailing punctuation)
1772                    lines.push(current_line.trim().to_string());
1773                    current_line = word.to_string();
1774                    current_length = word_len;
1775                    current_line_element_spans.clear();
1776                } else {
1777                    // Add word to current line
1778                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
1779                    // AND this isn't trailing punctuation (which attaches directly)
1780                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1781                        current_line.push(' ');
1782                        current_length += 1;
1783                    }
1784                    current_line.push_str(word);
1785                    current_length += word_len;
1786                }
1787            }
1788        } else {
1789            // For non-text elements (code, links, references), treat as atomic units
1790            // These should never be broken across lines
1791
1792            if is_adjacent_to_prev {
1793                // Adjacent to preceding text — attach directly without space
1794                if current_length + element_len > options.line_length {
1795                    // Would exceed limit — break before the adjacent word group
1796                    // Use element-aware space search to avoid splitting inside links/code/etc.
1797                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
1798                        let before = current_line[..last_space].trim_end().to_string();
1799                        let after = current_line[last_space + 1..].to_string();
1800                        lines.push(before);
1801                        current_line = format!("{after}{element_str}");
1802                        current_length = display_len(&current_line, length_mode);
1803                        current_line_element_spans.clear();
1804                        // Record the element span in the new current_line
1805                        let start = after.len();
1806                        current_line_element_spans.push((start, start + element_str.len()));
1807                    } else {
1808                        // No safe space to break at — accept the long line
1809                        let start = current_line.len();
1810                        current_line.push_str(&element_str);
1811                        current_length += element_len;
1812                        current_line_element_spans.push((start, current_line.len()));
1813                    }
1814                } else {
1815                    let start = current_line.len();
1816                    current_line.push_str(&element_str);
1817                    current_length += element_len;
1818                    current_line_element_spans.push((start, current_line.len()));
1819                }
1820            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1821                // Not adjacent, would exceed — start new line
1822                lines.push(current_line.trim().to_string());
1823                current_line = element_str.clone();
1824                current_length = element_len;
1825                current_line_element_spans.clear();
1826                current_line_element_spans.push((0, element_str.len()));
1827            } else {
1828                // Not adjacent, fits — add with space
1829                let ends_with_opener =
1830                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1831                if current_length > 0 && !ends_with_opener {
1832                    current_line.push(' ');
1833                    current_length += 1;
1834                }
1835                let start = current_line.len();
1836                current_line.push_str(&element_str);
1837                current_length += element_len;
1838                current_line_element_spans.push((start, current_line.len()));
1839            }
1840        }
1841    }
1842
1843    // Don't forget the last line
1844    if !current_line.is_empty() {
1845        lines.push(current_line.trim_end().to_string());
1846    }
1847
1848    lines
1849}
1850
1851/// Reflow markdown content preserving structure
1852pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1853    let lines: Vec<&str> = content.lines().collect();
1854    let mut result = Vec::new();
1855    let mut i = 0;
1856
1857    while i < lines.len() {
1858        let line = lines[i];
1859        let trimmed = line.trim();
1860
1861        // Preserve empty lines
1862        if trimmed.is_empty() {
1863            result.push(String::new());
1864            i += 1;
1865            continue;
1866        }
1867
1868        // Preserve headings as-is
1869        if trimmed.starts_with('#') {
1870            result.push(line.to_string());
1871            i += 1;
1872            continue;
1873        }
1874
1875        // Preserve Quarto/Pandoc div markers (:::) as-is
1876        if trimmed.starts_with(":::") {
1877            result.push(line.to_string());
1878            i += 1;
1879            continue;
1880        }
1881
1882        // Preserve fenced code blocks
1883        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1884            result.push(line.to_string());
1885            i += 1;
1886            // Copy lines until closing fence
1887            while i < lines.len() {
1888                result.push(lines[i].to_string());
1889                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1890                    i += 1;
1891                    break;
1892                }
1893                i += 1;
1894            }
1895            continue;
1896        }
1897
1898        // Preserve indented code blocks (4+ columns accounting for tab expansion)
1899        if ElementCache::calculate_indentation_width_default(line) >= 4 {
1900            // Collect all consecutive indented lines
1901            result.push(line.to_string());
1902            i += 1;
1903            while i < lines.len() {
1904                let next_line = lines[i];
1905                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
1906                if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1907                    result.push(next_line.to_string());
1908                    i += 1;
1909                } else {
1910                    break;
1911                }
1912            }
1913            continue;
1914        }
1915
1916        // Preserve block quotes (but reflow their content)
1917        if trimmed.starts_with('>') {
1918            // find() returns byte position which is correct for str slicing
1919            // The unwrap is safe because we already verified trimmed starts with '>'
1920            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1921            let quote_prefix = line[0..gt_pos + 1].to_string();
1922            let quote_content = &line[quote_prefix.len()..].trim_start();
1923
1924            let reflowed = reflow_line(quote_content, options);
1925            for reflowed_line in reflowed.iter() {
1926                result.push(format!("{quote_prefix} {reflowed_line}"));
1927            }
1928            i += 1;
1929            continue;
1930        }
1931
1932        // Preserve horizontal rules first (before checking for lists)
1933        if is_horizontal_rule(trimmed) {
1934            result.push(line.to_string());
1935            i += 1;
1936            continue;
1937        }
1938
1939        // Preserve lists (but not horizontal rules)
1940        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
1941            // Find the list marker and preserve indentation
1942            let indent = line.len() - line.trim_start().len();
1943            let indent_str = " ".repeat(indent);
1944
1945            // For numbered lists, find the period and the space after it
1946            // For bullet lists, find the marker and the space after it
1947            let mut marker_end = indent;
1948            let mut content_start = indent;
1949
1950            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1951                // Numbered list: find the period
1952                if let Some(period_pos) = line[indent..].find('.') {
1953                    marker_end = indent + period_pos + 1; // Include the period
1954                    content_start = marker_end;
1955                    // Skip any spaces after the period to find content start
1956                    // Use byte-based check since content_start is a byte index
1957                    // This is safe because space is ASCII (single byte)
1958                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1959                        content_start += 1;
1960                    }
1961                }
1962            } else {
1963                // Bullet list: marker is single character
1964                marker_end = indent + 1; // Just the marker character
1965                content_start = marker_end;
1966                // Skip any spaces after the marker
1967                // Use byte-based check since content_start is a byte index
1968                // This is safe because space is ASCII (single byte)
1969                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1970                    content_start += 1;
1971                }
1972            }
1973
1974            let marker = &line[indent..marker_end];
1975
1976            // Collect all content for this list item (including continuation lines)
1977            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
1978            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1979            i += 1;
1980
1981            // Collect continuation lines (indented lines that are part of this list item)
1982            while i < lines.len() {
1983                let next_line = lines[i];
1984                let next_trimmed = next_line.trim();
1985
1986                // Stop if we hit an empty line or another list item or special block
1987                if is_block_boundary(next_trimmed) {
1988                    break;
1989                }
1990
1991                // Check if this line is indented (continuation of list item)
1992                let next_indent = next_line.len() - next_line.trim_start().len();
1993                if next_indent >= content_start {
1994                    // This is a continuation line - add its content
1995                    // Preserve hard breaks while trimming excessive whitespace
1996                    let trimmed_start = next_line.trim_start();
1997                    list_content.push(trim_preserving_hard_break(trimmed_start));
1998                    i += 1;
1999                } else {
2000                    // Not indented enough, not part of this list item
2001                    break;
2002                }
2003            }
2004
2005            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2006            // Hard breaks should prevent joining with the next line
2007            let combined_content = if options.preserve_breaks {
2008                list_content[0].clone()
2009            } else {
2010                // Check if any lines have hard breaks - if so, preserve the structure
2011                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2012                if has_hard_breaks {
2013                    // Don't join lines with hard breaks - keep them separate with newlines
2014                    list_content.join("\n")
2015                } else {
2016                    // No hard breaks, safe to join with spaces
2017                    list_content.join(" ")
2018                }
2019            };
2020
2021            // Calculate the proper indentation for continuation lines
2022            let trimmed_marker = marker;
2023            let continuation_spaces = content_start;
2024
2025            // Adjust line length to account for list marker and space
2026            let prefix_length = indent + trimmed_marker.len() + 1;
2027
2028            // Create adjusted options with reduced line length
2029            let adjusted_options = ReflowOptions {
2030                line_length: options.line_length.saturating_sub(prefix_length),
2031                ..options.clone()
2032            };
2033
2034            let reflowed = reflow_line(&combined_content, &adjusted_options);
2035            for (j, reflowed_line) in reflowed.iter().enumerate() {
2036                if j == 0 {
2037                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2038                } else {
2039                    // Continuation lines aligned with text after marker
2040                    let continuation_indent = " ".repeat(continuation_spaces);
2041                    result.push(format!("{continuation_indent}{reflowed_line}"));
2042                }
2043            }
2044            continue;
2045        }
2046
2047        // Preserve tables
2048        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2049            result.push(line.to_string());
2050            i += 1;
2051            continue;
2052        }
2053
2054        // Preserve reference definitions
2055        if trimmed.starts_with('[') && line.contains("]:") {
2056            result.push(line.to_string());
2057            i += 1;
2058            continue;
2059        }
2060
2061        // Preserve definition list items (extended markdown)
2062        if is_definition_list_item(trimmed) {
2063            result.push(line.to_string());
2064            i += 1;
2065            continue;
2066        }
2067
2068        // Check if this is a single line that doesn't need processing
2069        let mut is_single_line_paragraph = true;
2070        if i + 1 < lines.len() {
2071            let next_trimmed = lines[i + 1].trim();
2072            // Check if next line continues this paragraph
2073            if !is_block_boundary(next_trimmed) {
2074                is_single_line_paragraph = false;
2075            }
2076        }
2077
2078        // If it's a single line that fits, just add it as-is
2079        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2080            result.push(line.to_string());
2081            i += 1;
2082            continue;
2083        }
2084
2085        // For regular paragraphs, collect consecutive lines
2086        let mut paragraph_parts = Vec::new();
2087        let mut current_part = vec![line];
2088        i += 1;
2089
2090        // If preserve_breaks is true, treat each line separately
2091        if options.preserve_breaks {
2092            // Don't collect consecutive lines - just reflow this single line
2093            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2094                Some("\\")
2095            } else if line.ends_with("  ") {
2096                Some("  ")
2097            } else {
2098                None
2099            };
2100            let reflowed = reflow_line(line, options);
2101
2102            // Preserve hard breaks (two trailing spaces or backslash)
2103            if let Some(break_marker) = hard_break_type {
2104                if !reflowed.is_empty() {
2105                    let mut reflowed_with_break = reflowed;
2106                    let last_idx = reflowed_with_break.len() - 1;
2107                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2108                        reflowed_with_break[last_idx].push_str(break_marker);
2109                    }
2110                    result.extend(reflowed_with_break);
2111                }
2112            } else {
2113                result.extend(reflowed);
2114            }
2115        } else {
2116            // Original behavior: collect consecutive lines into a paragraph
2117            while i < lines.len() {
2118                let prev_line = if !current_part.is_empty() {
2119                    current_part.last().unwrap()
2120                } else {
2121                    ""
2122                };
2123                let next_line = lines[i];
2124                let next_trimmed = next_line.trim();
2125
2126                // Stop at empty lines or special blocks
2127                if is_block_boundary(next_trimmed) {
2128                    break;
2129                }
2130
2131                // Check if previous line ends with hard break (two spaces or backslash)
2132                // or is a complete sentence in sentence_per_line mode
2133                let prev_trimmed = prev_line.trim();
2134                let abbreviations = get_abbreviations(&options.abbreviations);
2135                let ends_with_sentence = (prev_trimmed.ends_with('.')
2136                    || prev_trimmed.ends_with('!')
2137                    || prev_trimmed.ends_with('?')
2138                    || prev_trimmed.ends_with(".*")
2139                    || prev_trimmed.ends_with("!*")
2140                    || prev_trimmed.ends_with("?*")
2141                    || prev_trimmed.ends_with("._")
2142                    || prev_trimmed.ends_with("!_")
2143                    || prev_trimmed.ends_with("?_")
2144                    // Quote-terminated sentences (straight and curly quotes)
2145                    || prev_trimmed.ends_with(".\"")
2146                    || prev_trimmed.ends_with("!\"")
2147                    || prev_trimmed.ends_with("?\"")
2148                    || prev_trimmed.ends_with(".'")
2149                    || prev_trimmed.ends_with("!'")
2150                    || prev_trimmed.ends_with("?'")
2151                    || prev_trimmed.ends_with(".\u{201D}")
2152                    || prev_trimmed.ends_with("!\u{201D}")
2153                    || prev_trimmed.ends_with("?\u{201D}")
2154                    || prev_trimmed.ends_with(".\u{2019}")
2155                    || prev_trimmed.ends_with("!\u{2019}")
2156                    || prev_trimmed.ends_with("?\u{2019}"))
2157                    && !text_ends_with_abbreviation(
2158                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2159                        &abbreviations,
2160                    );
2161
2162                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2163                    // Start a new part after hard break or complete sentence
2164                    paragraph_parts.push(current_part.join(" "));
2165                    current_part = vec![next_line];
2166                } else {
2167                    current_part.push(next_line);
2168                }
2169                i += 1;
2170            }
2171
2172            // Add the last part
2173            if !current_part.is_empty() {
2174                if current_part.len() == 1 {
2175                    // Single line, don't add trailing space
2176                    paragraph_parts.push(current_part[0].to_string());
2177                } else {
2178                    paragraph_parts.push(current_part.join(" "));
2179                }
2180            }
2181
2182            // Reflow each part separately, preserving hard breaks
2183            for (j, part) in paragraph_parts.iter().enumerate() {
2184                let reflowed = reflow_line(part, options);
2185                result.extend(reflowed);
2186
2187                // Preserve hard break by ensuring last line of part ends with hard break marker
2188                // Use two spaces as the default hard break format for reflows
2189                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2190                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2191                    let last_idx = result.len() - 1;
2192                    if !has_hard_break(&result[last_idx]) {
2193                        result[last_idx].push_str("  ");
2194                    }
2195                }
2196            }
2197        }
2198    }
2199
2200    // Preserve trailing newline if the original content had one
2201    let result_text = result.join("\n");
2202    if content.ends_with('\n') && !result_text.ends_with('\n') {
2203        format!("{result_text}\n")
2204    } else {
2205        result_text
2206    }
2207}
2208
/// Result of reflowing a single paragraph: the byte range it occupied in the
/// original document and the text that should replace that range.
///
/// The range is half-open (`start_byte..end_byte`), so it can be used directly
/// to slice or splice the original content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Byte offset of the paragraph's first line in the original content (inclusive)
    pub start_byte: usize,
    /// Byte offset just past the paragraph in the original content (exclusive).
    /// Covers the newline after the paragraph's last line when there is one.
    pub end_byte: usize,
    /// The reflowed text for this paragraph. Ends with a newline exactly when
    /// the byte range covers one, so it can replace the range as-is.
    pub reflowed_text: String,
}
2219
2220/// Reflow a single paragraph at the specified line number
2221///
2222/// This function finds the paragraph containing the given line number,
2223/// reflows it according to the specified line length, and returns
2224/// information about the paragraph location and its reflowed text.
2225///
2226/// # Arguments
2227///
2228/// * `content` - The full document content
2229/// * `line_number` - The 1-based line number within the paragraph to reflow
2230/// * `line_length` - The target line length for reflowing
2231///
2232/// # Returns
2233///
2234/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
2235/// or `None` if the line number is out of bounds or the content at that
2236/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
2237pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2238    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2239}
2240
2241/// Reflow a paragraph at the given line with a specific length mode.
2242pub fn reflow_paragraph_at_line_with_mode(
2243    content: &str,
2244    line_number: usize,
2245    line_length: usize,
2246    length_mode: ReflowLengthMode,
2247) -> Option<ParagraphReflow> {
2248    if line_number == 0 {
2249        return None;
2250    }
2251
2252    let lines: Vec<&str> = content.lines().collect();
2253
2254    // Check if line number is valid (1-based)
2255    if line_number > lines.len() {
2256        return None;
2257    }
2258
2259    let target_idx = line_number - 1; // Convert to 0-based
2260    let target_line = lines[target_idx];
2261    let trimmed = target_line.trim();
2262
2263    // Don't reflow special blocks
2264    if is_paragraph_boundary(trimmed, target_line) {
2265        return None;
2266    }
2267
2268    // Find paragraph start - scan backward until blank line or special block
2269    let mut para_start = target_idx;
2270    while para_start > 0 {
2271        let prev_idx = para_start - 1;
2272        let prev_line = lines[prev_idx];
2273        let prev_trimmed = prev_line.trim();
2274
2275        // Stop at blank line or special blocks
2276        if is_paragraph_boundary(prev_trimmed, prev_line) {
2277            break;
2278        }
2279
2280        para_start = prev_idx;
2281    }
2282
2283    // Find paragraph end - scan forward until blank line or special block
2284    let mut para_end = target_idx;
2285    while para_end + 1 < lines.len() {
2286        let next_idx = para_end + 1;
2287        let next_line = lines[next_idx];
2288        let next_trimmed = next_line.trim();
2289
2290        // Stop at blank line or special blocks
2291        if is_paragraph_boundary(next_trimmed, next_line) {
2292            break;
2293        }
2294
2295        para_end = next_idx;
2296    }
2297
2298    // Extract paragraph lines
2299    let paragraph_lines = &lines[para_start..=para_end];
2300
2301    // Calculate byte offsets
2302    let mut start_byte = 0;
2303    for line in lines.iter().take(para_start) {
2304        start_byte += line.len() + 1; // +1 for newline
2305    }
2306
2307    let mut end_byte = start_byte;
2308    for line in paragraph_lines.iter() {
2309        end_byte += line.len() + 1; // +1 for newline
2310    }
2311
2312    // Track whether the byte range includes a trailing newline
2313    // (it doesn't if this is the last line and the file doesn't end with newline)
2314    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2315
2316    // Adjust end_byte if the last line doesn't have a newline
2317    if !includes_trailing_newline {
2318        end_byte -= 1;
2319    }
2320
2321    // Join paragraph lines and reflow
2322    let paragraph_text = paragraph_lines.join("\n");
2323
2324    // Create reflow options
2325    let options = ReflowOptions {
2326        line_length,
2327        break_on_sentences: true,
2328        preserve_breaks: false,
2329        sentence_per_line: false,
2330        semantic_line_breaks: false,
2331        abbreviations: None,
2332        length_mode,
2333    };
2334
2335    // Reflow the paragraph using reflow_markdown to handle it properly
2336    let reflowed = reflow_markdown(&paragraph_text, &options);
2337
2338    // Ensure reflowed text matches whether the byte range includes a trailing newline
2339    // This is critical: if the range includes a newline, the replacement must too,
2340    // otherwise the next line will get appended to the reflowed paragraph
2341    let reflowed_text = if includes_trailing_newline {
2342        // Range includes newline - ensure reflowed text has one
2343        if reflowed.ends_with('\n') {
2344            reflowed
2345        } else {
2346            format!("{reflowed}\n")
2347        }
2348    } else {
2349        // Range doesn't include newline - ensure reflowed text doesn't have one
2350        if reflowed.ends_with('\n') {
2351            reflowed.trim_end_matches('\n').to_string()
2352        } else {
2353            reflowed
2354        }
2355    };
2356
2357    Some(ParagraphReflow {
2358        start_byte,
2359        end_byte,
2360        reflowed_text,
2361    })
2362}
2363
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `text_ends_with_abbreviation()` against the built-in abbreviation set.
    ///
    /// Kept inline next to the helpers it exercises; public API and integration
    /// tests are in tests/utils/text_reflow_test.rs
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Built-in abbreviations (titles and i.e./e.g.) must be recognised
        let recognised = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognised {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected an abbreviation ending: {text:?}"
            );
        }

        // Must NOT be recognised
        let rejected = [
            "etc.",       // not in built-in list (etc doesn't always have period)
            "paradigms.", // ordinary word
            "programs.",  // ordinary word
            "items.",     // ordinary word
            "systems.",   // ordinary word
            "Dr?",        // question mark, not period
            "Mr!",        // exclamation, not period
            "paradigms?", // question mark
            "word",       // no punctuation
            "",           // empty string
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected no abbreviation ending: {text:?}"
            );
        }
    }

    /// Unordered list markers are recognised; rules, emphasis and plain text are not.
    #[test]
    fn test_is_unordered_list_marker() {
        let markers = [
            "- item", "* item", "+ item", // marker followed by content
            "-", "*", "+", // lone marker
        ];
        for candidate in markers {
            assert!(is_unordered_list_marker(candidate), "expected list marker: {candidate:?}");
        }

        let non_markers = [
            "---",        // horizontal rule
            "***",        // horizontal rule
            "- - -",      // horizontal rule
            "* * *",      // horizontal rule
            "*emphasis*", // emphasis, not list
            "-word",      // no space after marker
            "",           // empty
            "text",       // plain text
            "# heading",  // heading
        ];
        for candidate in non_markers {
            assert!(!is_unordered_list_marker(candidate), "expected no list marker: {candidate:?}");
        }
    }

    /// Lines that end a paragraph versus lines that continue it.
    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            "",                           // empty line
            "# Heading",                  // ATX heading
            "## Level 2",                 // ATX heading
            "```rust",                    // code fence
            "~~~",                        // tilde code fence
            "> quote",                    // blockquote
            "| cell |",                   // table
            "[link]: http://example.com", // reference def
            "---",                        // horizontal rule
            "***",                        // horizontal rule
            "- item",                     // unordered list
            "* item",                     // unordered list
            "+ item",                     // unordered list
            "1. item",                    // ordered list
            "10. item",                   // ordered list
            ": definition",               // definition list
            ":::",                        // div marker
            "::::: {.callout-note}",      // div marker with attrs
        ];
        for candidate in boundaries {
            assert!(is_block_boundary(candidate), "expected block boundary: {candidate:?}");
        }

        // Paragraph continuations
        let continuations = [
            "regular text",
            "*emphasis*",  // emphasis, not list
            "[link](url)", // inline link, not reference def
            "some words here",
        ];
        for candidate in continuations {
            assert!(!is_block_boundary(candidate), "expected paragraph continuation: {candidate:?}");
        }
    }

    /// A definition list item directly after a one-line paragraph must stay on
    /// its own line instead of being merged into that paragraph.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let result = reflow_markdown("Term\n: Definition of the term", &options);

        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );

        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines[0], "Term");
        assert_eq!(lines[1], ": Definition of the term");
    }

    /// `is_paragraph_boundary(trimmed, line)` covers the block-boundary cases
    /// plus indented code and table rows.
    #[test]
    fn test_is_paragraph_boundary() {
        // (trimmed, original line)
        let boundaries = [
            // Core block boundary checks
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code blocks (>= 4 spaces or tab)
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows via is_potential_table_row
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"), // pipe-delimited without leading pipe
        ];
        for (trimmed, line) in boundaries {
            assert!(is_paragraph_boundary(trimmed, line), "expected paragraph boundary: {line:?}");
        }

        let non_boundaries = [
            ("regular text", "regular text"),
            ("text", "  text"), // 2-space indent is not code
        ];
        for (trimmed, line) in non_boundaries {
            assert!(!is_paragraph_boundary(trimmed, line), "expected no paragraph boundary: {line:?}");
        }
    }

    /// Div markers (:::) are paragraph boundaries for `reflow_paragraph_at_line`,
    /// so asking to reflow the marker line itself yields nothing.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the div marker
        let outcome = reflow_paragraph_at_line(content, 3, 80);
        assert!(outcome.is_none(), "Div marker line should not be reflowed");
    }
}