Skip to main content

rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::element_cache::ElementCache;
7use crate::utils::is_definition_list_item;
8use crate::utils::regex_cache::{
9    DISPLAY_MATH_REGEX, EMAIL_PATTERN, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
10    HUGO_SHORTCODE_REGEX, INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX,
11    LINKED_IMAGE_INLINE_INLINE, LINKED_IMAGE_INLINE_REF, LINKED_IMAGE_REF_INLINE, LINKED_IMAGE_REF_REF,
12    REF_IMAGE_REGEX, REF_LINK_REGEX, SHORTCUT_REF_REGEX, WIKI_LINK_REGEX,
13};
14use crate::utils::sentence_utils::{
15    get_abbreviations, is_cjk_char, is_cjk_sentence_ending, is_closing_quote, is_opening_quote,
16    text_ends_with_abbreviation,
17};
18use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
19use std::collections::HashSet;
20use unicode_width::UnicodeWidthStr;
21
/// Length calculation mode for reflow.
///
/// Selects how a string is measured when it is compared against the
/// configured line length. `Visual` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum ReflowLengthMode {
    /// Count Unicode scalar values (`char`s).
    ///
    /// This is not a grapheme-cluster count: a base character followed by a
    /// combining mark is measured as two.
    Chars,
    /// Count visual display width (CJK = 2 columns, emoji = 2, etc.)
    #[default]
    Visual,
    /// Count raw bytes of the UTF-8 encoding
    Bytes,
}
33
34/// Calculate the display length of a string based on the length mode
35fn display_len(s: &str, mode: ReflowLengthMode) -> usize {
36    match mode {
37        ReflowLengthMode::Chars => s.chars().count(),
38        ReflowLengthMode::Visual => s.width(),
39        ReflowLengthMode::Bytes => s.len(),
40    }
41}
42
/// Options for reflowing text.
///
/// `reflow_line` checks the mode flags in this order: `sentence_per_line`
/// first, then `semantic_line_breaks`, and only then falls back to plain
/// length-based wrapping.
#[derive(Clone)]
pub struct ReflowOptions {
    /// Target line length.
    /// In the plain wrapping mode of `reflow_line`, `0` means no wrapping
    /// (unlimited line length).
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
    /// Whether to use semantic line breaks (cascading split strategy)
    pub semantic_line_breaks: bool,
    /// Custom abbreviations for sentence detection
    /// Periods are optional - both "Dr" and "Dr." work the same
    /// Custom abbreviations are always added to the built-in defaults
    pub abbreviations: Option<Vec<String>>,
    /// How to measure string length for line-length comparisons
    pub length_mode: ReflowLengthMode,
}
63
64impl Default for ReflowOptions {
65    fn default() -> Self {
66        Self {
67            line_length: 80,
68            break_on_sentences: true,
69            preserve_breaks: false,
70            sentence_per_line: false,
71            semantic_line_breaks: false,
72            abbreviations: None,
73            length_mode: ReflowLengthMode::default(),
74        }
75    }
76}
77
/// Detect if a character position is a sentence boundary.
///
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line.
/// Supports ASCII punctuation (`.`, `!`, `?`) and CJK sentence-ending
/// punctuation (whatever `is_cjk_sentence_ending` accepts).
///
/// # Parameters
/// - `text`: the full text being scanned.
/// - `pos`: index of the candidate punctuation, counted in `char`s (NOT bytes).
/// - `abbreviations`: set passed to `text_ends_with_abbreviation` so that a
///   period ending an abbreviation is not treated as a boundary.
///
/// # Returns
/// `true` when the character at `pos` ends a sentence and more sentence
/// content follows. Always `false` for the last character of `text` (or an
/// out-of-range `pos`), because nothing follows it.
///
/// Note: `text` is collected into a `Vec<char>` on every call, so each call
/// is linear in the length of `text`.
fn is_sentence_boundary(text: &str, pos: usize, abbreviations: &HashSet<String>) -> bool {
    let chars: Vec<char> = text.chars().collect();

    // A boundary needs at least one following character; this also guards the
    // `chars[pos + 1]` index below.
    if pos + 1 >= chars.len() {
        return false;
    }

    let c = chars[pos];
    let next_char = chars[pos + 1];

    // Check for CJK sentence-ending punctuation
    // CJK punctuation doesn't require space or uppercase after it
    if is_cjk_sentence_ending(c) {
        // Skip any trailing emphasis/strikethrough markers
        let mut after_punct_pos = pos + 1;
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Skip whitespace
        while after_punct_pos < chars.len() && chars[after_punct_pos].is_whitespace() {
            after_punct_pos += 1;
        }

        // Check if we have more content (any non-whitespace)
        if after_punct_pos >= chars.len() {
            return false;
        }

        // Skip leading emphasis/strikethrough markers
        while after_punct_pos < chars.len()
            && (chars[after_punct_pos] == '*' || chars[after_punct_pos] == '_' || chars[after_punct_pos] == '~')
        {
            after_punct_pos += 1;
        }

        // Only markers remained: there is no next sentence
        if after_punct_pos >= chars.len() {
            return false;
        }

        // For CJK, we accept any character as the start of the next sentence
        // (no uppercase requirement, since CJK doesn't have case)
        return true;
    }

    // Check for ASCII sentence-ending punctuation
    if c != '.' && c != '!' && c != '?' {
        return false;
    }

    // Must be followed by space, closing quote, or emphasis/strikethrough marker followed by space.
    // Each arm yields (index of the separating space, index just after it);
    // only the second value is used below.
    let (_space_pos, after_space_pos) = if next_char == ' ' {
        // Normal case: punctuation followed by space
        (pos + 1, pos + 2)
    } else if is_closing_quote(next_char) && pos + 2 < chars.len() {
        // Sentence ends with quote - check what follows the quote
        if chars[pos + 2] == ' ' {
            // Just quote followed by space: 'sentence." '
            (pos + 2, pos + 3)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_') && pos + 3 < chars.len() && chars[pos + 3] == ' ' {
            // Quote followed by emphasis: 'sentence."* '
            (pos + 3, pos + 4)
        } else if (chars[pos + 2] == '*' || chars[pos + 2] == '_')
            && pos + 4 < chars.len()
            && chars[pos + 3] == chars[pos + 2]
            && chars[pos + 4] == ' '
        {
            // Quote followed by bold: 'sentence."** '
            (pos + 4, pos + 5)
        } else {
            return false;
        }
    } else if (next_char == '*' || next_char == '_') && pos + 2 < chars.len() && chars[pos + 2] == ' ' {
        // Sentence ends with emphasis: "sentence.* " or "sentence._ "
        (pos + 2, pos + 3)
    } else if (next_char == '*' || next_char == '_')
        && pos + 3 < chars.len()
        && chars[pos + 2] == next_char
        && chars[pos + 3] == ' '
    {
        // Sentence ends with bold: "sentence.** " or "sentence.__ "
        (pos + 3, pos + 4)
    } else if next_char == '~' && pos + 3 < chars.len() && chars[pos + 2] == '~' && chars[pos + 3] == ' ' {
        // Sentence ends with strikethrough: "sentence.~~ "
        (pos + 3, pos + 4)
    } else {
        return false;
    };

    // Skip all whitespace after the space to find the start of the next sentence
    let mut next_char_pos = after_space_pos;
    while next_char_pos < chars.len() && chars[next_char_pos].is_whitespace() {
        next_char_pos += 1;
    }

    // Check if we reached the end of the string
    if next_char_pos >= chars.len() {
        return false;
    }

    // Skip leading emphasis/strikethrough markers and opening quotes to find the actual first letter
    let mut first_letter_pos = next_char_pos;
    while first_letter_pos < chars.len()
        && (chars[first_letter_pos] == '*'
            || chars[first_letter_pos] == '_'
            || chars[first_letter_pos] == '~'
            || is_opening_quote(chars[first_letter_pos]))
    {
        first_letter_pos += 1;
    }

    // Check if we reached the end after skipping emphasis
    if first_letter_pos >= chars.len() {
        return false;
    }

    // First character of next sentence must be uppercase or CJK
    let first_char = chars[first_letter_pos];
    if !first_char.is_uppercase() && !is_cjk_char(first_char) {
        return false;
    }

    // Look back to check for common abbreviations (only applies to periods)
    if pos > 0 && c == '.' {
        // Convert char index to byte offset for string slicing.
        // The sum covers chars[0..=pos], so the slice includes the period itself.
        let byte_offset: usize = chars[..=pos].iter().map(|ch| ch.len_utf8()).sum();
        if text_ends_with_abbreviation(&text[..byte_offset], abbreviations) {
            return false;
        }

        // Check for decimal numbers (e.g., "3.14")
        // Make sure to check if first_letter_pos is within bounds
        if chars[pos - 1].is_numeric() && first_letter_pos < chars.len() && chars[first_letter_pos].is_numeric() {
            return false;
        }
    }
    true
}
221
222/// Split text into sentences
223pub fn split_into_sentences(text: &str) -> Vec<String> {
224    split_into_sentences_custom(text, &None)
225}
226
227/// Split text into sentences with custom abbreviations
228pub fn split_into_sentences_custom(text: &str, custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
229    let abbreviations = get_abbreviations(custom_abbreviations);
230    split_into_sentences_with_set(text, &abbreviations)
231}
232
233/// Internal function to split text into sentences with a pre-computed abbreviations set
234/// Use this when calling multiple times in a loop to avoid repeatedly computing the set
235fn split_into_sentences_with_set(text: &str, abbreviations: &HashSet<String>) -> Vec<String> {
236    let mut sentences = Vec::new();
237    let mut current_sentence = String::new();
238    let mut chars = text.chars().peekable();
239    let mut pos = 0;
240
241    while let Some(c) = chars.next() {
242        current_sentence.push(c);
243
244        if is_sentence_boundary(text, pos, abbreviations) {
245            // Consume any trailing emphasis/strikethrough markers and quotes (they belong to the current sentence)
246            while let Some(&next) = chars.peek() {
247                if next == '*' || next == '_' || next == '~' || is_closing_quote(next) {
248                    current_sentence.push(chars.next().unwrap());
249                    pos += 1;
250                } else {
251                    break;
252                }
253            }
254
255            // Consume the space after the sentence
256            if chars.peek() == Some(&' ') {
257                chars.next();
258                pos += 1;
259            }
260
261            sentences.push(current_sentence.trim().to_string());
262            current_sentence.clear();
263        }
264
265        pos += 1;
266    }
267
268    // Add any remaining text as the last sentence
269    if !current_sentence.trim().is_empty() {
270        sentences.push(current_sentence.trim().to_string());
271    }
272    sentences
273}
274
/// Check if a line is a horizontal rule (---, ___, ***).
///
/// The line must start with `-`, `_` or `*`, contain nothing but that same
/// character and spaces, and include the character at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the rest of the line must repeat.
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
303
/// Check if a line is a numbered list item (e.g., "1. ", "10. ").
///
/// Returns `true` when `line` starts with one or more ASCII digits that are
/// immediately followed by a period and a space.
///
/// Only the ASCII digits `0`-`9` are accepted. CommonMark ordered list markers
/// consist of arabic digits, so other Unicode numeric characters (for example
/// "½" or "²") never start a list item and must not be mistaken for one.
///
/// After the period there must be a space (consistent with list marker
/// extraction): "2019." alone is NOT treated as a list item to avoid false
/// positives.
fn is_numbered_list_item(line: &str) -> bool {
    // Length of the leading run of ASCII digits. ASCII digits are single
    // bytes, so this is also a valid byte offset into `line`.
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();

    digit_count > 0 && line.as_bytes()[digit_count..].starts_with(b". ")
}
327
328/// Check if a trimmed line is an unordered list item (-, *, + followed by space)
329fn is_unordered_list_marker(s: &str) -> bool {
330    matches!(s.as_bytes().first(), Some(b'-' | b'*' | b'+'))
331        && !is_horizontal_rule(s)
332        && (s.len() == 1 || s.as_bytes().get(1) == Some(&b' '))
333}
334
335/// Shared structural checks for block boundary detection.
336/// Checks elements that only depend on the trimmed line content.
337fn is_block_boundary_core(trimmed: &str) -> bool {
338    trimmed.is_empty()
339        || trimmed.starts_with('#')
340        || trimmed.starts_with("```")
341        || trimmed.starts_with("~~~")
342        || trimmed.starts_with('>')
343        || (trimmed.starts_with('[') && trimmed.contains("]:"))
344        || is_horizontal_rule(trimmed)
345        || is_unordered_list_marker(trimmed)
346        || is_numbered_list_item(trimmed)
347        || is_definition_list_item(trimmed)
348        || trimmed.starts_with(":::")
349}
350
351/// Check if a trimmed line starts a new structural block element.
352/// Used for paragraph boundary detection in `reflow_markdown()`.
353fn is_block_boundary(trimmed: &str) -> bool {
354    is_block_boundary_core(trimmed) || trimmed.starts_with('|')
355}
356
357/// Check if a line starts a new structural block for paragraph boundary detection
358/// in `reflow_paragraph_at_line()`. Extends the core checks with indented code blocks
359/// (≥4 spaces) and table row detection via `is_potential_table_row`.
360fn is_paragraph_boundary(trimmed: &str, line: &str) -> bool {
361    is_block_boundary_core(trimmed)
362        || ElementCache::calculate_indentation_width_default(line) >= 4
363        || crate::utils::table_utils::TableUtils::is_potential_table_row(line)
364}
365
/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// CommonMark supports two formats for hard line breaks:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
///
/// A trailing `\r` (from CRLF line endings) is ignored before checking.
///
/// For the backslash form, only an *unescaped* final backslash counts. A line
/// ending in `\\` ends with an escaped, literal backslash and is not a hard
/// break, so the length of the trailing backslash run must be odd.
fn has_hard_break(line: &str) -> bool {
    let line = line.strip_suffix('\r').unwrap_or(line);

    if line.ends_with("  ") {
        return true;
    }

    // Backslashes pair up left to right as escapes; the last one is a
    // hard-break marker only when it has no partner.
    let trailing_backslashes = line.bytes().rev().take_while(|&b| b == b'\\').count();
    trailing_backslashes % 2 == 1
}
375
/// Check whether the final character of `text` is `.`, `!` or `?`.
///
/// Returns `false` for an empty string.
fn ends_with_sentence_punct(text: &str) -> bool {
    matches!(text.chars().next_back(), Some('.' | '!' | '?'))
}
380
/// Trim trailing whitespace but keep a Markdown hard break intact.
///
/// Hard breaks in Markdown can be indicated by:
/// 1. Two trailing spaces before a newline (traditional) - normalised here to
///    exactly two spaces
/// 2. A backslash at the end of the line (mdformat style) - returned untouched
///
/// A trailing `\r` from CRLF line endings is removed first. A string made up
/// only of whitespace becomes the empty string.
fn trim_preserving_hard_break(s: &str) -> String {
    // Drop the carriage return of a Windows line ending, if any.
    let line = match s.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => s,
    };

    // Backslash hard break: keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let body = line.trim_end();

    // Fewer than two trailing spaces: no hard break, plain trim.
    if !line.ends_with("  ") {
        return body.to_owned();
    }

    // Nothing but whitespace.
    if body.is_empty() {
        return String::new();
    }

    // Re-attach exactly two spaces so the hard break survives.
    let mut trimmed = String::with_capacity(body.len() + 2);
    trimmed.push_str(body);
    trimmed.push_str("  ");
    trimmed
}
411
412pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
413    // For sentence-per-line mode, always process regardless of length
414    if options.sentence_per_line {
415        let elements = parse_markdown_elements(line);
416        return reflow_elements_sentence_per_line(&elements, &options.abbreviations);
417    }
418
419    // For semantic line breaks mode, use cascading split strategy
420    if options.semantic_line_breaks {
421        let elements = parse_markdown_elements(line);
422        return reflow_elements_semantic(&elements, options);
423    }
424
425    // Quick check: if line is already short enough or no wrapping requested, return as-is
426    // line_length = 0 means no wrapping (unlimited line length)
427    if options.line_length == 0 || display_len(line, options.length_mode) <= options.line_length {
428        return vec![line.to_string()];
429    }
430
431    // Parse the markdown to identify elements
432    let elements = parse_markdown_elements(line);
433
434    // Reflow the elements into lines
435    reflow_elements(&elements, options)
436}
437
/// Image source in a linked image structure.
///
/// Describes how the image inside a linked image (`[![alt]…]…`) refers to
/// its target; the carried string is the text placed between the delimiters.
#[derive(Debug, Clone)]
enum LinkedImageSource {
    /// Inline image URL: ![alt](url)
    Inline(String),
    /// Reference image: ![alt][ref]
    Reference(String),
}
446
/// Link target in a linked image structure.
///
/// Describes the outer link that wraps the image; the carried string is the
/// text placed between the delimiters.
#[derive(Debug, Clone)]
enum LinkedImageTarget {
    /// Inline link URL: ](url)
    Inline(String),
    /// Reference link: ][ref]
    Reference(String),
}
455
/// Represents a piece of content in the markdown.
///
/// The `Display` implementation renders each variant back to Markdown source.
/// Variants whose syntax is re-created there (links, images, emphasis, code,
/// math, ...) hold only the inner parts, without delimiters. `Text`,
/// `Autolink`, `HtmlTag`, `HtmlEntity` and `HugoShortcode` are written out
/// verbatim, so they hold their complete source text.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// A complete markdown inline link [text](url)
    Link { text: String, url: String },
    /// A complete markdown reference link [text][ref]
    ReferenceLink { text: String, reference: String },
    /// A complete markdown empty reference link [text][]
    EmptyReferenceLink { text: String },
    /// A complete markdown shortcut reference link [ref]
    ShortcutReference { reference: String },
    /// A complete markdown inline image ![alt](url)
    InlineImage { alt: String, url: String },
    /// A complete markdown reference image ![alt][ref]
    ReferenceImage { alt: String, reference: String },
    /// A complete markdown empty reference image ![alt][]
    EmptyReferenceImage { alt: String },
    /// A clickable image badge in any of 4 forms:
    /// - [![alt](img-url)](link-url)
    /// - [![alt][img-ref]](link-url)
    /// - [![alt](img-url)][link-ref]
    /// - [![alt][img-ref]][link-ref]
    LinkedImage {
        /// Alt text of the inner image
        alt: String,
        /// How the inner image refers to its source (inline URL or reference)
        img_source: LinkedImageSource,
        /// How the outer link refers to its destination (inline URL or reference)
        link_target: LinkedImageTarget,
    },
    /// Footnote reference [^note]
    FootnoteReference { note: String },
    /// Strikethrough text ~~text~~
    Strikethrough(String),
    /// Wiki-style link [[wiki]] or [[wiki|text]]
    WikiLink(String),
    /// Inline math $math$
    InlineMath(String),
    /// Display math $$math$$
    DisplayMath(String),
    /// Emoji shortcode :emoji:
    EmojiShortcode(String),
    /// Autolink <https://...> or <mailto:...> or <user@domain.com>
    Autolink(String),
    /// HTML tag <tag> or </tag> or <tag/>
    HtmlTag(String),
    /// HTML entity &nbsp; or &#123;
    HtmlEntity(String),
    /// Hugo/Go template shortcode {{< ... >}} or {{% ... %}}
    HugoShortcode(String),
    /// Inline code `code`
    Code(String),
    /// Bold text **text** or __text__
    Bold {
        /// Text between the bold markers
        content: String,
        /// True if underscore markers (__), false for asterisks (**)
        underscore: bool,
    },
    /// Italic text *text* or _text_
    Italic {
        /// Text between the italic markers
        content: String,
        /// True if underscore marker (_), false for asterisk (*)
        underscore: bool,
    },
}
520
521impl std::fmt::Display for Element {
522    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
523        match self {
524            Element::Text(s) => write!(f, "{s}"),
525            Element::Link { text, url } => write!(f, "[{text}]({url})"),
526            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
527            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
528            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
529            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
530            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
531            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
532            Element::LinkedImage {
533                alt,
534                img_source,
535                link_target,
536            } => {
537                // Build the image part: ![alt](url) or ![alt][ref]
538                let img_part = match img_source {
539                    LinkedImageSource::Inline(url) => format!("![{alt}]({url})"),
540                    LinkedImageSource::Reference(r) => format!("![{alt}][{r}]"),
541                };
542                // Build the link part: (url) or [ref]
543                match link_target {
544                    LinkedImageTarget::Inline(url) => write!(f, "[{img_part}]({url})"),
545                    LinkedImageTarget::Reference(r) => write!(f, "[{img_part}][{r}]"),
546                }
547            }
548            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
549            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
550            Element::WikiLink(s) => write!(f, "[[{s}]]"),
551            Element::InlineMath(s) => write!(f, "${s}$"),
552            Element::DisplayMath(s) => write!(f, "$${s}$$"),
553            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
554            Element::Autolink(s) => write!(f, "{s}"),
555            Element::HtmlTag(s) => write!(f, "{s}"),
556            Element::HtmlEntity(s) => write!(f, "{s}"),
557            Element::HugoShortcode(s) => write!(f, "{s}"),
558            Element::Code(s) => write!(f, "`{s}`"),
559            Element::Bold { content, underscore } => {
560                if *underscore {
561                    write!(f, "__{content}__")
562                } else {
563                    write!(f, "**{content}**")
564                }
565            }
566            Element::Italic { content, underscore } => {
567                if *underscore {
568                    write!(f, "_{content}_")
569                } else {
570                    write!(f, "*{content}*")
571                }
572            }
573        }
574    }
575}
576
577impl Element {
578    /// Calculate the display width of this element using the given length mode.
579    /// This formats the element and computes its width, correctly handling
580    /// visual width for CJK characters and other wide glyphs.
581    fn display_width(&self, mode: ReflowLengthMode) -> usize {
582        let formatted = format!("{self}");
583        display_len(&formatted, mode)
584    }
585}
586
/// An emphasis or formatting span parsed by pulldown-cmark.
///
/// `start` and `end` are byte offsets into the text that was parsed, and
/// cover the markers as well as the content.
#[derive(Debug, Clone)]
struct EmphasisSpan {
    /// Byte offset where the emphasis starts (including markers)
    start: usize,
    /// Byte offset where the emphasis ends (after closing markers)
    end: usize,
    /// The content inside the emphasis markers
    content: String,
    /// Whether this is strong (bold) emphasis
    is_strong: bool,
    /// Whether this is strikethrough (~~text~~)
    is_strikethrough: bool,
    /// Whether the original used underscore markers (for emphasis only;
    /// always false for strikethrough)
    uses_underscore: bool,
}
603
604/// Extract emphasis and strikethrough spans from text using pulldown-cmark
605///
606/// This provides CommonMark-compliant emphasis parsing, correctly handling:
607/// - Nested emphasis like `*text **bold** more*`
608/// - Left/right flanking delimiter rules
609/// - Underscore vs asterisk markers
610/// - GFM strikethrough (~~text~~)
611///
612/// Returns spans sorted by start position.
613fn extract_emphasis_spans(text: &str) -> Vec<EmphasisSpan> {
614    let mut spans = Vec::new();
615    let mut options = Options::empty();
616    options.insert(Options::ENABLE_STRIKETHROUGH);
617
618    // Stacks to track nested formatting with their start positions
619    let mut emphasis_stack: Vec<(usize, bool)> = Vec::new(); // (start_byte, uses_underscore)
620    let mut strong_stack: Vec<(usize, bool)> = Vec::new();
621    let mut strikethrough_stack: Vec<usize> = Vec::new();
622
623    let parser = Parser::new_ext(text, options).into_offset_iter();
624
625    for (event, range) in parser {
626        match event {
627            Event::Start(Tag::Emphasis) => {
628                // Check if this uses underscore by looking at the original text
629                let uses_underscore = text.get(range.start..range.start + 1) == Some("_");
630                emphasis_stack.push((range.start, uses_underscore));
631            }
632            Event::End(TagEnd::Emphasis) => {
633                if let Some((start_byte, uses_underscore)) = emphasis_stack.pop() {
634                    // Extract content between the markers (1 char marker on each side)
635                    let content_start = start_byte + 1;
636                    let content_end = range.end - 1;
637                    if content_end > content_start
638                        && let Some(content) = text.get(content_start..content_end)
639                    {
640                        spans.push(EmphasisSpan {
641                            start: start_byte,
642                            end: range.end,
643                            content: content.to_string(),
644                            is_strong: false,
645                            is_strikethrough: false,
646                            uses_underscore,
647                        });
648                    }
649                }
650            }
651            Event::Start(Tag::Strong) => {
652                // Check if this uses underscore by looking at the original text
653                let uses_underscore = text.get(range.start..range.start + 2) == Some("__");
654                strong_stack.push((range.start, uses_underscore));
655            }
656            Event::End(TagEnd::Strong) => {
657                if let Some((start_byte, uses_underscore)) = strong_stack.pop() {
658                    // Extract content between the markers (2 char marker on each side)
659                    let content_start = start_byte + 2;
660                    let content_end = range.end - 2;
661                    if content_end > content_start
662                        && let Some(content) = text.get(content_start..content_end)
663                    {
664                        spans.push(EmphasisSpan {
665                            start: start_byte,
666                            end: range.end,
667                            content: content.to_string(),
668                            is_strong: true,
669                            is_strikethrough: false,
670                            uses_underscore,
671                        });
672                    }
673                }
674            }
675            Event::Start(Tag::Strikethrough) => {
676                strikethrough_stack.push(range.start);
677            }
678            Event::End(TagEnd::Strikethrough) => {
679                if let Some(start_byte) = strikethrough_stack.pop() {
680                    // Extract content between the ~~ markers (2 char marker on each side)
681                    let content_start = start_byte + 2;
682                    let content_end = range.end - 2;
683                    if content_end > content_start
684                        && let Some(content) = text.get(content_start..content_end)
685                    {
686                        spans.push(EmphasisSpan {
687                            start: start_byte,
688                            end: range.end,
689                            content: content.to_string(),
690                            is_strong: false,
691                            is_strikethrough: true,
692                            uses_underscore: false,
693                        });
694                    }
695                }
696            }
697            _ => {}
698        }
699    }
700
701    // Sort by start position
702    spans.sort_by_key(|s| s.start);
703    spans
704}
705
706/// Parse markdown elements from text preserving the raw syntax
707///
708/// Detection order is critical:
709/// 1. Linked images [![alt](img)](link) - must be detected first as atomic units
710/// 2. Inline images ![alt](url) - before links to handle ! prefix
711/// 3. Reference images ![alt][ref] - before reference links
712/// 4. Inline links [text](url) - before reference links
713/// 5. Reference links [text][ref] - before shortcut references
714/// 6. Shortcut reference links [ref] - detected last to avoid false positives
715/// 7. Other elements (code, bold, italic, etc.) - processed normally
716fn parse_markdown_elements(text: &str) -> Vec<Element> {
717    let mut elements = Vec::new();
718    let mut remaining = text;
719
720    // Pre-extract emphasis spans using pulldown-cmark for CommonMark-compliant parsing
721    let emphasis_spans = extract_emphasis_spans(text);
722
723    while !remaining.is_empty() {
724        // Calculate current byte offset in original text
725        let current_offset = text.len() - remaining.len();
726        // Find the earliest occurrence of any markdown pattern
727        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
728
729        // Check for linked images FIRST (all 4 variants)
730        // Quick literal check: only run expensive regexes if we might have a linked image
731        // Pattern starts with "[!" so check for that first
732        if remaining.contains("[!") {
733            // Pattern 1: [![alt](img)](link) - inline image in inline link
734            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_INLINE.find(remaining)
735                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
736            {
737                earliest_match = Some((m.start(), "linked_image_ii", m));
738            }
739
740            // Pattern 2: [![alt][ref]](link) - reference image in inline link
741            if let Ok(Some(m)) = LINKED_IMAGE_REF_INLINE.find(remaining)
742                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
743            {
744                earliest_match = Some((m.start(), "linked_image_ri", m));
745            }
746
747            // Pattern 3: [![alt](img)][ref] - inline image in reference link
748            if let Ok(Some(m)) = LINKED_IMAGE_INLINE_REF.find(remaining)
749                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
750            {
751                earliest_match = Some((m.start(), "linked_image_ir", m));
752            }
753
754            // Pattern 4: [![alt][ref]][ref] - reference image in reference link
755            if let Ok(Some(m)) = LINKED_IMAGE_REF_REF.find(remaining)
756                && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
757            {
758                earliest_match = Some((m.start(), "linked_image_rr", m));
759            }
760        }
761
762        // Check for images (they start with ! so should be detected before links)
763        // Inline images - ![alt](url)
764        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
765            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
766        {
767            earliest_match = Some((m.start(), "inline_image", m));
768        }
769
770        // Reference images - ![alt][ref]
771        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
772            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
773        {
774            earliest_match = Some((m.start(), "ref_image", m));
775        }
776
777        // Check for footnote references - [^note]
778        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
779            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
780        {
781            earliest_match = Some((m.start(), "footnote_ref", m));
782        }
783
784        // Check for inline links - [text](url)
785        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
786            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
787        {
788            earliest_match = Some((m.start(), "inline_link", m));
789        }
790
791        // Check for reference links - [text][ref]
792        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
793            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
794        {
795            earliest_match = Some((m.start(), "ref_link", m));
796        }
797
798        // Check for shortcut reference links - [ref]
799        // Only check if we haven't found an earlier pattern that would conflict
800        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
801            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
802        {
803            earliest_match = Some((m.start(), "shortcut_ref", m));
804        }
805
806        // Check for wiki-style links - [[wiki]]
807        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
808            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
809        {
810            earliest_match = Some((m.start(), "wiki_link", m));
811        }
812
813        // Check for display math first (before inline) - $$math$$
814        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
815            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
816        {
817            earliest_match = Some((m.start(), "display_math", m));
818        }
819
820        // Check for inline math - $math$
821        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
822            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
823        {
824            earliest_match = Some((m.start(), "inline_math", m));
825        }
826
827        // Note: Strikethrough is now handled by pulldown-cmark in extract_emphasis_spans
828
829        // Check for emoji shortcodes - :emoji:
830        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
831            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
832        {
833            earliest_match = Some((m.start(), "emoji", m));
834        }
835
836        // Check for HTML entities - &nbsp; etc
837        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
838            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
839        {
840            earliest_match = Some((m.start(), "html_entity", m));
841        }
842
843        // Check for Hugo shortcodes - {{< ... >}} or {{% ... %}}
844        // Must be checked before other patterns to avoid false sentence breaks
845        if let Ok(Some(m)) = HUGO_SHORTCODE_REGEX.find(remaining)
846            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
847        {
848            earliest_match = Some((m.start(), "hugo_shortcode", m));
849        }
850
851        // Check for HTML tags - <tag> </tag> <tag/>
852        // But exclude autolinks like <https://...> or <mailto:...> or email autolinks <user@domain.com>
853        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
854            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
855        {
856            // Check if this is an autolink (starts with protocol or mailto:)
857            let matched_text = &remaining[m.start()..m.end()];
858            let is_url_autolink = matched_text.starts_with("<http://")
859                || matched_text.starts_with("<https://")
860                || matched_text.starts_with("<mailto:")
861                || matched_text.starts_with("<ftp://")
862                || matched_text.starts_with("<ftps://");
863
864            // Check if this is an email autolink (per CommonMark spec: <local@domain.tld>)
865            // Use centralized EMAIL_PATTERN for consistency with MD034 and other rules
866            let is_email_autolink = {
867                let content = matched_text.trim_start_matches('<').trim_end_matches('>');
868                EMAIL_PATTERN.is_match(content)
869            };
870
871            if is_url_autolink || is_email_autolink {
872                earliest_match = Some((m.start(), "autolink", m));
873            } else {
874                earliest_match = Some((m.start(), "html_tag", m));
875            }
876        }
877
878        // Find earliest non-link special characters
879        let mut next_special = remaining.len();
880        let mut special_type = "";
881        let mut pulldown_emphasis: Option<&EmphasisSpan> = None;
882
883        // Check for code spans (not handled by pulldown-cmark in this context)
884        if let Some(pos) = remaining.find('`')
885            && pos < next_special
886        {
887            next_special = pos;
888            special_type = "code";
889        }
890
891        // Check for emphasis using pulldown-cmark's pre-extracted spans
892        // Find the earliest emphasis span that starts within remaining text
893        for span in &emphasis_spans {
894            if span.start >= current_offset && span.start < current_offset + remaining.len() {
895                let pos_in_remaining = span.start - current_offset;
896                if pos_in_remaining < next_special {
897                    next_special = pos_in_remaining;
898                    special_type = "pulldown_emphasis";
899                    pulldown_emphasis = Some(span);
900                }
901                break; // Spans are sorted by start position, so first match is earliest
902            }
903        }
904
905        // Determine which pattern to process first
906        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
907            pos < next_special
908        } else {
909            false
910        };
911
912        if should_process_markdown_link {
913            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
914
915            // Add any text before the match
916            if pos > 0 {
917                elements.push(Element::Text(remaining[..pos].to_string()));
918            }
919
920            // Process the matched pattern
921            match pattern_type {
922                // Pattern 1: [![alt](img)](link) - inline image in inline link
923                "linked_image_ii" => {
924                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_INLINE.captures(remaining) {
925                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
926                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
927                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
928                        elements.push(Element::LinkedImage {
929                            alt: alt.to_string(),
930                            img_source: LinkedImageSource::Inline(img_url.to_string()),
931                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
932                        });
933                        remaining = &remaining[match_obj.end()..];
934                    } else {
935                        elements.push(Element::Text("[".to_string()));
936                        remaining = &remaining[1..];
937                    }
938                }
939                // Pattern 2: [![alt][ref]](link) - reference image in inline link
940                "linked_image_ri" => {
941                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_INLINE.captures(remaining) {
942                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
943                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
944                        let link_url = caps.get(3).map(|m| m.as_str()).unwrap_or("");
945                        elements.push(Element::LinkedImage {
946                            alt: alt.to_string(),
947                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
948                            link_target: LinkedImageTarget::Inline(link_url.to_string()),
949                        });
950                        remaining = &remaining[match_obj.end()..];
951                    } else {
952                        elements.push(Element::Text("[".to_string()));
953                        remaining = &remaining[1..];
954                    }
955                }
956                // Pattern 3: [![alt](img)][ref] - inline image in reference link
957                "linked_image_ir" => {
958                    if let Ok(Some(caps)) = LINKED_IMAGE_INLINE_REF.captures(remaining) {
959                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
960                        let img_url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
961                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
962                        elements.push(Element::LinkedImage {
963                            alt: alt.to_string(),
964                            img_source: LinkedImageSource::Inline(img_url.to_string()),
965                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
966                        });
967                        remaining = &remaining[match_obj.end()..];
968                    } else {
969                        elements.push(Element::Text("[".to_string()));
970                        remaining = &remaining[1..];
971                    }
972                }
973                // Pattern 4: [![alt][ref]][ref] - reference image in reference link
974                "linked_image_rr" => {
975                    if let Ok(Some(caps)) = LINKED_IMAGE_REF_REF.captures(remaining) {
976                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
977                        let img_ref = caps.get(2).map(|m| m.as_str()).unwrap_or("");
978                        let link_ref = caps.get(3).map(|m| m.as_str()).unwrap_or("");
979                        elements.push(Element::LinkedImage {
980                            alt: alt.to_string(),
981                            img_source: LinkedImageSource::Reference(img_ref.to_string()),
982                            link_target: LinkedImageTarget::Reference(link_ref.to_string()),
983                        });
984                        remaining = &remaining[match_obj.end()..];
985                    } else {
986                        elements.push(Element::Text("[".to_string()));
987                        remaining = &remaining[1..];
988                    }
989                }
990                "inline_image" => {
991                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
992                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
993                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
994                        elements.push(Element::InlineImage {
995                            alt: alt.to_string(),
996                            url: url.to_string(),
997                        });
998                        remaining = &remaining[match_obj.end()..];
999                    } else {
1000                        elements.push(Element::Text("!".to_string()));
1001                        remaining = &remaining[1..];
1002                    }
1003                }
1004                "ref_image" => {
1005                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
1006                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1007                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1008
1009                        if reference.is_empty() {
1010                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
1011                        } else {
1012                            elements.push(Element::ReferenceImage {
1013                                alt: alt.to_string(),
1014                                reference: reference.to_string(),
1015                            });
1016                        }
1017                        remaining = &remaining[match_obj.end()..];
1018                    } else {
1019                        elements.push(Element::Text("!".to_string()));
1020                        remaining = &remaining[1..];
1021                    }
1022                }
1023                "footnote_ref" => {
1024                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
1025                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1026                        elements.push(Element::FootnoteReference { note: note.to_string() });
1027                        remaining = &remaining[match_obj.end()..];
1028                    } else {
1029                        elements.push(Element::Text("[".to_string()));
1030                        remaining = &remaining[1..];
1031                    }
1032                }
1033                "inline_link" => {
1034                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
1035                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1036                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1037                        elements.push(Element::Link {
1038                            text: text.to_string(),
1039                            url: url.to_string(),
1040                        });
1041                        remaining = &remaining[match_obj.end()..];
1042                    } else {
1043                        // Fallback - shouldn't happen
1044                        elements.push(Element::Text("[".to_string()));
1045                        remaining = &remaining[1..];
1046                    }
1047                }
1048                "ref_link" => {
1049                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
1050                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1051                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
1052
1053                        if reference.is_empty() {
1054                            // Empty reference link [text][]
1055                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
1056                        } else {
1057                            // Regular reference link [text][ref]
1058                            elements.push(Element::ReferenceLink {
1059                                text: text.to_string(),
1060                                reference: reference.to_string(),
1061                            });
1062                        }
1063                        remaining = &remaining[match_obj.end()..];
1064                    } else {
1065                        // Fallback - shouldn't happen
1066                        elements.push(Element::Text("[".to_string()));
1067                        remaining = &remaining[1..];
1068                    }
1069                }
1070                "shortcut_ref" => {
1071                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
1072                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1073                        elements.push(Element::ShortcutReference {
1074                            reference: reference.to_string(),
1075                        });
1076                        remaining = &remaining[match_obj.end()..];
1077                    } else {
1078                        // Fallback - shouldn't happen
1079                        elements.push(Element::Text("[".to_string()));
1080                        remaining = &remaining[1..];
1081                    }
1082                }
1083                "wiki_link" => {
1084                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
1085                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1086                        elements.push(Element::WikiLink(content.to_string()));
1087                        remaining = &remaining[match_obj.end()..];
1088                    } else {
1089                        elements.push(Element::Text("[[".to_string()));
1090                        remaining = &remaining[2..];
1091                    }
1092                }
1093                "display_math" => {
1094                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
1095                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1096                        elements.push(Element::DisplayMath(math.to_string()));
1097                        remaining = &remaining[match_obj.end()..];
1098                    } else {
1099                        elements.push(Element::Text("$$".to_string()));
1100                        remaining = &remaining[2..];
1101                    }
1102                }
1103                "inline_math" => {
1104                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
1105                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1106                        elements.push(Element::InlineMath(math.to_string()));
1107                        remaining = &remaining[match_obj.end()..];
1108                    } else {
1109                        elements.push(Element::Text("$".to_string()));
1110                        remaining = &remaining[1..];
1111                    }
1112                }
1113                // Note: "strikethrough" case removed - now handled by pulldown-cmark
1114                "emoji" => {
1115                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
1116                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
1117                        elements.push(Element::EmojiShortcode(emoji.to_string()));
1118                        remaining = &remaining[match_obj.end()..];
1119                    } else {
1120                        elements.push(Element::Text(":".to_string()));
1121                        remaining = &remaining[1..];
1122                    }
1123                }
1124                "html_entity" => {
1125                    // HTML entities are captured whole - use as_str() to get just the matched content
1126                    elements.push(Element::HtmlEntity(match_obj.as_str().to_string()));
1127                    remaining = &remaining[match_obj.end()..];
1128                }
1129                "hugo_shortcode" => {
1130                    // Hugo shortcodes are atomic elements - preserve them exactly
1131                    elements.push(Element::HugoShortcode(match_obj.as_str().to_string()));
1132                    remaining = &remaining[match_obj.end()..];
1133                }
1134                "autolink" => {
1135                    // Autolinks are atomic elements - preserve them exactly
1136                    elements.push(Element::Autolink(match_obj.as_str().to_string()));
1137                    remaining = &remaining[match_obj.end()..];
1138                }
1139                "html_tag" => {
1140                    // HTML tags are captured whole - use as_str() to get just the matched content
1141                    elements.push(Element::HtmlTag(match_obj.as_str().to_string()));
1142                    remaining = &remaining[match_obj.end()..];
1143                }
1144                _ => {
1145                    // Unknown pattern, treat as text
1146                    elements.push(Element::Text("[".to_string()));
1147                    remaining = &remaining[1..];
1148                }
1149            }
1150        } else {
1151            // Process non-link special characters
1152
1153            // Add any text before the special character
1154            if next_special > 0 && next_special < remaining.len() {
1155                elements.push(Element::Text(remaining[..next_special].to_string()));
1156                remaining = &remaining[next_special..];
1157            }
1158
1159            // Process the special element
1160            match special_type {
1161                "code" => {
1162                    // Find end of code
1163                    if let Some(code_end) = remaining[1..].find('`') {
1164                        let code = &remaining[1..1 + code_end];
1165                        elements.push(Element::Code(code.to_string()));
1166                        remaining = &remaining[1 + code_end + 1..];
1167                    } else {
1168                        // No closing backtick, treat as text
1169                        elements.push(Element::Text(remaining.to_string()));
1170                        break;
1171                    }
1172                }
1173                "pulldown_emphasis" => {
1174                    // Use pre-extracted emphasis/strikethrough span from pulldown-cmark
1175                    if let Some(span) = pulldown_emphasis {
1176                        let span_len = span.end - span.start;
1177                        if span.is_strikethrough {
1178                            elements.push(Element::Strikethrough(span.content.clone()));
1179                        } else if span.is_strong {
1180                            elements.push(Element::Bold {
1181                                content: span.content.clone(),
1182                                underscore: span.uses_underscore,
1183                            });
1184                        } else {
1185                            elements.push(Element::Italic {
1186                                content: span.content.clone(),
1187                                underscore: span.uses_underscore,
1188                            });
1189                        }
1190                        remaining = &remaining[span_len..];
1191                    } else {
1192                        // Fallback - shouldn't happen
1193                        elements.push(Element::Text(remaining[..1].to_string()));
1194                        remaining = &remaining[1..];
1195                    }
1196                }
1197                _ => {
1198                    // No special elements found, add all remaining text
1199                    elements.push(Element::Text(remaining.to_string()));
1200                    break;
1201                }
1202            }
1203        }
1204    }
1205
1206    elements
1207}
1208
1209/// Reflow elements for sentence-per-line mode
1210fn reflow_elements_sentence_per_line(elements: &[Element], custom_abbreviations: &Option<Vec<String>>) -> Vec<String> {
1211    let abbreviations = get_abbreviations(custom_abbreviations);
1212    let mut lines = Vec::new();
1213    let mut current_line = String::new();
1214
1215    for (idx, element) in elements.iter().enumerate() {
1216        let element_str = format!("{element}");
1217
1218        // For text elements, split into sentences
1219        if let Element::Text(text) = element {
1220            // Simply append text - it already has correct spacing from tokenization
1221            let combined = format!("{current_line}{text}");
1222            // Use the pre-computed abbreviations set to avoid redundant computation
1223            let sentences = split_into_sentences_with_set(&combined, &abbreviations);
1224
1225            if sentences.len() > 1 {
1226                // We found sentence boundaries
1227                for (i, sentence) in sentences.iter().enumerate() {
1228                    if i == 0 {
1229                        // First sentence might continue from previous elements
1230                        // But check if it ends with an abbreviation
1231                        let trimmed = sentence.trim();
1232
1233                        if text_ends_with_abbreviation(trimmed, &abbreviations) {
1234                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
1235                            current_line = sentence.to_string();
1236                        } else {
1237                            // Normal case - emit the first sentence
1238                            lines.push(sentence.to_string());
1239                            current_line.clear();
1240                        }
1241                    } else if i == sentences.len() - 1 {
1242                        // Last sentence: check if it's complete or incomplete
1243                        let trimmed = sentence.trim();
1244                        let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1245
1246                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1247                            // Complete sentence - emit it immediately
1248                            lines.push(sentence.to_string());
1249                            current_line.clear();
1250                        } else {
1251                            // Incomplete sentence - save for next iteration
1252                            current_line = sentence.to_string();
1253                        }
1254                    } else {
1255                        // Complete sentences in the middle
1256                        lines.push(sentence.to_string());
1257                    }
1258                }
1259            } else {
1260                // Single sentence - check if it's complete
1261                let trimmed = combined.trim();
1262
1263                // If the combined result is only whitespace, don't accumulate it.
1264                // This prevents leading spaces on subsequent elements when lines
1265                // are joined with spaces during reflow iteration.
1266                if trimmed.is_empty() {
1267                    continue;
1268                }
1269
1270                let ends_with_sentence_punct = ends_with_sentence_punct(trimmed);
1271
1272                if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed, &abbreviations) {
1273                    // Complete single sentence - emit it
1274                    lines.push(trimmed.to_string());
1275                    current_line.clear();
1276                } else {
1277                    // Incomplete sentence - continue accumulating
1278                    current_line = combined;
1279                }
1280            }
1281        } else if let Element::Italic { content, underscore } = element {
1282            // Handle italic elements - may contain multiple sentences that need continuation
1283            let marker = if *underscore { "_" } else { "*" };
1284            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1285        } else if let Element::Bold { content, underscore } = element {
1286            // Handle bold elements - may contain multiple sentences that need continuation
1287            let marker = if *underscore { "__" } else { "**" };
1288            handle_emphasis_sentence_split(content, marker, &abbreviations, &mut current_line, &mut lines);
1289        } else if let Element::Strikethrough(content) = element {
1290            // Handle strikethrough elements - may contain multiple sentences that need continuation
1291            handle_emphasis_sentence_split(content, "~~", &abbreviations, &mut current_line, &mut lines);
1292        } else {
1293            // Non-text, non-emphasis elements (Code, Links, etc.)
1294            // Check if this element is adjacent to the preceding text (no space between)
1295            let is_adjacent = if idx > 0 {
1296                match &elements[idx - 1] {
1297                    Element::Text(t) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1298                    _ => true,
1299                }
1300            } else {
1301                false
1302            };
1303
1304            // Add space before element if needed, but not for adjacent elements
1305            if !is_adjacent
1306                && !current_line.is_empty()
1307                && !current_line.ends_with(' ')
1308                && !current_line.ends_with('(')
1309                && !current_line.ends_with('[')
1310            {
1311                current_line.push(' ');
1312            }
1313            current_line.push_str(&element_str);
1314        }
1315    }
1316
1317    // Add any remaining content
1318    if !current_line.is_empty() {
1319        lines.push(current_line.trim().to_string());
1320    }
1321    lines
1322}
1323
1324/// Handle splitting emphasis content at sentence boundaries while preserving markers
1325fn handle_emphasis_sentence_split(
1326    content: &str,
1327    marker: &str,
1328    abbreviations: &HashSet<String>,
1329    current_line: &mut String,
1330    lines: &mut Vec<String>,
1331) {
1332    // Split the emphasis content into sentences
1333    let sentences = split_into_sentences_with_set(content, abbreviations);
1334
1335    if sentences.len() <= 1 {
1336        // Single sentence or no boundaries - treat as atomic
1337        if !current_line.is_empty()
1338            && !current_line.ends_with(' ')
1339            && !current_line.ends_with('(')
1340            && !current_line.ends_with('[')
1341        {
1342            current_line.push(' ');
1343        }
1344        current_line.push_str(marker);
1345        current_line.push_str(content);
1346        current_line.push_str(marker);
1347
1348        // Check if the emphasis content ends with sentence punctuation - if so, emit
1349        let trimmed = content.trim();
1350        let ends_with_punct = ends_with_sentence_punct(trimmed);
1351        if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1352            lines.push(current_line.clone());
1353            current_line.clear();
1354        }
1355    } else {
1356        // Multiple sentences - each gets its own emphasis markers
1357        for (i, sentence) in sentences.iter().enumerate() {
1358            let trimmed = sentence.trim();
1359            if trimmed.is_empty() {
1360                continue;
1361            }
1362
1363            if i == 0 {
1364                // First sentence: combine with current_line and emit
1365                if !current_line.is_empty()
1366                    && !current_line.ends_with(' ')
1367                    && !current_line.ends_with('(')
1368                    && !current_line.ends_with('[')
1369                {
1370                    current_line.push(' ');
1371                }
1372                current_line.push_str(marker);
1373                current_line.push_str(trimmed);
1374                current_line.push_str(marker);
1375
1376                // Check if this is a complete sentence
1377                let ends_with_punct = ends_with_sentence_punct(trimmed);
1378                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1379                    lines.push(current_line.clone());
1380                    current_line.clear();
1381                }
1382            } else if i == sentences.len() - 1 {
1383                // Last sentence: check if complete
1384                let ends_with_punct = ends_with_sentence_punct(trimmed);
1385
1386                let mut line = String::new();
1387                line.push_str(marker);
1388                line.push_str(trimmed);
1389                line.push_str(marker);
1390
1391                if ends_with_punct && !text_ends_with_abbreviation(trimmed, abbreviations) {
1392                    lines.push(line);
1393                } else {
1394                    // Incomplete - keep in current_line for potential continuation
1395                    *current_line = line;
1396                }
1397            } else {
1398                // Middle sentences: emit with markers
1399                let mut line = String::new();
1400                line.push_str(marker);
1401                line.push_str(trimmed);
1402                line.push_str(marker);
1403                lines.push(line);
1404            }
1405        }
1406    }
1407}
1408
/// Lowercase English words before which a semantic line break reads naturally.
///
/// The list mixes coordinating conjunctions, relative pronouns, subordinating
/// conjunctions and conjunctive adverbs. Every entry is plain ASCII.
const BREAK_WORDS: &[&str] = &[
    // Coordinating conjunctions
    "and", "or", "but", "nor", "yet", "so", "for",
    // Relative pronouns
    "which", "that",
    // Subordinating conjunctions
    "because", "when", "if", "while", "where", "although", "though", "unless",
    "since", "after", "before", "until", "as", "once", "whether",
    // Conjunctive adverbs
    "however", "therefore", "moreover", "furthermore", "nevertheless", "whereas",
];
1444
/// Returns `true` when `c` is one of the clause-level punctuation marks used
/// as semantic line break candidates: comma, semicolon, colon or em dash.
fn is_clause_punctuation(c: char) -> bool {
    const CLAUSE_MARKS: [char; 4] = [',', ';', ':', '\u{2014}'];
    CLAUSE_MARKS.contains(&c)
}
1449
1450/// Compute element spans for a flat text representation of elements.
1451/// Returns Vec of (start, end) byte offsets for non-Text elements,
1452/// so we can check that a split position doesn't fall inside them.
1453fn compute_element_spans(elements: &[Element]) -> Vec<(usize, usize)> {
1454    let mut spans = Vec::new();
1455    let mut offset = 0;
1456    for element in elements {
1457        let rendered = format!("{element}");
1458        let len = rendered.len();
1459        if !matches!(element, Element::Text(_)) {
1460            spans.push((offset, offset + len));
1461        }
1462        offset += len;
1463    }
1464    spans
1465}
1466
/// Report whether byte offset `pos` lies strictly inside any of `spans`.
///
/// Both bounds are exclusive: a position equal to a span's start or end is
/// treated as outside that span.
fn is_inside_element(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if start < pos && pos < end {
            return true;
        }
    }
    false
}
1471
/// Lower bound, as a fraction of `line_length`, on how long a line fragment
/// must be to stand on its own.
///
/// The split helpers refuse a split whose first part is shorter than this
/// (so "A," or "Note:" never ends up alone on a line), and the semantic
/// reflow uses the same threshold to decide which short lines to merge back.
const MIN_SPLIT_RATIO: f64 = 0.3;
1475
1476/// Split a line at the latest clause punctuation that keeps the first part
1477/// within `line_length`. Returns None if no valid split point exists or if
1478/// the split would create an unreasonably short first line.
1479fn split_at_clause_punctuation(
1480    text: &str,
1481    line_length: usize,
1482    element_spans: &[(usize, usize)],
1483    length_mode: ReflowLengthMode,
1484) -> Option<(String, String)> {
1485    let chars: Vec<char> = text.chars().collect();
1486    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1487
1488    // Find the char index where accumulated display width exceeds line_length
1489    let mut width_acc = 0;
1490    let mut search_end_char = 0;
1491    for (idx, &c) in chars.iter().enumerate() {
1492        let c_width = display_len(&c.to_string(), length_mode);
1493        if width_acc + c_width > line_length {
1494            break;
1495        }
1496        width_acc += c_width;
1497        search_end_char = idx + 1;
1498    }
1499
1500    let mut best_pos = None;
1501    for i in (0..search_end_char).rev() {
1502        if is_clause_punctuation(chars[i]) {
1503            // Convert char position to byte position for element span check
1504            let byte_pos: usize = chars[..=i].iter().map(|c| c.len_utf8()).sum();
1505            if !is_inside_element(byte_pos, element_spans) {
1506                best_pos = Some(i);
1507                break;
1508            }
1509        }
1510    }
1511
1512    let pos = best_pos?;
1513
1514    // Reject splits that create very short first lines
1515    let first: String = chars[..=pos].iter().collect();
1516    let first_display_len = display_len(&first, length_mode);
1517    if first_display_len < min_first_len {
1518        return None;
1519    }
1520
1521    // Split after the punctuation character
1522    let rest: String = chars[pos + 1..].iter().collect();
1523    let rest = rest.trim_start().to_string();
1524
1525    if rest.is_empty() {
1526        return None;
1527    }
1528
1529    Some((first, rest))
1530}
1531
1532/// Split a line before the latest break-word that keeps the first part
1533/// within `line_length`. Returns None if no valid split point exists or if
1534/// the split would create an unreasonably short first line.
1535fn split_at_break_word(
1536    text: &str,
1537    line_length: usize,
1538    element_spans: &[(usize, usize)],
1539    length_mode: ReflowLengthMode,
1540) -> Option<(String, String)> {
1541    let lower = text.to_lowercase();
1542    let min_first_len = ((line_length as f64) * MIN_SPLIT_RATIO) as usize;
1543    let mut best_split: Option<(usize, usize)> = None; // (byte_start, word_len_bytes)
1544
1545    for &word in BREAK_WORDS {
1546        let mut search_start = 0;
1547        while let Some(pos) = lower[search_start..].find(word) {
1548            let abs_pos = search_start + pos;
1549
1550            // Verify it's a word boundary: preceded by space, followed by space
1551            let preceded_by_space = abs_pos == 0 || text.as_bytes().get(abs_pos - 1) == Some(&b' ');
1552            let followed_by_space = text.as_bytes().get(abs_pos + word.len()) == Some(&b' ');
1553
1554            if preceded_by_space && followed_by_space {
1555                // The break goes BEFORE the word, so first part ends at abs_pos - 1
1556                let first_part = text[..abs_pos].trim_end();
1557                let first_part_len = display_len(first_part, length_mode);
1558
1559                if first_part_len >= min_first_len
1560                    && first_part_len <= line_length
1561                    && !is_inside_element(abs_pos, element_spans)
1562                {
1563                    // Prefer the latest valid split point
1564                    if best_split.is_none_or(|(prev_pos, _)| abs_pos > prev_pos) {
1565                        best_split = Some((abs_pos, word.len()));
1566                    }
1567                }
1568            }
1569
1570            search_start = abs_pos + word.len();
1571        }
1572    }
1573
1574    let (byte_start, _word_len) = best_split?;
1575
1576    let first = text[..byte_start].trim_end().to_string();
1577    let rest = text[byte_start..].to_string();
1578
1579    if first.is_empty() || rest.trim().is_empty() {
1580        return None;
1581    }
1582
1583    Some((first, rest))
1584}
1585
1586/// Recursively cascade-split a line that exceeds line_length.
1587/// Tries clause punctuation first, then break-words, then word wrap.
1588fn cascade_split_line(
1589    text: &str,
1590    line_length: usize,
1591    abbreviations: &Option<Vec<String>>,
1592    length_mode: ReflowLengthMode,
1593) -> Vec<String> {
1594    if line_length == 0 || display_len(text, length_mode) <= line_length {
1595        return vec![text.to_string()];
1596    }
1597
1598    let elements = parse_markdown_elements(text);
1599    let element_spans = compute_element_spans(&elements);
1600
1601    // Try clause punctuation split
1602    if let Some((first, rest)) = split_at_clause_punctuation(text, line_length, &element_spans, length_mode) {
1603        let mut result = vec![first];
1604        result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1605        return result;
1606    }
1607
1608    // Try break-word split
1609    if let Some((first, rest)) = split_at_break_word(text, line_length, &element_spans, length_mode) {
1610        let mut result = vec![first];
1611        result.extend(cascade_split_line(&rest, line_length, abbreviations, length_mode));
1612        return result;
1613    }
1614
1615    // Fallback: word wrap using existing reflow_elements
1616    let options = ReflowOptions {
1617        line_length,
1618        break_on_sentences: false,
1619        preserve_breaks: false,
1620        sentence_per_line: false,
1621        semantic_line_breaks: false,
1622        abbreviations: abbreviations.clone(),
1623        length_mode,
1624    };
1625    reflow_elements(&elements, &options)
1626}
1627
1628/// Reflow elements using semantic line breaks strategy:
1629/// 1. Split at sentence boundaries (always)
1630/// 2. For lines exceeding line_length, cascade through clause punct → break-words → word wrap
1631fn reflow_elements_semantic(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1632    // Step 1: Split into sentences using existing sentence-per-line logic
1633    let sentence_lines = reflow_elements_sentence_per_line(elements, &options.abbreviations);
1634
1635    // Step 2: For each sentence line, apply cascading splits if it exceeds line_length
1636    // When line_length is 0 (unlimited), skip cascading — sentence splits only
1637    if options.line_length == 0 {
1638        return sentence_lines;
1639    }
1640
1641    let length_mode = options.length_mode;
1642    let mut result = Vec::new();
1643    for line in sentence_lines {
1644        if display_len(&line, length_mode) <= options.line_length {
1645            result.push(line);
1646        } else {
1647            result.extend(cascade_split_line(
1648                &line,
1649                options.line_length,
1650                &options.abbreviations,
1651                length_mode,
1652            ));
1653        }
1654    }
1655
1656    // Step 3: Merge very short trailing lines back into the previous line.
1657    // Word wrap can produce lines like "was" or "see" on their own, which reads poorly.
1658    let min_line_len = ((options.line_length as f64) * MIN_SPLIT_RATIO) as usize;
1659    let mut merged: Vec<String> = Vec::with_capacity(result.len());
1660    for line in result {
1661        if !merged.is_empty() && display_len(&line, length_mode) < min_line_len && !line.trim().is_empty() {
1662            // Don't merge across sentence boundaries — sentence splits are intentional
1663            let prev_ends_at_sentence = {
1664                let trimmed = merged.last().unwrap().trim_end();
1665                trimmed
1666                    .chars()
1667                    .rev()
1668                    .find(|c| !matches!(c, '"' | '\'' | '\u{201D}' | '\u{2019}' | ')' | ']'))
1669                    .is_some_and(|c| matches!(c, '.' | '!' | '?'))
1670            };
1671
1672            if !prev_ends_at_sentence {
1673                let prev = merged.last_mut().unwrap();
1674                let combined = format!("{prev} {line}");
1675                // Only merge if the combined line doesn't wildly exceed the limit
1676                // (allow up to 10% overflow to avoid orphan words)
1677                if display_len(&combined, length_mode) <= options.line_length + options.line_length / 10 {
1678                    *prev = combined;
1679                    continue;
1680                }
1681            }
1682        }
1683        merged.push(line);
1684    }
1685    merged
1686}
1687
/// Locate the right-most ASCII space in `line` at which it is safe to break.
///
/// `element_spans` holds `(start, end)` byte ranges of rendered non-Text
/// elements within `line`. A space is unsafe when it lies strictly between a
/// span's start and end. The bounds are exclusive because element delimiters
/// (e.g., `[`, `]`, `(`, `)`, `<`, `>`, `` ` ``) are never spaces, so only
/// interior positions need protection.
///
/// Returns the byte offset of the space, or `None` when no safe space exists.
fn rfind_safe_space(line: &str, element_spans: &[(usize, usize)]) -> Option<usize> {
    let is_protected = |pos: usize| element_spans.iter().any(|&(start, end)| start < pos && pos < end);

    line.rmatch_indices(' ')
        .map(|(pos, _)| pos)
        .find(|&pos| !is_protected(pos))
}
1701
1702/// Reflow elements into lines that fit within the line length
1703fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
1704    let mut lines = Vec::new();
1705    let mut current_line = String::new();
1706    let mut current_length = 0;
1707    // Track byte spans of non-Text elements in current_line for safe splitting
1708    let mut current_line_element_spans: Vec<(usize, usize)> = Vec::new();
1709    let length_mode = options.length_mode;
1710
1711    for (idx, element) in elements.iter().enumerate() {
1712        let element_str = format!("{element}");
1713        let element_len = element.display_width(length_mode);
1714
1715        // Determine adjacency from the original elements, not from current_line.
1716        // Elements are adjacent when there's no whitespace between them in the source:
1717        // - Text("v") → HugoShortcode("{{<...>}}") = adjacent (text has no trailing space)
1718        // - Text(" and ") → InlineLink("[a](url)") = NOT adjacent (text has trailing space)
1719        // - HugoShortcode("{{<...>}}") → Text(",") = adjacent (text has no leading space)
1720        let is_adjacent_to_prev = if idx > 0 {
1721            match (&elements[idx - 1], element) {
1722                (Element::Text(t), _) => !t.is_empty() && !t.ends_with(char::is_whitespace),
1723                (_, Element::Text(t)) => !t.is_empty() && !t.starts_with(char::is_whitespace),
1724                _ => true,
1725            }
1726        } else {
1727            false
1728        };
1729
1730        // For text elements that might need breaking
1731        if let Element::Text(text) = element {
1732            // Check if original text had leading whitespace
1733            let has_leading_space = text.starts_with(char::is_whitespace);
1734            // If this is a text element, always process it word by word
1735            let words: Vec<&str> = text.split_whitespace().collect();
1736
1737            for (i, word) in words.iter().enumerate() {
1738                let word_len = display_len(word, length_mode);
1739                // Check if this "word" is just punctuation that should stay attached
1740                let is_trailing_punct = word
1741                    .chars()
1742                    .all(|c| matches!(c, ',' | '.' | ':' | ';' | '!' | '?' | ')' | ']' | '}'));
1743
1744                // First word of text adjacent to preceding non-text element
1745                // must stay attached (e.g., shortcode followed by punctuation or text)
1746                let is_first_adjacent = i == 0 && is_adjacent_to_prev;
1747
1748                if is_first_adjacent {
1749                    // Attach directly without space, preventing line break
1750                    if current_length + word_len > options.line_length && current_length > 0 {
1751                        // Would exceed — break before the adjacent group
1752                        // Use element-aware space search to avoid splitting inside links/code/etc.
1753                        if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
1754                            let before = current_line[..last_space].trim_end().to_string();
1755                            let after = current_line[last_space + 1..].to_string();
1756                            lines.push(before);
1757                            current_line = format!("{after}{word}");
1758                            current_length = display_len(&current_line, length_mode);
1759                            current_line_element_spans.clear();
1760                        } else {
1761                            current_line.push_str(word);
1762                            current_length += word_len;
1763                        }
1764                    } else {
1765                        current_line.push_str(word);
1766                        current_length += word_len;
1767                    }
1768                } else if current_length > 0
1769                    && current_length + 1 + word_len > options.line_length
1770                    && !is_trailing_punct
1771                {
1772                    // Start a new line (but never for trailing punctuation)
1773                    lines.push(current_line.trim().to_string());
1774                    current_line = word.to_string();
1775                    current_length = word_len;
1776                    current_line_element_spans.clear();
1777                } else {
1778                    // Add word to current line
1779                    // Only add space if: we have content AND (this isn't the first word OR original had leading space)
1780                    // AND this isn't trailing punctuation (which attaches directly)
1781                    if current_length > 0 && (i > 0 || has_leading_space) && !is_trailing_punct {
1782                        current_line.push(' ');
1783                        current_length += 1;
1784                    }
1785                    current_line.push_str(word);
1786                    current_length += word_len;
1787                }
1788            }
1789        } else {
1790            // For non-text elements (code, links, references), treat as atomic units
1791            // These should never be broken across lines
1792
1793            if is_adjacent_to_prev {
1794                // Adjacent to preceding text — attach directly without space
1795                if current_length + element_len > options.line_length {
1796                    // Would exceed limit — break before the adjacent word group
1797                    // Use element-aware space search to avoid splitting inside links/code/etc.
1798                    if let Some(last_space) = rfind_safe_space(&current_line, &current_line_element_spans) {
1799                        let before = current_line[..last_space].trim_end().to_string();
1800                        let after = current_line[last_space + 1..].to_string();
1801                        lines.push(before);
1802                        current_line = format!("{after}{element_str}");
1803                        current_length = display_len(&current_line, length_mode);
1804                        current_line_element_spans.clear();
1805                        // Record the element span in the new current_line
1806                        let start = after.len();
1807                        current_line_element_spans.push((start, start + element_str.len()));
1808                    } else {
1809                        // No safe space to break at — accept the long line
1810                        let start = current_line.len();
1811                        current_line.push_str(&element_str);
1812                        current_length += element_len;
1813                        current_line_element_spans.push((start, current_line.len()));
1814                    }
1815                } else {
1816                    let start = current_line.len();
1817                    current_line.push_str(&element_str);
1818                    current_length += element_len;
1819                    current_line_element_spans.push((start, current_line.len()));
1820                }
1821            } else if current_length > 0 && current_length + 1 + element_len > options.line_length {
1822                // Not adjacent, would exceed — start new line
1823                lines.push(current_line.trim().to_string());
1824                current_line = element_str.clone();
1825                current_length = element_len;
1826                current_line_element_spans.clear();
1827                current_line_element_spans.push((0, element_str.len()));
1828            } else {
1829                // Not adjacent, fits — add with space
1830                let ends_with_opener =
1831                    current_line.ends_with('(') || current_line.ends_with('[') || current_line.ends_with('{');
1832                if current_length > 0 && !ends_with_opener {
1833                    current_line.push(' ');
1834                    current_length += 1;
1835                }
1836                let start = current_line.len();
1837                current_line.push_str(&element_str);
1838                current_length += element_len;
1839                current_line_element_spans.push((start, current_line.len()));
1840            }
1841        }
1842    }
1843
1844    // Don't forget the last line
1845    if !current_line.is_empty() {
1846        lines.push(current_line.trim_end().to_string());
1847    }
1848
1849    lines
1850}
1851
1852/// Reflow markdown content preserving structure
1853pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
1854    let lines: Vec<&str> = content.lines().collect();
1855    let mut result = Vec::new();
1856    let mut i = 0;
1857
1858    while i < lines.len() {
1859        let line = lines[i];
1860        let trimmed = line.trim();
1861
1862        // Preserve empty lines
1863        if trimmed.is_empty() {
1864            result.push(String::new());
1865            i += 1;
1866            continue;
1867        }
1868
1869        // Preserve headings as-is
1870        if trimmed.starts_with('#') {
1871            result.push(line.to_string());
1872            i += 1;
1873            continue;
1874        }
1875
1876        // Preserve Quarto/Pandoc div markers (:::) as-is
1877        if trimmed.starts_with(":::") {
1878            result.push(line.to_string());
1879            i += 1;
1880            continue;
1881        }
1882
1883        // Preserve fenced code blocks
1884        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1885            result.push(line.to_string());
1886            i += 1;
1887            // Copy lines until closing fence
1888            while i < lines.len() {
1889                result.push(lines[i].to_string());
1890                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
1891                    i += 1;
1892                    break;
1893                }
1894                i += 1;
1895            }
1896            continue;
1897        }
1898
1899        // Preserve indented code blocks (4+ columns accounting for tab expansion)
1900        if ElementCache::calculate_indentation_width_default(line) >= 4 {
1901            // Collect all consecutive indented lines
1902            result.push(line.to_string());
1903            i += 1;
1904            while i < lines.len() {
1905                let next_line = lines[i];
1906                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
1907                if ElementCache::calculate_indentation_width_default(next_line) >= 4 || next_line.trim().is_empty() {
1908                    result.push(next_line.to_string());
1909                    i += 1;
1910                } else {
1911                    break;
1912                }
1913            }
1914            continue;
1915        }
1916
1917        // Preserve block quotes (but reflow their content)
1918        if trimmed.starts_with('>') {
1919            // find() returns byte position which is correct for str slicing
1920            // The unwrap is safe because we already verified trimmed starts with '>'
1921            let gt_pos = line.find('>').expect("'>' must exist since trimmed.starts_with('>')");
1922            let quote_prefix = line[0..gt_pos + 1].to_string();
1923            let quote_content = &line[quote_prefix.len()..].trim_start();
1924
1925            let reflowed = reflow_line(quote_content, options);
1926            for reflowed_line in reflowed.iter() {
1927                result.push(format!("{quote_prefix} {reflowed_line}"));
1928            }
1929            i += 1;
1930            continue;
1931        }
1932
1933        // Preserve horizontal rules first (before checking for lists)
1934        if is_horizontal_rule(trimmed) {
1935            result.push(line.to_string());
1936            i += 1;
1937            continue;
1938        }
1939
1940        // Preserve lists (but not horizontal rules)
1941        if is_unordered_list_marker(trimmed) || is_numbered_list_item(trimmed) {
1942            // Find the list marker and preserve indentation
1943            let indent = line.len() - line.trim_start().len();
1944            let indent_str = " ".repeat(indent);
1945
1946            // For numbered lists, find the period and the space after it
1947            // For bullet lists, find the marker and the space after it
1948            let mut marker_end = indent;
1949            let mut content_start = indent;
1950
1951            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
1952                // Numbered list: find the period
1953                if let Some(period_pos) = line[indent..].find('.') {
1954                    marker_end = indent + period_pos + 1; // Include the period
1955                    content_start = marker_end;
1956                    // Skip any spaces after the period to find content start
1957                    // Use byte-based check since content_start is a byte index
1958                    // This is safe because space is ASCII (single byte)
1959                    while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1960                        content_start += 1;
1961                    }
1962                }
1963            } else {
1964                // Bullet list: marker is single character
1965                marker_end = indent + 1; // Just the marker character
1966                content_start = marker_end;
1967                // Skip any spaces after the marker
1968                // Use byte-based check since content_start is a byte index
1969                // This is safe because space is ASCII (single byte)
1970                while content_start < line.len() && line.as_bytes().get(content_start) == Some(&b' ') {
1971                    content_start += 1;
1972                }
1973            }
1974
1975            let marker = &line[indent..marker_end];
1976
1977            // Collect all content for this list item (including continuation lines)
1978            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
1979            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
1980            i += 1;
1981
1982            // Collect continuation lines (indented lines that are part of this list item)
1983            while i < lines.len() {
1984                let next_line = lines[i];
1985                let next_trimmed = next_line.trim();
1986
1987                // Stop if we hit an empty line or another list item or special block
1988                if is_block_boundary(next_trimmed) {
1989                    break;
1990                }
1991
1992                // Check if this line is indented (continuation of list item)
1993                let next_indent = next_line.len() - next_line.trim_start().len();
1994                if next_indent >= content_start {
1995                    // This is a continuation line - add its content
1996                    // Preserve hard breaks while trimming excessive whitespace
1997                    let trimmed_start = next_line.trim_start();
1998                    list_content.push(trim_preserving_hard_break(trimmed_start));
1999                    i += 1;
2000                } else {
2001                    // Not indented enough, not part of this list item
2002                    break;
2003                }
2004            }
2005
2006            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
2007            // Hard breaks should prevent joining with the next line
2008            let combined_content = if options.preserve_breaks {
2009                list_content[0].clone()
2010            } else {
2011                // Check if any lines have hard breaks - if so, preserve the structure
2012                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
2013                if has_hard_breaks {
2014                    // Don't join lines with hard breaks - keep them separate with newlines
2015                    list_content.join("\n")
2016                } else {
2017                    // No hard breaks, safe to join with spaces
2018                    list_content.join(" ")
2019                }
2020            };
2021
2022            // Calculate the proper indentation for continuation lines
2023            let trimmed_marker = marker;
2024            let continuation_spaces = content_start;
2025
2026            // Adjust line length to account for list marker and space
2027            let prefix_length = indent + trimmed_marker.len() + 1;
2028
2029            // Create adjusted options with reduced line length
2030            let adjusted_options = ReflowOptions {
2031                line_length: options.line_length.saturating_sub(prefix_length),
2032                ..options.clone()
2033            };
2034
2035            let reflowed = reflow_line(&combined_content, &adjusted_options);
2036            for (j, reflowed_line) in reflowed.iter().enumerate() {
2037                if j == 0 {
2038                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
2039                } else {
2040                    // Continuation lines aligned with text after marker
2041                    let continuation_indent = " ".repeat(continuation_spaces);
2042                    result.push(format!("{continuation_indent}{reflowed_line}"));
2043                }
2044            }
2045            continue;
2046        }
2047
2048        // Preserve tables
2049        if crate::utils::table_utils::TableUtils::is_potential_table_row(line) {
2050            result.push(line.to_string());
2051            i += 1;
2052            continue;
2053        }
2054
2055        // Preserve reference definitions
2056        if trimmed.starts_with('[') && line.contains("]:") {
2057            result.push(line.to_string());
2058            i += 1;
2059            continue;
2060        }
2061
2062        // Preserve definition list items (extended markdown)
2063        if is_definition_list_item(trimmed) {
2064            result.push(line.to_string());
2065            i += 1;
2066            continue;
2067        }
2068
2069        // Check if this is a single line that doesn't need processing
2070        let mut is_single_line_paragraph = true;
2071        if i + 1 < lines.len() {
2072            let next_trimmed = lines[i + 1].trim();
2073            // Check if next line continues this paragraph
2074            if !is_block_boundary(next_trimmed) {
2075                is_single_line_paragraph = false;
2076            }
2077        }
2078
2079        // If it's a single line that fits, just add it as-is
2080        if is_single_line_paragraph && display_len(line, options.length_mode) <= options.line_length {
2081            result.push(line.to_string());
2082            i += 1;
2083            continue;
2084        }
2085
2086        // For regular paragraphs, collect consecutive lines
2087        let mut paragraph_parts = Vec::new();
2088        let mut current_part = vec![line];
2089        i += 1;
2090
2091        // If preserve_breaks is true, treat each line separately
2092        if options.preserve_breaks {
2093            // Don't collect consecutive lines - just reflow this single line
2094            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
2095                Some("\\")
2096            } else if line.ends_with("  ") {
2097                Some("  ")
2098            } else {
2099                None
2100            };
2101            let reflowed = reflow_line(line, options);
2102
2103            // Preserve hard breaks (two trailing spaces or backslash)
2104            if let Some(break_marker) = hard_break_type {
2105                if !reflowed.is_empty() {
2106                    let mut reflowed_with_break = reflowed;
2107                    let last_idx = reflowed_with_break.len() - 1;
2108                    if !has_hard_break(&reflowed_with_break[last_idx]) {
2109                        reflowed_with_break[last_idx].push_str(break_marker);
2110                    }
2111                    result.extend(reflowed_with_break);
2112                }
2113            } else {
2114                result.extend(reflowed);
2115            }
2116        } else {
2117            // Original behavior: collect consecutive lines into a paragraph
2118            while i < lines.len() {
2119                let prev_line = if !current_part.is_empty() {
2120                    current_part.last().unwrap()
2121                } else {
2122                    ""
2123                };
2124                let next_line = lines[i];
2125                let next_trimmed = next_line.trim();
2126
2127                // Stop at empty lines or special blocks
2128                if is_block_boundary(next_trimmed) {
2129                    break;
2130                }
2131
2132                // Check if previous line ends with hard break (two spaces or backslash)
2133                // or is a complete sentence in sentence_per_line mode
2134                let prev_trimmed = prev_line.trim();
2135                let abbreviations = get_abbreviations(&options.abbreviations);
2136                let ends_with_sentence = (prev_trimmed.ends_with('.')
2137                    || prev_trimmed.ends_with('!')
2138                    || prev_trimmed.ends_with('?')
2139                    || prev_trimmed.ends_with(".*")
2140                    || prev_trimmed.ends_with("!*")
2141                    || prev_trimmed.ends_with("?*")
2142                    || prev_trimmed.ends_with("._")
2143                    || prev_trimmed.ends_with("!_")
2144                    || prev_trimmed.ends_with("?_")
2145                    // Quote-terminated sentences (straight and curly quotes)
2146                    || prev_trimmed.ends_with(".\"")
2147                    || prev_trimmed.ends_with("!\"")
2148                    || prev_trimmed.ends_with("?\"")
2149                    || prev_trimmed.ends_with(".'")
2150                    || prev_trimmed.ends_with("!'")
2151                    || prev_trimmed.ends_with("?'")
2152                    || prev_trimmed.ends_with(".\u{201D}")
2153                    || prev_trimmed.ends_with("!\u{201D}")
2154                    || prev_trimmed.ends_with("?\u{201D}")
2155                    || prev_trimmed.ends_with(".\u{2019}")
2156                    || prev_trimmed.ends_with("!\u{2019}")
2157                    || prev_trimmed.ends_with("?\u{2019}"))
2158                    && !text_ends_with_abbreviation(
2159                        prev_trimmed.trim_end_matches(['*', '_', '"', '\'', '\u{201D}', '\u{2019}']),
2160                        &abbreviations,
2161                    );
2162
2163                if has_hard_break(prev_line) || (options.sentence_per_line && ends_with_sentence) {
2164                    // Start a new part after hard break or complete sentence
2165                    paragraph_parts.push(current_part.join(" "));
2166                    current_part = vec![next_line];
2167                } else {
2168                    current_part.push(next_line);
2169                }
2170                i += 1;
2171            }
2172
2173            // Add the last part
2174            if !current_part.is_empty() {
2175                if current_part.len() == 1 {
2176                    // Single line, don't add trailing space
2177                    paragraph_parts.push(current_part[0].to_string());
2178                } else {
2179                    paragraph_parts.push(current_part.join(" "));
2180                }
2181            }
2182
2183            // Reflow each part separately, preserving hard breaks
2184            for (j, part) in paragraph_parts.iter().enumerate() {
2185                let reflowed = reflow_line(part, options);
2186                result.extend(reflowed);
2187
2188                // Preserve hard break by ensuring last line of part ends with hard break marker
2189                // Use two spaces as the default hard break format for reflows
2190                // But don't add hard breaks in sentence_per_line mode - lines are already separate
2191                if j < paragraph_parts.len() - 1 && !result.is_empty() && !options.sentence_per_line {
2192                    let last_idx = result.len() - 1;
2193                    if !has_hard_break(&result[last_idx]) {
2194                        result[last_idx].push_str("  ");
2195                    }
2196                }
2197            }
2198        }
2199    }
2200
2201    // Preserve trailing newline if the original content had one
2202    let result_text = result.join("\n");
2203    if content.ends_with('\n') && !result_text.ends_with('\n') {
2204        format!("{result_text}\n")
2205    } else {
2206        result_text
2207    }
2208}
2209
/// Outcome of reflowing one paragraph of a document.
///
/// Pairs the byte range the paragraph occupies in the original content with
/// the text that should replace that range.
#[derive(Clone, Debug)]
pub struct ParagraphReflow {
    /// Byte offset in the original content where the paragraph begins
    pub start_byte: usize,
    /// Byte offset in the original content where the paragraph ends
    pub end_byte: usize,
    /// Replacement text produced by reflowing the paragraph
    pub reflowed_text: String,
}
2220
2221/// Reflow a single paragraph at the specified line number
2222///
2223/// This function finds the paragraph containing the given line number,
2224/// reflows it according to the specified line length, and returns
2225/// information about the paragraph location and its reflowed text.
2226///
2227/// # Arguments
2228///
2229/// * `content` - The full document content
2230/// * `line_number` - The 1-based line number within the paragraph to reflow
2231/// * `line_length` - The target line length for reflowing
2232///
2233/// # Returns
2234///
2235/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
2236/// or `None` if the line number is out of bounds or the content at that
2237/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
2238pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
2239    reflow_paragraph_at_line_with_mode(content, line_number, line_length, ReflowLengthMode::default())
2240}
2241
2242/// Reflow a paragraph at the given line with a specific length mode.
2243pub fn reflow_paragraph_at_line_with_mode(
2244    content: &str,
2245    line_number: usize,
2246    line_length: usize,
2247    length_mode: ReflowLengthMode,
2248) -> Option<ParagraphReflow> {
2249    if line_number == 0 {
2250        return None;
2251    }
2252
2253    let lines: Vec<&str> = content.lines().collect();
2254
2255    // Check if line number is valid (1-based)
2256    if line_number > lines.len() {
2257        return None;
2258    }
2259
2260    let target_idx = line_number - 1; // Convert to 0-based
2261    let target_line = lines[target_idx];
2262    let trimmed = target_line.trim();
2263
2264    // Don't reflow special blocks
2265    if is_paragraph_boundary(trimmed, target_line) {
2266        return None;
2267    }
2268
2269    // Find paragraph start - scan backward until blank line or special block
2270    let mut para_start = target_idx;
2271    while para_start > 0 {
2272        let prev_idx = para_start - 1;
2273        let prev_line = lines[prev_idx];
2274        let prev_trimmed = prev_line.trim();
2275
2276        // Stop at blank line or special blocks
2277        if is_paragraph_boundary(prev_trimmed, prev_line) {
2278            break;
2279        }
2280
2281        para_start = prev_idx;
2282    }
2283
2284    // Find paragraph end - scan forward until blank line or special block
2285    let mut para_end = target_idx;
2286    while para_end + 1 < lines.len() {
2287        let next_idx = para_end + 1;
2288        let next_line = lines[next_idx];
2289        let next_trimmed = next_line.trim();
2290
2291        // Stop at blank line or special blocks
2292        if is_paragraph_boundary(next_trimmed, next_line) {
2293            break;
2294        }
2295
2296        para_end = next_idx;
2297    }
2298
2299    // Extract paragraph lines
2300    let paragraph_lines = &lines[para_start..=para_end];
2301
2302    // Calculate byte offsets
2303    let mut start_byte = 0;
2304    for line in lines.iter().take(para_start) {
2305        start_byte += line.len() + 1; // +1 for newline
2306    }
2307
2308    let mut end_byte = start_byte;
2309    for line in paragraph_lines.iter() {
2310        end_byte += line.len() + 1; // +1 for newline
2311    }
2312
2313    // Track whether the byte range includes a trailing newline
2314    // (it doesn't if this is the last line and the file doesn't end with newline)
2315    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
2316
2317    // Adjust end_byte if the last line doesn't have a newline
2318    if !includes_trailing_newline {
2319        end_byte -= 1;
2320    }
2321
2322    // Join paragraph lines and reflow
2323    let paragraph_text = paragraph_lines.join("\n");
2324
2325    // Create reflow options
2326    let options = ReflowOptions {
2327        line_length,
2328        break_on_sentences: true,
2329        preserve_breaks: false,
2330        sentence_per_line: false,
2331        semantic_line_breaks: false,
2332        abbreviations: None,
2333        length_mode,
2334    };
2335
2336    // Reflow the paragraph using reflow_markdown to handle it properly
2337    let reflowed = reflow_markdown(&paragraph_text, &options);
2338
2339    // Ensure reflowed text matches whether the byte range includes a trailing newline
2340    // This is critical: if the range includes a newline, the replacement must too,
2341    // otherwise the next line will get appended to the reflowed paragraph
2342    let reflowed_text = if includes_trailing_newline {
2343        // Range includes newline - ensure reflowed text has one
2344        if reflowed.ends_with('\n') {
2345            reflowed
2346        } else {
2347            format!("{reflowed}\n")
2348        }
2349    } else {
2350        // Range doesn't include newline - ensure reflowed text doesn't have one
2351        if reflowed.ends_with('\n') {
2352            reflowed.trim_end_matches('\n').to_string()
2353        } else {
2354            reflowed
2355        }
2356    };
2357
2358    Some(ParagraphReflow {
2359        start_byte,
2360        end_byte,
2361        reflowed_text,
2362    })
2363}
2364
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline unit test for the `text_ends_with_abbreviation()` helper.
    ///
    /// Per the original note, public API and integration tests live in
    /// tests/utils/text_reflow_test.rs; the tests here cover helpers directly.
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        let abbreviations = get_abbreviations(&None);

        // Built-in abbreviations (titles and i.e./e.g.) must be recognised
        let recognised = ["Dr.", "word Dr.", "e.g.", "i.e.", "Mr.", "Mrs.", "Ms.", "Prof."];
        for text in recognised {
            assert!(
                text_ends_with_abbreviation(text, &abbreviations),
                "expected an abbreviation ending: {text:?}"
            );
        }

        // Not in the built-in list (etc doesn't always have period)
        let rejected = [
            "etc.",
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",        // question mark, not period
            "Mr!",        // exclamation, not period
            "paradigms?", // question mark
            "word",       // no punctuation
            "",           // empty string
        ];
        for text in rejected {
            assert!(
                !text_ends_with_abbreviation(text, &abbreviations),
                "expected no abbreviation ending: {text:?}"
            );
        }
    }

    #[test]
    fn test_is_unordered_list_marker() {
        let markers = [
            "- item", "* item", "+ item", //
            "-", // lone marker
            "*", "+",
        ];
        for line in markers {
            assert!(is_unordered_list_marker(line), "expected a list marker: {line:?}");
        }

        let non_markers = [
            "---",        // horizontal rule
            "***",        // horizontal rule
            "- - -",      // horizontal rule
            "* * *",      // horizontal rule
            "*emphasis*", // emphasis, not list
            "-word",      // no space after marker
            "",           // empty
            "text",       // plain text
            "# heading",  // heading
        ];
        for line in non_markers {
            assert!(!is_unordered_list_marker(line), "expected no list marker: {line:?}");
        }
    }

    #[test]
    fn test_is_block_boundary() {
        let boundaries = [
            "",                           // empty line
            "# Heading",                  // ATX heading
            "## Level 2",                 // ATX heading
            "```rust",                    // code fence
            "~~~",                        // tilde code fence
            "> quote",                    // blockquote
            "| cell |",                   // table
            "[link]: http://example.com", // reference def
            "---",                        // horizontal rule
            "***",                        // horizontal rule
            "- item",                     // unordered list
            "* item",                     // unordered list
            "+ item",                     // unordered list
            "1. item",                    // ordered list
            "10. item",                   // ordered list
            ": definition",               // definition list
            ":::",                        // div marker
            "::::: {.callout-note}",      // div marker with attrs
        ];
        for line in boundaries {
            assert!(is_block_boundary(line), "expected a block boundary: {line:?}");
        }

        // Paragraph continuations must not be reported as boundaries
        let continuations = [
            "regular text",
            "*emphasis*",  // emphasis, not list
            "[link](url)", // inline link, not reference def
            "some words here",
        ];
        for line in continuations {
            assert!(!is_block_boundary(line), "expected no block boundary: {line:?}");
        }
    }

    /// A definition list item following a single-line paragraph is a block
    /// boundary and must not be merged into that paragraph.
    #[test]
    fn test_definition_list_boundary_in_single_line_paragraph() {
        let options = ReflowOptions {
            line_length: 80,
            ..Default::default()
        };
        let result = reflow_markdown("Term\n: Definition of the term", &options);

        // The definition list marker should remain on its own line
        assert!(
            result.contains(": Definition"),
            "Definition list item should not be merged into previous line. Got: {result:?}"
        );
        let lines: Vec<&str> = result.lines().collect();
        assert_eq!(lines.len(), 2, "Should remain two separate lines. Got: {lines:?}");
        assert_eq!(lines, ["Term", ": Definition of the term"]);
    }

    #[test]
    fn test_is_paragraph_boundary() {
        // Each case is (trimmed line, raw line)
        let boundaries = [
            // Core block boundary checks are inherited
            ("# Heading", "# Heading"),
            ("- item", "- item"),
            (":::", ":::"),
            (": definition", ": definition"),
            // Indented code blocks (≥4 spaces or tab)
            ("code", "    code"),
            ("code", "\tcode"),
            // Table rows via is_potential_table_row
            ("| a | b |", "| a | b |"),
            ("a | b", "a | b"), // pipe-delimited without leading pipe
        ];
        for (trimmed, raw) in boundaries {
            assert!(
                is_paragraph_boundary(trimmed, raw),
                "expected a paragraph boundary: {raw:?}"
            );
        }

        let continuations = [
            ("regular text", "regular text"),
            ("text", "  text"), // 2-space indent is not code
        ];
        for (trimmed, raw) in continuations {
            assert!(
                !is_paragraph_boundary(trimmed, raw),
                "expected no paragraph boundary: {raw:?}"
            );
        }
    }

    /// Div markers (:::) are paragraph boundaries for `reflow_paragraph_at_line`,
    /// which prevents reflowing across div boundaries.
    #[test]
    fn test_div_marker_boundary_in_reflow_paragraph_at_line() {
        let content = "Some paragraph text here.\n\n::: {.callout-note}\nThis is a callout.\n:::\n";
        // Line 3 is the div marker — should not be reflowed
        let div_marker_line = 3;
        let result = reflow_paragraph_at_line(content, div_marker_line, 80);
        assert!(result.is_none(), "Div marker line should not be reflowed");
    }
}