rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::regex_cache::{
7    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options for reflowing text.
///
/// The [`Default`] configuration targets 80 characters per line, prefers
/// sentence boundaries as break points, and leaves `preserve_breaks` and
/// `sentence_per_line` switched off.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target line length, measured in characters (`char`s), not bytes
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// Returns the stock configuration: 80 columns, sentence-aware breaking on,
    /// break preservation off, sentence-per-line off.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
34
/// Sentence-boundary test over pre-collected characters.
///
/// `chars` holds the whole text as individual `char`s and `pos` is an index
/// into that slice. Returns `true` only when all of the following hold:
///
/// * `chars[pos]` is `.`, `!` or `?` and is directly followed by a space;
/// * after skipping any further whitespace, the next character exists and is
///   uppercase;
/// * the text before `pos` does not end with one of the known abbreviations
///   (compared case-insensitively);
/// * the punctuation is not sandwiched between two numeric characters.
///
/// Out-of-range positions simply yield `false`.
fn is_sentence_boundary_in_chars(chars: &[char], pos: usize) -> bool {
    // Abbreviation list similar to sentences-per-line.
    // NOTE(review): this is a plain suffix match, so ordinary words such as
    // "index" (ends with "ex") also suppress a boundary. The sentence-per-line
    // reflow code repeats the same suffix test on its first sentence, so the
    // two must be changed together if this is ever tightened.
    const IGNORED_WORDS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr", "Jr",
    ];

    // Need the punctuation itself plus at least one character after it.
    if pos + 1 >= chars.len() {
        return false;
    }

    // Sentence-ending punctuation that is followed by at least one space.
    if !matches!(chars[pos], '.' | '!' | '?') || chars[pos + 1] != ' ' {
        return false;
    }

    // Skip the remaining whitespace to find the first character of the next
    // sentence; running off the end means there is no next sentence.
    let Some(next) = chars[pos + 2..].iter().copied().find(|c| !c.is_whitespace()) else {
        return false;
    };

    // A new sentence has to start with an uppercase character.
    if !next.is_uppercase() {
        return false;
    }

    // Build the preceding text from the character slice. `pos` is a character
    // index, so slicing the original `&str` with it would use it as a byte
    // offset and break (or panic) on non-ASCII text.
    let preceding = chars[..pos].iter().collect::<String>().to_lowercase();
    if IGNORED_WORDS
        .iter()
        .any(|word| preceding.ends_with(word.to_lowercase().as_str()))
    {
        return false;
    }

    // Numeric on both sides of the punctuation: treat as part of a number.
    !(pos > 0 && chars[pos - 1].is_numeric() && next.is_numeric())
}

/// Detect if a character position is a sentence boundary
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
///
/// `pos` is a character index into `text` (the n-th `char`), not a byte
/// offset. See [`is_sentence_boundary_in_chars`] for the exact rules.
// `split_into_sentences` calls the slice-based helper directly so that the
// text is collected only once; this wrapper keeps the original `&str`
// signature and may therefore have no callers left in non-test code.
#[allow(dead_code)]
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    let chars: Vec<char> = text.chars().collect();
    is_sentence_boundary_in_chars(&chars, pos)
}

/// Split text into sentences
///
/// Each returned sentence is trimmed of surrounding whitespace and keeps its
/// terminating punctuation. Text after the last detected boundary is returned
/// as a final sentence unless it is empty or whitespace-only.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    // Collect once: boundary detection works on character indices, and doing
    // this per character would make the whole split quadratic.
    let chars: Vec<char> = text.chars().collect();

    let mut sentences = Vec::new();
    let mut current_sentence = String::new();
    let mut pos = 0;

    while pos < chars.len() {
        current_sentence.push(chars[pos]);

        if is_sentence_boundary_in_chars(&chars, pos) {
            // Swallow the single space that separates the two sentences.
            if chars.get(pos + 1) == Some(&' ') {
                pos += 1;
            }
            sentences.push(current_sentence.trim().to_string());
            current_sentence.clear();
        }

        pos += 1;
    }

    // Add any remaining text as the last sentence
    let remainder = current_sentence.trim();
    if !remainder.is_empty() {
        sentences.push(remainder.to_string());
    }
    sentences
}
122
/// Check if a line is a horizontal rule (---, ___, ***)
///
/// The line must start with `-`, `_` or `*`, contain nothing but that same
/// marker and spaces, and include the marker at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the rest of the line must use.
    let marker = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for c in line.chars() {
        if c == marker {
            marker_count += 1;
        } else if c != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
151
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// Everything before the first `.` must be a non-empty run of numeric
/// characters, and the `.` must be followed by a space or end the line.
fn is_numbered_list_item(line: &str) -> bool {
    match line.split_once('.') {
        Some((number, rest)) => {
            let has_number = !number.is_empty() && number.chars().all(char::is_numeric);
            has_number && (rest.is_empty() || rest.starts_with(' '))
        }
        // No period at all: cannot be a numbered item.
        None => false,
    }
}
174
/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// A single trailing `\r` (from a CRLF line ending) is ignored before the
/// check. The line then counts as a hard break when it ends with:
/// 1. Two or more spaces
/// 2. A backslash
fn has_hard_break(line: &str) -> bool {
    let content = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    content.ends_with('\\') || content.ends_with("  ")
}
184
/// Trim trailing whitespace while preserving hard breaks (two trailing spaces or backslash)
///
/// A single trailing `\r` is dropped first so CRLF input behaves like LF input.
/// After that:
/// * a line ending in a backslash is returned untouched;
/// * a line ending in two or more spaces is trimmed and given back exactly two
///   trailing spaces (or becomes empty if it was whitespace-only);
/// * any other line simply loses its trailing whitespace.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break (mdformat style): keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let is_space_hard_break = line.ends_with("  ");

    if is_space_hard_break && !content.is_empty() {
        // Normalise the traditional hard break to exactly two spaces.
        let mut out = String::with_capacity(content.len() + 2);
        out.push_str(content);
        out.push_str("  ");
        out
    } else {
        // Either no hard break, or nothing but whitespace on the line.
        content.to_owned()
    }
}
215
216pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
217    // For sentence-per-line mode, always process regardless of length
218    if options.sentence_per_line {
219        let elements = parse_markdown_elements(line);
220        return reflow_elements_sentence_per_line(&elements);
221    }
222
223    // Quick check: if line is already short enough, return as-is
224    if line.chars().count() <= options.line_length {
225        return vec![line.to_string()];
226    }
227
228    // Parse the markdown to identify elements
229    let elements = parse_markdown_elements(line);
230
231    // Reflow the elements into lines
232    reflow_elements(&elements, options)
233}
234
/// One parsed piece of a Markdown line.
///
/// Every variant other than `Text`, `HtmlTag` and `HtmlEntity` stores only the
/// inner content; the surrounding Markdown delimiters are re-added by the
/// `Display` implementation and accounted for by `len`.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// Inline link, rendered as `[text](url)`
    Link { text: String, url: String },
    /// Reference link, rendered as `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Empty reference link, rendered as `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link, rendered as `[ref]`
    ShortcutReference { reference: String },
    /// Inline image, rendered as `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Reference image, rendered as `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Empty reference image, rendered as `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference, rendered as `[^note]`
    FootnoteReference { note: String },
    /// Strikethrough text, rendered as `~~text~~`
    Strikethrough(String),
    /// Wiki-style link, rendered as `[[content]]`
    WikiLink(String),
    /// Inline math, rendered as `$math$`
    InlineMath(String),
    /// Display math, rendered as `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode, rendered as `:emoji:`
    EmojiShortcode(String),
    /// HTML tag stored verbatim, brackets included (`<tag>`, `</tag>`, `<tag/>`)
    HtmlTag(String),
    /// HTML entity stored verbatim (`&nbsp;`, `&#123;`)
    HtmlEntity(String),
    /// Inline code, rendered as `` `code` ``
    Code(String),
    /// Bold text, rendered as `**text**`
    Bold(String),
    /// Italic text, rendered as `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Writes the element back out in its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Stored verbatim: emit as-is.
            Element::Text(raw) | Element::HtmlTag(raw) | Element::HtmlEntity(raw) => f.write_str(raw),

            // Links and images.
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),

            // Inline spans wrapped in symmetric delimiters.
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Italic(s) => write!(f, "*{s}*"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
        }
    }
}

impl Element {
    /// Width of the rendered element in characters: the content's `char`
    /// count plus the number of delimiter characters `Display` adds.
    fn len(&self) -> usize {
        let width = |s: &str| s.chars().count();

        match self {
            // Verbatim content, no delimiters added.
            Element::Text(s) | Element::HtmlTag(s) | Element::HtmlEntity(s) => width(s),

            // Two delimiter characters: `code`, *text*, $math$, :emoji:
            Element::Code(s) | Element::Italic(s) | Element::InlineMath(s) | Element::EmojiShortcode(s) => {
                width(s) + 2
            }

            // Four delimiter characters: **text**, ~~text~~, [[wiki]], $$math$$
            Element::Bold(s) | Element::Strikethrough(s) | Element::WikiLink(s) | Element::DisplayMath(s) => {
                width(s) + 4
            }

            Element::ShortcutReference { reference } => width(reference) + 2, // [ref]
            Element::FootnoteReference { note } => width(note) + 3,           // [^note]
            Element::EmptyReferenceLink { text } => width(text) + 4,          // [text][]
            Element::EmptyReferenceImage { alt } => width(alt) + 5,           // ![alt][]

            // [text](url) and [text][ref]
            Element::Link { text, url: target } | Element::ReferenceLink { text, reference: target } => {
                width(text) + width(target) + 4
            }

            // ![alt](url) and ![alt][ref]
            Element::InlineImage { alt, url: target } | Element::ReferenceImage { alt, reference: target } => {
                width(alt) + width(target) + 5
            }
        }
    }
}
329
330/// Parse markdown elements from text preserving the raw syntax
331///
332/// Detection order is critical:
333/// 1. Inline links [text](url) - must be detected first to avoid conflicts
334/// 2. Reference links [text][ref] - detected before shortcut references
335/// 3. Empty reference links [text][] - a special case of reference links
336/// 4. Shortcut reference links [ref] - detected last to avoid false positives
337/// 5. Other elements (code, bold, italic) - processed normally
338fn parse_markdown_elements(text: &str) -> Vec<Element> {
339    let mut elements = Vec::new();
340    let mut remaining = text;
341
342    while !remaining.is_empty() {
343        // Find the earliest occurrence of any markdown pattern
344        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
345
346        // Check for images first (they start with ! so should be detected before links)
347        // Inline images - ![alt](url)
348        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
349            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
350        {
351            earliest_match = Some((m.start(), "inline_image", m));
352        }
353
354        // Reference images - ![alt][ref]
355        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
356            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
357        {
358            earliest_match = Some((m.start(), "ref_image", m));
359        }
360
361        // Check for footnote references - [^note]
362        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
363            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
364        {
365            earliest_match = Some((m.start(), "footnote_ref", m));
366        }
367
368        // Check for inline links - [text](url)
369        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
370            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
371        {
372            earliest_match = Some((m.start(), "inline_link", m));
373        }
374
375        // Check for reference links - [text][ref]
376        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
377            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
378        {
379            earliest_match = Some((m.start(), "ref_link", m));
380        }
381
382        // Check for shortcut reference links - [ref]
383        // Only check if we haven't found an earlier pattern that would conflict
384        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
385            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
386        {
387            earliest_match = Some((m.start(), "shortcut_ref", m));
388        }
389
390        // Check for wiki-style links - [[wiki]]
391        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
392            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
393        {
394            earliest_match = Some((m.start(), "wiki_link", m));
395        }
396
397        // Check for display math first (before inline) - $$math$$
398        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
399            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
400        {
401            earliest_match = Some((m.start(), "display_math", m));
402        }
403
404        // Check for inline math - $math$
405        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
406            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
407        {
408            earliest_match = Some((m.start(), "inline_math", m));
409        }
410
411        // Check for strikethrough - ~~text~~
412        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
413            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
414        {
415            earliest_match = Some((m.start(), "strikethrough", m));
416        }
417
418        // Check for emoji shortcodes - :emoji:
419        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
420            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
421        {
422            earliest_match = Some((m.start(), "emoji", m));
423        }
424
425        // Check for HTML entities - &nbsp; etc
426        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
427            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
428        {
429            earliest_match = Some((m.start(), "html_entity", m));
430        }
431
432        // Check for HTML tags - <tag> </tag> <tag/>
433        // But exclude autolinks like <https://...> or <mailto:...>
434        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
435            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
436        {
437            // Check if this is an autolink (starts with protocol or mailto:)
438            let matched_text = &remaining[m.start()..m.end()];
439            let is_autolink = matched_text.starts_with("<http://")
440                || matched_text.starts_with("<https://")
441                || matched_text.starts_with("<mailto:")
442                || matched_text.starts_with("<ftp://")
443                || matched_text.starts_with("<ftps://");
444
445            if !is_autolink {
446                earliest_match = Some((m.start(), "html_tag", m));
447            }
448        }
449
450        // Find earliest non-link special characters
451        let mut next_special = remaining.len();
452        let mut special_type = "";
453
454        if let Some(pos) = remaining.find('`')
455            && pos < next_special
456        {
457            next_special = pos;
458            special_type = "code";
459        }
460        if let Some(pos) = remaining.find("**")
461            && pos < next_special
462        {
463            next_special = pos;
464            special_type = "bold";
465        }
466        if let Some(pos) = remaining.find('*')
467            && pos < next_special
468            && !remaining[pos..].starts_with("**")
469        {
470            next_special = pos;
471            special_type = "italic";
472        }
473
474        // Determine which pattern to process first
475        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
476            pos < next_special
477        } else {
478            false
479        };
480
481        if should_process_markdown_link {
482            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
483
484            // Add any text before the match
485            if pos > 0 {
486                elements.push(Element::Text(remaining[..pos].to_string()));
487            }
488
489            // Process the matched pattern
490            match pattern_type {
491                "inline_image" => {
492                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
493                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
494                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
495                        elements.push(Element::InlineImage {
496                            alt: alt.to_string(),
497                            url: url.to_string(),
498                        });
499                        remaining = &remaining[match_obj.end()..];
500                    } else {
501                        elements.push(Element::Text("!".to_string()));
502                        remaining = &remaining[1..];
503                    }
504                }
505                "ref_image" => {
506                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
507                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
508                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
509
510                        if reference.is_empty() {
511                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
512                        } else {
513                            elements.push(Element::ReferenceImage {
514                                alt: alt.to_string(),
515                                reference: reference.to_string(),
516                            });
517                        }
518                        remaining = &remaining[match_obj.end()..];
519                    } else {
520                        elements.push(Element::Text("!".to_string()));
521                        remaining = &remaining[1..];
522                    }
523                }
524                "footnote_ref" => {
525                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
526                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
527                        elements.push(Element::FootnoteReference { note: note.to_string() });
528                        remaining = &remaining[match_obj.end()..];
529                    } else {
530                        elements.push(Element::Text("[".to_string()));
531                        remaining = &remaining[1..];
532                    }
533                }
534                "inline_link" => {
535                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
536                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
537                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
538                        elements.push(Element::Link {
539                            text: text.to_string(),
540                            url: url.to_string(),
541                        });
542                        remaining = &remaining[match_obj.end()..];
543                    } else {
544                        // Fallback - shouldn't happen
545                        elements.push(Element::Text("[".to_string()));
546                        remaining = &remaining[1..];
547                    }
548                }
549                "ref_link" => {
550                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
551                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
552                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
553
554                        if reference.is_empty() {
555                            // Empty reference link [text][]
556                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
557                        } else {
558                            // Regular reference link [text][ref]
559                            elements.push(Element::ReferenceLink {
560                                text: text.to_string(),
561                                reference: reference.to_string(),
562                            });
563                        }
564                        remaining = &remaining[match_obj.end()..];
565                    } else {
566                        // Fallback - shouldn't happen
567                        elements.push(Element::Text("[".to_string()));
568                        remaining = &remaining[1..];
569                    }
570                }
571                "shortcut_ref" => {
572                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
573                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
574                        elements.push(Element::ShortcutReference {
575                            reference: reference.to_string(),
576                        });
577                        remaining = &remaining[match_obj.end()..];
578                    } else {
579                        // Fallback - shouldn't happen
580                        elements.push(Element::Text("[".to_string()));
581                        remaining = &remaining[1..];
582                    }
583                }
584                "wiki_link" => {
585                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
586                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
587                        elements.push(Element::WikiLink(content.to_string()));
588                        remaining = &remaining[match_obj.end()..];
589                    } else {
590                        elements.push(Element::Text("[[".to_string()));
591                        remaining = &remaining[2..];
592                    }
593                }
594                "display_math" => {
595                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
596                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
597                        elements.push(Element::DisplayMath(math.to_string()));
598                        remaining = &remaining[match_obj.end()..];
599                    } else {
600                        elements.push(Element::Text("$$".to_string()));
601                        remaining = &remaining[2..];
602                    }
603                }
604                "inline_math" => {
605                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
606                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
607                        elements.push(Element::InlineMath(math.to_string()));
608                        remaining = &remaining[match_obj.end()..];
609                    } else {
610                        elements.push(Element::Text("$".to_string()));
611                        remaining = &remaining[1..];
612                    }
613                }
614                "strikethrough" => {
615                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
616                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
617                        elements.push(Element::Strikethrough(text.to_string()));
618                        remaining = &remaining[match_obj.end()..];
619                    } else {
620                        elements.push(Element::Text("~~".to_string()));
621                        remaining = &remaining[2..];
622                    }
623                }
624                "emoji" => {
625                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
626                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
627                        elements.push(Element::EmojiShortcode(emoji.to_string()));
628                        remaining = &remaining[match_obj.end()..];
629                    } else {
630                        elements.push(Element::Text(":".to_string()));
631                        remaining = &remaining[1..];
632                    }
633                }
634                "html_entity" => {
635                    // HTML entities are captured whole
636                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
637                    remaining = &remaining[match_obj.end()..];
638                }
639                "html_tag" => {
640                    // HTML tags are captured whole
641                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
642                    remaining = &remaining[match_obj.end()..];
643                }
644                _ => {
645                    // Unknown pattern, treat as text
646                    elements.push(Element::Text("[".to_string()));
647                    remaining = &remaining[1..];
648                }
649            }
650        } else {
651            // Process non-link special characters
652
653            // Add any text before the special character
654            if next_special > 0 && next_special < remaining.len() {
655                elements.push(Element::Text(remaining[..next_special].to_string()));
656                remaining = &remaining[next_special..];
657            }
658
659            // Process the special element
660            match special_type {
661                "code" => {
662                    // Find end of code
663                    if let Some(code_end) = remaining[1..].find('`') {
664                        let code = &remaining[1..1 + code_end];
665                        elements.push(Element::Code(code.to_string()));
666                        remaining = &remaining[1 + code_end + 1..];
667                    } else {
668                        // No closing backtick, treat as text
669                        elements.push(Element::Text(remaining.to_string()));
670                        break;
671                    }
672                }
673                "bold" => {
674                    // Check for bold text
675                    if let Some(bold_end) = remaining[2..].find("**") {
676                        let bold_text = &remaining[2..2 + bold_end];
677                        elements.push(Element::Bold(bold_text.to_string()));
678                        remaining = &remaining[2 + bold_end + 2..];
679                    } else {
680                        // No closing **, treat as text
681                        elements.push(Element::Text("**".to_string()));
682                        remaining = &remaining[2..];
683                    }
684                }
685                "italic" => {
686                    // Check for italic text
687                    if let Some(italic_end) = remaining[1..].find('*') {
688                        let italic_text = &remaining[1..1 + italic_end];
689                        elements.push(Element::Italic(italic_text.to_string()));
690                        remaining = &remaining[1 + italic_end + 1..];
691                    } else {
692                        // No closing *, treat as text
693                        elements.push(Element::Text("*".to_string()));
694                        remaining = &remaining[1..];
695                    }
696                }
697                _ => {
698                    // No special elements found, add all remaining text
699                    elements.push(Element::Text(remaining.to_string()));
700                    break;
701                }
702            }
703        }
704    }
705
706    elements
707}
708
709/// Reflow elements for sentence-per-line mode
710fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
711    let mut lines = Vec::new();
712    let mut current_line = String::new();
713
714    for element in elements.iter() {
715        let element_str = format!("{element}");
716
717        // For text elements, split into sentences
718        if let Element::Text(text) = element {
719            // Simply append text - it already has correct spacing from tokenization
720            let combined = format!("{current_line}{text}");
721            let sentences = split_into_sentences(&combined);
722
723            if sentences.len() > 1 {
724                // We found sentence boundaries
725                for (i, sentence) in sentences.iter().enumerate() {
726                    if i == 0 {
727                        // First sentence might continue from previous elements
728                        // But check if it ends with an abbreviation
729                        let trimmed = sentence.trim();
730                        let ends_with_sentence_punct =
731                            trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
732                        let ends_with_abbreviation = if ends_with_sentence_punct {
733                            // Strip the final punctuation before checking abbreviations
734                            let without_punct = trimmed
735                                .trim_end_matches('.')
736                                .trim_end_matches('!')
737                                .trim_end_matches('?');
738                            let ignored_words = [
739                                "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
740                                "Jr",
741                            ];
742                            ignored_words
743                                .iter()
744                                .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
745                        } else {
746                            false
747                        };
748
749                        if ends_with_abbreviation {
750                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
751                            current_line = sentence.to_string();
752                        } else {
753                            // Normal case - emit the first sentence
754                            lines.push(sentence.to_string());
755                            current_line.clear();
756                        }
757                    } else if i == sentences.len() - 1 {
758                        // Last sentence: check if it's complete or incomplete
759                        let trimmed = sentence.trim();
760                        let ends_with_sentence_punct =
761                            trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
762
763                        // Check if it ends with an abbreviation
764                        let ends_with_abbreviation = if ends_with_sentence_punct {
765                            // Strip the final punctuation before checking abbreviations
766                            let without_punct = trimmed
767                                .trim_end_matches('.')
768                                .trim_end_matches('!')
769                                .trim_end_matches('?');
770                            let ignored_words = [
771                                "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
772                                "Jr",
773                            ];
774                            ignored_words
775                                .iter()
776                                .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
777                        } else {
778                            false
779                        };
780
781                        if ends_with_sentence_punct && !ends_with_abbreviation {
782                            // Complete sentence - emit it immediately
783                            lines.push(sentence.to_string());
784                            current_line.clear();
785                        } else {
786                            // Incomplete sentence - save for next iteration
787                            current_line = sentence.to_string();
788                        }
789                    } else {
790                        // Complete sentences in the middle
791                        lines.push(sentence.to_string());
792                    }
793                }
794            } else {
795                // No sentence boundary found, continue accumulating
796                current_line = combined;
797            }
798        } else {
799            // Non-text elements (Code, Bold, Italic, etc.)
800            // Add space before element if needed (unless it's after an opening paren/bracket)
801            if !current_line.is_empty()
802                && !current_line.ends_with(' ')
803                && !current_line.ends_with('(')
804                && !current_line.ends_with('[')
805            {
806                current_line.push(' ');
807            }
808            current_line.push_str(&element_str);
809        }
810    }
811
812    // Add any remaining content
813    if !current_line.is_empty() {
814        lines.push(current_line.trim().to_string());
815    }
816    lines
817}
818
819/// Reflow elements into lines that fit within the line length
820fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
821    let mut lines = Vec::new();
822    let mut current_line = String::new();
823    let mut current_length = 0;
824
825    for element in elements {
826        let element_str = format!("{element}");
827        let element_len = element.len();
828
829        // For text elements that might need breaking
830        if let Element::Text(text) = element {
831            // If this is a text element, always process it word by word
832            let words: Vec<&str> = text.split_whitespace().collect();
833
834            for word in words {
835                let word_len = word.chars().count();
836                if current_length > 0 && current_length + 1 + word_len > options.line_length {
837                    // Start a new line
838                    lines.push(current_line.trim().to_string());
839                    current_line = word.to_string();
840                    current_length = word_len;
841                } else {
842                    // Add word to current line
843                    if current_length > 0 {
844                        current_line.push(' ');
845                        current_length += 1;
846                    }
847                    current_line.push_str(word);
848                    current_length += word_len;
849                }
850            }
851        } else {
852            // For non-text elements (code, links, references), treat as atomic units
853            // These should never be broken across lines
854            if current_length > 0 && current_length + 1 + element_len > options.line_length {
855                // Start a new line
856                lines.push(current_line.trim().to_string());
857                current_line = element_str;
858                current_length = element_len;
859            } else {
860                // Add element to current line
861                if current_length > 0 {
862                    current_line.push(' ');
863                    current_length += 1;
864                }
865                current_line.push_str(&element_str);
866                current_length += element_len;
867            }
868        }
869    }
870
871    // Don't forget the last line
872    if !current_line.is_empty() {
873        lines.push(current_line.trim_end().to_string());
874    }
875
876    lines
877}
878
879/// Reflow markdown content preserving structure
880pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
881    let lines: Vec<&str> = content.lines().collect();
882    let mut result = Vec::new();
883    let mut i = 0;
884
885    while i < lines.len() {
886        let line = lines[i];
887        let trimmed = line.trim();
888
889        // Preserve empty lines
890        if trimmed.is_empty() {
891            result.push(String::new());
892            i += 1;
893            continue;
894        }
895
896        // Preserve headings as-is
897        if trimmed.starts_with('#') {
898            result.push(line.to_string());
899            i += 1;
900            continue;
901        }
902
903        // Preserve fenced code blocks
904        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
905            result.push(line.to_string());
906            i += 1;
907            // Copy lines until closing fence
908            while i < lines.len() {
909                result.push(lines[i].to_string());
910                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
911                    i += 1;
912                    break;
913                }
914                i += 1;
915            }
916            continue;
917        }
918
919        // Preserve indented code blocks (4+ spaces or 1+ tab)
920        if line.starts_with("    ") || line.starts_with("\t") {
921            // Collect all consecutive indented lines
922            result.push(line.to_string());
923            i += 1;
924            while i < lines.len() {
925                let next_line = lines[i];
926                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
927                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
928                    result.push(next_line.to_string());
929                    i += 1;
930                } else {
931                    break;
932                }
933            }
934            continue;
935        }
936
937        // Preserve block quotes (but reflow their content)
938        if trimmed.starts_with('>') {
939            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
940            let quote_content = &line[quote_prefix.len()..].trim_start();
941
942            let reflowed = reflow_line(quote_content, options);
943            for reflowed_line in reflowed.iter() {
944                result.push(format!("{quote_prefix} {reflowed_line}"));
945            }
946            i += 1;
947            continue;
948        }
949
950        // Preserve horizontal rules first (before checking for lists)
951        if is_horizontal_rule(trimmed) {
952            result.push(line.to_string());
953            i += 1;
954            continue;
955        }
956
957        // Preserve lists (but not horizontal rules)
958        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
959            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
960            || trimmed.starts_with('+')
961            || is_numbered_list_item(trimmed)
962        {
963            // Find the list marker and preserve indentation
964            let indent = line.len() - line.trim_start().len();
965            let indent_str = " ".repeat(indent);
966
967            // For numbered lists, find the period and the space after it
968            // For bullet lists, find the marker and the space after it
969            let mut marker_end = indent;
970            let mut content_start = indent;
971
972            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
973                // Numbered list: find the period
974                if let Some(period_pos) = line[indent..].find('.') {
975                    marker_end = indent + period_pos + 1; // Include the period
976                    content_start = marker_end;
977                    // Skip any spaces after the period to find content start
978                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
979                        content_start += 1;
980                    }
981                }
982            } else {
983                // Bullet list: marker is single character
984                marker_end = indent + 1; // Just the marker character
985                content_start = marker_end;
986                // Skip any spaces after the marker
987                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
988                    content_start += 1;
989                }
990            }
991
992            let marker = &line[indent..marker_end];
993
994            // Collect all content for this list item (including continuation lines)
995            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
996            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
997            i += 1;
998
999            // Collect continuation lines (indented lines that are part of this list item)
1000            while i < lines.len() {
1001                let next_line = lines[i];
1002                let next_trimmed = next_line.trim();
1003
1004                // Stop if we hit an empty line or another list item or special block
1005                if next_trimmed.is_empty()
1006                    || next_trimmed.starts_with('#')
1007                    || next_trimmed.starts_with("```")
1008                    || next_trimmed.starts_with("~~~")
1009                    || next_trimmed.starts_with('>')
1010                    || next_trimmed.starts_with('|')
1011                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1012                    || is_horizontal_rule(next_trimmed)
1013                    || (next_trimmed.starts_with('-')
1014                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1015                    || (next_trimmed.starts_with('*')
1016                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017                    || (next_trimmed.starts_with('+')
1018                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1019                    || is_numbered_list_item(next_trimmed)
1020                {
1021                    break;
1022                }
1023
1024                // Check if this line is indented (continuation of list item)
1025                let next_indent = next_line.len() - next_line.trim_start().len();
1026                if next_indent >= content_start {
1027                    // This is a continuation line - add its content
1028                    // Preserve hard breaks while trimming excessive whitespace
1029                    let trimmed_start = next_line.trim_start();
1030                    list_content.push(trim_preserving_hard_break(trimmed_start));
1031                    i += 1;
1032                } else {
1033                    // Not indented enough, not part of this list item
1034                    break;
1035                }
1036            }
1037
1038            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
1039            // Hard breaks should prevent joining with the next line
1040            let combined_content = if options.preserve_breaks {
1041                list_content[0].clone()
1042            } else {
1043                // Check if any lines have hard breaks - if so, preserve the structure
1044                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1045                if has_hard_breaks {
1046                    // Don't join lines with hard breaks - keep them separate with newlines
1047                    list_content.join("\n")
1048                } else {
1049                    // No hard breaks, safe to join with spaces
1050                    list_content.join(" ")
1051                }
1052            };
1053
1054            // Calculate the proper indentation for continuation lines
1055            let trimmed_marker = marker;
1056            let continuation_spaces = content_start;
1057
1058            // Adjust line length to account for list marker and space
1059            let prefix_length = indent + trimmed_marker.len() + 1;
1060
1061            // Create adjusted options with reduced line length
1062            let adjusted_options = ReflowOptions {
1063                line_length: options.line_length.saturating_sub(prefix_length),
1064                ..options.clone()
1065            };
1066
1067            let reflowed = reflow_line(&combined_content, &adjusted_options);
1068            for (j, reflowed_line) in reflowed.iter().enumerate() {
1069                if j == 0 {
1070                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1071                } else {
1072                    // Continuation lines aligned with text after marker
1073                    let continuation_indent = " ".repeat(continuation_spaces);
1074                    result.push(format!("{continuation_indent}{reflowed_line}"));
1075                }
1076            }
1077            continue;
1078        }
1079
1080        // Preserve tables
1081        if trimmed.contains('|') {
1082            result.push(line.to_string());
1083            i += 1;
1084            continue;
1085        }
1086
1087        // Preserve reference definitions
1088        if trimmed.starts_with('[') && line.contains("]:") {
1089            result.push(line.to_string());
1090            i += 1;
1091            continue;
1092        }
1093
1094        // Check if this is a single line that doesn't need processing
1095        let mut is_single_line_paragraph = true;
1096        if i + 1 < lines.len() {
1097            let next_line = lines[i + 1];
1098            let next_trimmed = next_line.trim();
1099            // Check if next line starts a new block
1100            if !next_trimmed.is_empty()
1101                && !next_trimmed.starts_with('#')
1102                && !next_trimmed.starts_with("```")
1103                && !next_trimmed.starts_with("~~~")
1104                && !next_trimmed.starts_with('>')
1105                && !next_trimmed.starts_with('|')
1106                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1107                && !is_horizontal_rule(next_trimmed)
1108                && !(next_trimmed.starts_with('-')
1109                    && !is_horizontal_rule(next_trimmed)
1110                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1111                && !(next_trimmed.starts_with('*')
1112                    && !is_horizontal_rule(next_trimmed)
1113                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1114                && !(next_trimmed.starts_with('+')
1115                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1116                && !is_numbered_list_item(next_trimmed)
1117            {
1118                is_single_line_paragraph = false;
1119            }
1120        }
1121
1122        // If it's a single line that fits, just add it as-is
1123        if is_single_line_paragraph && line.chars().count() <= options.line_length {
1124            result.push(line.to_string());
1125            i += 1;
1126            continue;
1127        }
1128
1129        // For regular paragraphs, collect consecutive lines
1130        let mut paragraph_parts = Vec::new();
1131        let mut current_part = vec![line];
1132        i += 1;
1133
1134        // If preserve_breaks is true, treat each line separately
1135        if options.preserve_breaks {
1136            // Don't collect consecutive lines - just reflow this single line
1137            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1138                Some("\\")
1139            } else if line.ends_with("  ") {
1140                Some("  ")
1141            } else {
1142                None
1143            };
1144            let reflowed = reflow_line(line, options);
1145
1146            // Preserve hard breaks (two trailing spaces or backslash)
1147            if let Some(break_marker) = hard_break_type {
1148                if !reflowed.is_empty() {
1149                    let mut reflowed_with_break = reflowed;
1150                    let last_idx = reflowed_with_break.len() - 1;
1151                    if !has_hard_break(&reflowed_with_break[last_idx]) {
1152                        reflowed_with_break[last_idx].push_str(break_marker);
1153                    }
1154                    result.extend(reflowed_with_break);
1155                }
1156            } else {
1157                result.extend(reflowed);
1158            }
1159        } else {
1160            // Original behavior: collect consecutive lines into a paragraph
1161            while i < lines.len() {
1162                let prev_line = if !current_part.is_empty() {
1163                    current_part.last().unwrap()
1164                } else {
1165                    ""
1166                };
1167                let next_line = lines[i];
1168                let next_trimmed = next_line.trim();
1169
1170                // Stop at empty lines or special blocks
1171                if next_trimmed.is_empty()
1172                    || next_trimmed.starts_with('#')
1173                    || next_trimmed.starts_with("```")
1174                    || next_trimmed.starts_with("~~~")
1175                    || next_trimmed.starts_with('>')
1176                    || next_trimmed.starts_with('|')
1177                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1178                    || is_horizontal_rule(next_trimmed)
1179                    || (next_trimmed.starts_with('-')
1180                        && !is_horizontal_rule(next_trimmed)
1181                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1182                    || (next_trimmed.starts_with('*')
1183                        && !is_horizontal_rule(next_trimmed)
1184                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1185                    || (next_trimmed.starts_with('+')
1186                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1187                    || is_numbered_list_item(next_trimmed)
1188                {
1189                    break;
1190                }
1191
1192                // Check if previous line ends with hard break (two spaces or backslash)
1193                if has_hard_break(prev_line) {
1194                    // Start a new part after hard break
1195                    paragraph_parts.push(current_part.join(" "));
1196                    current_part = vec![next_line];
1197                } else {
1198                    current_part.push(next_line);
1199                }
1200                i += 1;
1201            }
1202
1203            // Add the last part
1204            if !current_part.is_empty() {
1205                if current_part.len() == 1 {
1206                    // Single line, don't add trailing space
1207                    paragraph_parts.push(current_part[0].to_string());
1208                } else {
1209                    paragraph_parts.push(current_part.join(" "));
1210                }
1211            }
1212
1213            // Reflow each part separately, preserving hard breaks
1214            for (j, part) in paragraph_parts.iter().enumerate() {
1215                let reflowed = reflow_line(part, options);
1216                result.extend(reflowed);
1217
1218                // Preserve hard break by ensuring last line of part ends with hard break marker
1219                // Use two spaces as the default hard break format for reflows
1220                if j < paragraph_parts.len() - 1 && !result.is_empty() {
1221                    let last_idx = result.len() - 1;
1222                    if !has_hard_break(&result[last_idx]) {
1223                        result[last_idx].push_str("  ");
1224                    }
1225                }
1226            }
1227        }
1228    }
1229
1230    // Preserve trailing newline if the original content had one
1231    let result_text = result.join("\n");
1232    if content.ends_with('\n') && !result_text.ends_with('\n') {
1233        format!("{result_text}\n")
1234    } else {
1235        result_text
1236    }
1237}
1238
/// Result of reflowing one paragraph: where the paragraph sits in the original
/// content and the text that should replace it.
#[derive(Clone, Debug)]
pub struct ParagraphReflow {
    /// Byte offset in the original content at which the paragraph begins
    pub start_byte: usize,
    /// Byte offset in the original content at which the paragraph ends
    pub end_byte: usize,
    /// Replacement text produced by reflowing the paragraph
    pub reflowed_text: String,
}
1249
1250/// Reflow a single paragraph at the specified line number
1251///
1252/// This function finds the paragraph containing the given line number,
1253/// reflows it according to the specified line length, and returns
1254/// information about the paragraph location and its reflowed text.
1255///
1256/// # Arguments
1257///
1258/// * `content` - The full document content
1259/// * `line_number` - The 1-based line number within the paragraph to reflow
1260/// * `line_length` - The target line length for reflowing
1261///
1262/// # Returns
1263///
1264/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
1265/// or `None` if the line number is out of bounds or the content at that
1266/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
1267pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1268    if line_number == 0 {
1269        return None;
1270    }
1271
1272    let lines: Vec<&str> = content.lines().collect();
1273
1274    // Check if line number is valid (1-based)
1275    if line_number > lines.len() {
1276        return None;
1277    }
1278
1279    let target_idx = line_number - 1; // Convert to 0-based
1280    let target_line = lines[target_idx];
1281    let trimmed = target_line.trim();
1282
1283    // Don't reflow special blocks
1284    if trimmed.is_empty()
1285        || trimmed.starts_with('#')
1286        || trimmed.starts_with("```")
1287        || trimmed.starts_with("~~~")
1288        || target_line.starts_with("    ")
1289        || target_line.starts_with('\t')
1290        || trimmed.starts_with('>')
1291        || trimmed.contains('|') // Tables
1292        || (trimmed.starts_with('[') && target_line.contains("]:")) // Reference definitions
1293        || is_horizontal_rule(trimmed)
1294        || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1295            && !is_horizontal_rule(trimmed)
1296            && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1297        || is_numbered_list_item(trimmed)
1298    {
1299        return None;
1300    }
1301
1302    // Find paragraph start - scan backward until blank line or special block
1303    let mut para_start = target_idx;
1304    while para_start > 0 {
1305        let prev_idx = para_start - 1;
1306        let prev_line = lines[prev_idx];
1307        let prev_trimmed = prev_line.trim();
1308
1309        // Stop at blank line or special blocks
1310        if prev_trimmed.is_empty()
1311            || prev_trimmed.starts_with('#')
1312            || prev_trimmed.starts_with("```")
1313            || prev_trimmed.starts_with("~~~")
1314            || prev_line.starts_with("    ")
1315            || prev_line.starts_with('\t')
1316            || prev_trimmed.starts_with('>')
1317            || prev_trimmed.contains('|')
1318            || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1319            || is_horizontal_rule(prev_trimmed)
1320            || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1321                && !is_horizontal_rule(prev_trimmed)
1322                && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1323            || is_numbered_list_item(prev_trimmed)
1324        {
1325            break;
1326        }
1327
1328        para_start = prev_idx;
1329    }
1330
1331    // Find paragraph end - scan forward until blank line or special block
1332    let mut para_end = target_idx;
1333    while para_end + 1 < lines.len() {
1334        let next_idx = para_end + 1;
1335        let next_line = lines[next_idx];
1336        let next_trimmed = next_line.trim();
1337
1338        // Stop at blank line or special blocks
1339        if next_trimmed.is_empty()
1340            || next_trimmed.starts_with('#')
1341            || next_trimmed.starts_with("```")
1342            || next_trimmed.starts_with("~~~")
1343            || next_line.starts_with("    ")
1344            || next_line.starts_with('\t')
1345            || next_trimmed.starts_with('>')
1346            || next_trimmed.contains('|')
1347            || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1348            || is_horizontal_rule(next_trimmed)
1349            || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1350                && !is_horizontal_rule(next_trimmed)
1351                && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1352            || is_numbered_list_item(next_trimmed)
1353        {
1354            break;
1355        }
1356
1357        para_end = next_idx;
1358    }
1359
1360    // Extract paragraph lines
1361    let paragraph_lines = &lines[para_start..=para_end];
1362
1363    // Calculate byte offsets
1364    let mut start_byte = 0;
1365    for line in lines.iter().take(para_start) {
1366        start_byte += line.len() + 1; // +1 for newline
1367    }
1368
1369    let mut end_byte = start_byte;
1370    for line in paragraph_lines.iter() {
1371        end_byte += line.len() + 1; // +1 for newline
1372    }
1373
1374    // Track whether the byte range includes a trailing newline
1375    // (it doesn't if this is the last line and the file doesn't end with newline)
1376    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1377
1378    // Adjust end_byte if the last line doesn't have a newline
1379    if !includes_trailing_newline {
1380        end_byte -= 1;
1381    }
1382
1383    // Join paragraph lines and reflow
1384    let paragraph_text = paragraph_lines.join("\n");
1385
1386    // Create reflow options
1387    let options = ReflowOptions {
1388        line_length,
1389        break_on_sentences: true,
1390        preserve_breaks: false,
1391        sentence_per_line: false,
1392    };
1393
1394    // Reflow the paragraph using reflow_markdown to handle it properly
1395    let reflowed = reflow_markdown(&paragraph_text, &options);
1396
1397    // Ensure reflowed text matches whether the byte range includes a trailing newline
1398    // This is critical: if the range includes a newline, the replacement must too,
1399    // otherwise the next line will get appended to the reflowed paragraph
1400    let reflowed_text = if includes_trailing_newline {
1401        // Range includes newline - ensure reflowed text has one
1402        if reflowed.ends_with('\n') {
1403            reflowed
1404        } else {
1405            format!("{reflowed}\n")
1406        }
1407    } else {
1408        // Range doesn't include newline - ensure reflowed text doesn't have one
1409        if reflowed.ends_with('\n') {
1410            reflowed.trim_end_matches('\n').to_string()
1411        } else {
1412            reflowed
1413        }
1414    };
1415
1416    Some(ParagraphReflow {
1417        start_byte,
1418        end_byte,
1419        reflowed_text,
1420    })
1421}
1422
#[cfg(test)]
mod tests {
    //! Unit tests for the reflow helpers: wrapping, preservation of inline Markdown
    //! constructs, sentence detection, and list handling.

    use super::*;

    /// Builds regular (not sentence-per-line) options for the given target width.
    ///
    /// Every field is spelled out so the tests do not silently change meaning if the
    /// `Default` implementation is ever adjusted.
    fn options_for_width(line_length: usize) -> ReflowOptions {
        ReflowOptions {
            line_length,
            break_on_sentences: true, // MD013 uses true by default
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }

    /// Builds the options used by all sentence-per-line tests (80 columns).
    fn sentence_per_line_options() -> ReflowOptions {
        ReflowOptions {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: true,
        }
    }

    /// Reflows `input` with `options`, joins the wrapped lines with single spaces and
    /// asserts that each entry of `expected_patterns` appears verbatim in the joined text.
    ///
    /// Returns the joined text so callers can run additional checks on it.
    fn assert_patterns_preserved(input: &str, options: &ReflowOptions, expected_patterns: &[&str]) -> String {
        let joined = reflow_line(input, options).join(" ");

        for &expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }

        joined
    }

    #[test]
    fn test_list_item_trailing_whitespace_removal() {
        // Test for issue #76 - hard breaks (2 trailing spaces) should be preserved
        // and prevent reflowing
        let input = "1. First line with trailing spaces   \n    Second line with trailing spaces  \n    Third line\n";

        // A huge width guarantees that any remaining line breaks come from hard breaks,
        // not from wrapping
        let options = options_for_width(999999);

        let result = reflow_markdown(input, &options);

        // Should not contain 3+ consecutive spaces (which would indicate
        // trailing whitespace became mid-line whitespace)
        assert!(
            !result.contains("   "),
            "Result should not contain 3+ consecutive spaces: {result:?}"
        );

        // Hard breaks should be preserved (exactly 2 trailing spaces)
        assert!(result.contains("  \n"), "Hard breaks should be preserved: {result:?}");

        // Should NOT be reflowed into a single line because hard breaks are present
        // The content should maintain its line structure
        let line_count = result.lines().count();
        assert!(
            line_count >= 2,
            "Should have multiple lines (not reflowed due to hard breaks), got: {}",
            line_count
        );
    }

    #[test]
    fn test_reflow_simple_text() {
        let options = options_for_width(20);

        let input = "This is a very long line that needs to be wrapped";
        let result = reflow_line(input, &options);

        assert_eq!(result.len(), 3);
        // Width is measured in characters, not bytes
        for line in &result {
            assert!(line.chars().count() <= 20, "Line exceeds 20 characters: {line:?}");
        }
    }

    #[test]
    fn test_preserve_inline_code() {
        // Verify inline code is not broken
        assert_patterns_preserved(
            "This line has `inline code` that should be preserved",
            &options_for_width(30),
            &["`inline code`"],
        );
    }

    #[test]
    fn test_preserve_links() {
        // Verify link is preserved intact
        assert_patterns_preserved(
            "Check out [this link](https://example.com/very/long/url) for more info",
            &options_for_width(40),
            &["[this link](https://example.com/very/long/url)"],
        );
    }

    #[test]
    fn test_reference_link_patterns_fixed() {
        let options = options_for_width(30);

        // Test cases that verify reference links are preserved as atomic units
        let test_cases = vec![
            // Reference link: [text][ref] - should be preserved intact
            ("Check out [text][ref] for details", vec!["[text][ref]"]),
            // Empty reference: [text][] - should be preserved intact
            ("See [text][] for info", vec!["[text][]"]),
            // Shortcut reference: [homepage] - should be preserved intact
            ("Visit [homepage] today", vec!["[homepage]"]),
            // Multiple reference links in one line
            (
                "Links: [first][ref1] and [second][ref2] here",
                vec!["[first][ref1]", "[second][ref2]"],
            ),
            // Mixed inline and reference links
            (
                "See [inline](url) and [reference][ref] links",
                vec!["[inline](url)", "[reference][ref]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            let joined = assert_patterns_preserved(input, &options, &expected_patterns);

            // Verify no broken patterns exist. Either marker on its own signals a link that
            // was split at a wrap point, so both must be absent (none of the inputs contain
            // them to begin with).
            assert!(
                !joined.contains("[ ") && !joined.contains("] ["),
                "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
            );
        }
    }

    #[test]
    fn test_sentence_detection_basic() {
        // Test basic sentence detection
        assert!(is_sentence_boundary("Hello. World", 5));
        assert!(is_sentence_boundary("Test! Another", 4));
        assert!(is_sentence_boundary("Question? Answer", 8));

        // Test non-boundaries
        assert!(!is_sentence_boundary("Hello world", 5));
        assert!(!is_sentence_boundary("Test.com", 4));
        assert!(!is_sentence_boundary("3.14 pi", 1));
    }

    #[test]
    fn test_sentence_detection_abbreviations() {
        // Common abbreviations should not be treated as sentence boundaries
        assert!(!is_sentence_boundary("Mr. Smith", 2));
        assert!(!is_sentence_boundary("Dr. Jones", 2));
        assert!(!is_sentence_boundary("e.g. example", 3));
        assert!(!is_sentence_boundary("i.e. that is", 3));
        assert!(!is_sentence_boundary("etc. items", 3));

        // But sentence after abbreviation should be a boundary
        assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
    }

    #[test]
    fn test_split_into_sentences() {
        let text = "First sentence. Second sentence. Third one!";
        let sentences = split_into_sentences(text);
        assert_eq!(sentences.len(), 3);
        assert_eq!(sentences[0], "First sentence.");
        assert_eq!(sentences[1], "Second sentence.");
        assert_eq!(sentences[2], "Third one!");

        // Test with abbreviations
        let text2 = "Mr. Smith met Dr. Jones.";
        let sentences2 = split_into_sentences(text2);
        assert_eq!(sentences2.len(), 1);
        assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");

        // Test single sentence
        let text3 = "This is a single sentence.";
        let sentences3 = split_into_sentences(text3);
        assert_eq!(sentences3.len(), 1);
        assert_eq!(sentences3[0], "This is a single sentence.");
    }

    #[test]
    fn test_sentence_per_line_reflow() {
        let options = sentence_per_line_options();

        // Test basic sentence splitting
        let input = "First sentence. Second sentence. Third sentence.";
        let result = reflow_line(input, &options);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0], "First sentence.");
        assert_eq!(result[1], "Second sentence.");
        assert_eq!(result[2], "Third sentence.");

        // Test with markdown elements
        let input2 = "This has **bold**. And [a link](url).";
        let result2 = reflow_line(input2, &options);
        assert_eq!(result2.len(), 2);
        assert_eq!(result2[0], "This has **bold**.");
        assert_eq!(result2[1], "And [a link](url).");
    }

    #[test]
    fn test_sentence_per_line_with_backticks() {
        let options = sentence_per_line_options();

        let input = "This sentence has `code` in it. And this has `more code` too.";
        let result = reflow_line(input, &options);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0], "This sentence has `code` in it.");
        assert_eq!(result[1], "And this has `more code` too.");
    }

    #[test]
    fn test_sentence_per_line_with_backticks_in_parens() {
        let options = sentence_per_line_options();

        // The period directly follows a closing parenthesis that itself follows a code span
        let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
        let result = reflow_line(input, &options);
        assert_eq!(result.len(), 2);
        assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
        assert_eq!(result[1], "Next sentence.");
    }

    #[test]
    fn test_sentence_per_line_with_questions_exclamations() {
        let options = sentence_per_line_options();

        let input = "Is this a question? Yes it is! And a statement.";
        let result = reflow_line(input, &options);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0], "Is this a question?");
        assert_eq!(result[1], "Yes it is!");
        assert_eq!(result[2], "And a statement.");
    }

    #[test]
    fn test_split_sentences_issue_124() {
        // Test the actual text from issue #124
        let text = "If you are sure that all data structures exposed in a `PyModule` are thread-safe, then pass `gil_used = false` as a parameter to the `pymodule` procedural macro declaring the module or call `PyModule::gil_used` on a `PyModule` instance.  For example:";

        let sentences = split_into_sentences(text);

        // This should detect 2 sentences:
        // 1. "If you are sure ... on a `PyModule` instance."
        // 2. "For example:"
        assert_eq!(sentences.len(), 2, "Should detect 2 sentences in the text");
    }

    #[test]
    fn test_reference_link_edge_cases() {
        let options = options_for_width(40);

        // Test cases for edge cases and potential conflicts
        let test_cases = vec![
            // Escaped brackets should be treated as regular text
            ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
            // Nested brackets in reference links
            (
                "Link [text with [nested] content][ref]",
                vec!["[text with [nested] content][ref]"],
            ),
            // Reference link followed by inline link
            (
                "First [ref][link] then [inline](url)",
                vec!["[ref][link]", "[inline](url)"],
            ),
            // Shortcut reference that might conflict with other patterns
            ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
            // Empty reference with complex text
            (
                "Complex [text with *emphasis*][] reference",
                vec!["[text with *emphasis*][]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            assert_patterns_preserved(input, &options, &expected_patterns);
        }
    }

    #[test]
    fn test_reflow_with_emphasis() {
        // Verify emphasis markers are preserved
        assert_patterns_preserved(
            "This is *emphasized* and **strong** text that needs wrapping",
            &options_for_width(25),
            &["*emphasized*", "**strong**"],
        );
    }

    #[test]
    fn test_image_patterns_preserved() {
        let options = options_for_width(30);

        // Test cases for image patterns
        let test_cases = vec![
            // Inline image
            (
                "Check out ![alt text](image.png) for details",
                vec!["![alt text](image.png)"],
            ),
            // Reference image
            ("See ![image][ref] for info", vec!["![image][ref]"]),
            // Empty reference image
            ("Visit ![homepage][] today", vec!["![homepage][]"]),
            // Multiple images
            (
                "Images: ![first](a.png) and ![second][ref2]",
                vec!["![first](a.png)", "![second][ref2]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            assert_patterns_preserved(input, &options, &expected_patterns);
        }
    }

    #[test]
    fn test_extended_markdown_patterns() {
        let options = options_for_width(40);

        let test_cases = vec![
            // Strikethrough
            ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
            // Wiki links
            (
                "Check [[wiki link]] and [[page|display]]",
                vec!["[[wiki link]]", "[[page|display]]"],
            ),
            // Math
            (
                "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
                vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
            ),
            // Emoji
            ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
            // HTML tags
            (
                "Text with <span>tag</span> and <br/>",
                vec!["<span>", "</span>", "<br/>"],
            ),
            // HTML entities
            ("Non-breaking&nbsp;space and em&mdash;dash", vec!["&nbsp;", "&mdash;"]),
        ];

        for (input, expected_patterns) in test_cases {
            assert_patterns_preserved(input, &options, &expected_patterns);
        }
    }

    #[test]
    fn test_complex_mixed_patterns() {
        // Test that multiple pattern types work together
        let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";

        // All patterns should be preserved
        assert_patterns_preserved(
            input,
            &options_for_width(50),
            &[
                "**bold**",
                "`code`",
                "[link](url)",
                "![image](img)",
                "~~strike~~",
                "$math$",
                ":emoji:",
                "<tag>",
            ],
        );
    }

    #[test]
    fn test_footnote_patterns_preserved() {
        let options = options_for_width(40);

        let test_cases = vec![
            // Single footnote
            ("This has a footnote[^1] reference", vec!["[^1]"]),
            // Multiple footnotes
            ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
            // Long footnote name
            ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
        ];

        for (input, expected_patterns) in test_cases {
            assert_patterns_preserved(input, &options, &expected_patterns);
        }
    }

    #[test]
    fn test_reflow_markdown_numbered_lists() {
        // Test for issue #83: numbered lists with proper formatting
        let options = options_for_width(50);

        let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
2. Short item
3. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;

        let result = reflow_markdown(content, &options);

        // Define exact expected output
        let expected = r#"1. List `manifest` to find the manifest with the
   largest ID. Say it's
   `00000000000000000002.manifest` in this
   example.
2. Short item
3. Another long item that definitely exceeds the
   fifty character limit and needs wrapping"#;

        assert_eq!(
            result, expected,
            "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
        );
    }

    #[test]
    fn test_reflow_markdown_bullet_lists() {
        let options = options_for_width(40);

        let content = r#"- First bullet point with a very long line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

        let result = reflow_markdown(content, &options);

        // Define exact expected output - each bullet type preserved with proper indentation
        let expected = r#"- First bullet point with a very long
  line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

        assert_eq!(
            result, expected,
            "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
        );
    }

    #[test]
    fn test_ie_abbreviation_split_debug() {
        let input = "This results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.";

        let result = reflow_line(input, &sentence_per_line_options());

        // Should be 1 sentence, not split after "i.e."
        assert_eq!(result.len(), 1, "Should not split after i.e. abbreviation");
    }

    #[test]
    fn test_ie_abbreviation_paragraph() {
        // Test the full paragraph from the file that's causing the issue
        let input = "The `pyo3(transparent)` attribute can be used on structs with exactly one field.\nThis results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.\nThis behaviour is enabled per default for newtype structs and tuple-variants with a single field.";

        let result = reflow_markdown(input, &sentence_per_line_options());

        // Should be 3 sentences, not 4 (shouldn't split after "i.e.")
        let lines: Vec<&str> = result.lines().collect();
        let line_count = lines.len();
        assert_eq!(line_count, 3, "Should have 3 sentences, not {line_count}");

        // Verify the second line contains the full sentence (indexing is safe: the
        // exact line count was asserted above)
        assert!(lines[1].contains("i.e."), "Second line should contain 'i.e.'");
        assert!(
            lines[1].contains("`obj.extract()`"),
            "Second line should contain the code span"
        );
        assert!(
            lines[1].contains("attribute."),
            "Second line should end with 'attribute.'"
        );
    }
}