rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::is_definition_list_item;
7use crate::utils::regex_cache::{
8    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
9    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
10    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
11};
/// Options for reflowing text.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone` so option
/// sets can be logged and compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target line length
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// Defaults: 80-column target, sentence-aware breaking enabled, existing
    /// breaks not preserved, sentence-per-line mode disabled.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
35
/// Detect whether the character at index `pos` ends a sentence.
///
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line.
///
/// `pos` is a **character** index into `text` (not a byte offset). The
/// position is a boundary when all of the following hold:
///
/// * the character at `pos` is `.`, `!` or `?`;
/// * it is immediately followed by a space;
/// * the first non-whitespace character after it exists and is uppercase;
/// * the text before `pos` does not end with a known abbreviation
///   (compared case-insensitively);
/// * it is not a separator between two numeric characters.
///
/// Returns `false` for out-of-range positions and for punctuation at the very
/// end of the text.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    // Abbreviation list similar to sentences-per-line, stored lowercase
    // because the preceding text is lowercased before comparison.
    const IGNORED_WORDS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "mr", "mrs", "dr", "ms", "prof", "sr", "jr",
    ];

    let chars: Vec<char> = text.chars().collect();

    // Need the punctuation itself plus at least one following character.
    if pos + 1 >= chars.len() {
        return false;
    }

    // Sentence-ending punctuation that is directly followed by a space.
    if !matches!(chars[pos], '.' | '!' | '?') || chars[pos + 1] != ' ' {
        return false;
    }

    // Skip any further whitespace; running off the end means no next sentence.
    let Some(next) = chars[pos + 2..].iter().copied().find(|c| !c.is_whitespace()) else {
        return false;
    };

    // A new sentence is expected to start with an uppercase character.
    if !next.is_uppercase() {
        return false;
    }

    if pos > 0 {
        // Rebuild the preceding text from `chars` so that `pos` is honoured as
        // a character index. Slicing `text` with it would panic or inspect the
        // wrong prefix whenever multi-byte characters occur before `pos`.
        let preceding = chars[..pos].iter().collect::<String>().to_lowercase();

        // NOTE(review): this is a plain suffix test, so ordinary words that
        // merely end in one of the entries (e.g. "items.") are also treated as
        // non-boundaries. Kept as-is because the sentence-per-line reflow code
        // in this file applies the same suffix rule.
        if IGNORED_WORDS.iter().any(|&word| preceding.ends_with(word)) {
            return false;
        }

        // Guard against splitting between two numeric characters.
        if chars[pos - 1].is_numeric() && next.is_numeric() {
            return false;
        }
    }
    true
}
93
94/// Split text into sentences
95pub fn split_into_sentences(text: &str) -> Vec<String> {
96    let mut sentences = Vec::new();
97    let mut current_sentence = String::new();
98    let mut chars = text.chars().peekable();
99    let mut pos = 0;
100
101    while let Some(c) = chars.next() {
102        current_sentence.push(c);
103
104        if is_sentence_boundary(text, pos) {
105            // Include the space after sentence if it exists
106            if chars.peek() == Some(&' ') {
107                chars.next();
108                pos += 1;
109            }
110            sentences.push(current_sentence.trim().to_string());
111            current_sentence.clear();
112        }
113
114        pos += 1;
115    }
116
117    // Add any remaining text as the last sentence
118    if !current_sentence.trim().is_empty() {
119        sentences.push(current_sentence.trim().to_string());
120    }
121    sentences
122}
123
/// Check if a line is a horizontal rule (`---`, `___`, `***`).
///
/// The line must begin with `-`, `_` or `*`, consist solely of that marker and
/// spaces, and contain the marker at least three times. Leading spaces are not
/// accepted because the first character itself has to be the marker.
fn is_horizontal_rule(line: &str) -> bool {
    // The very first character decides which marker the rule is built from.
    let Some(marker) = line.chars().next().filter(|c| matches!(c, '-' | '_' | '*')) else {
        return false;
    };

    // Single pass: count markers, tolerate spaces, reject anything else.
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
152
/// Check if a line is a numbered list item (e.g., "1. ", "10. ").
///
/// The line must start with one or more ASCII digits followed by a period,
/// and the period must be followed by a space or the end of the line.
///
/// Only ASCII digits (`0`-`9`) are accepted: CommonMark ordered list markers
/// are built from arabic digits, so other numeric characters such as `½`,
/// `²` or `٣` do not start a list item.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are one byte each, so the count is also a valid byte offset.
    let digits_end = line.bytes().take_while(u8::is_ascii_digit).count();
    if digits_end == 0 {
        return false;
    }

    let mut rest = line[digits_end..].chars();
    // After the period there must be a space or nothing at all.
    rest.next() == Some('.') && matches!(rest.next(), None | Some(' '))
}
175
/// Report whether a line ends with a Markdown hard line break.
///
/// Two spellings are recognised:
/// 1. Two or more trailing spaces
/// 2. A trailing backslash
///
/// A single trailing `\r` (left over from CRLF line endings) is ignored
/// before the check.
fn has_hard_break(line: &str) -> bool {
    let body = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    ["  ", "\\"].iter().any(|&marker| body.ends_with(marker))
}
185
/// Strip trailing whitespace from a line without destroying a hard break.
///
/// * A single trailing `\r` (CRLF residue) is always removed first.
/// * A line ending in a backslash is returned untouched (apart from the `\r`).
/// * A line ending in two or more spaces is trimmed and then given back
///   exactly two trailing spaces, unless it contained only whitespace, in
///   which case the result is empty.
/// * Any other line simply has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash-style hard break: keep the line exactly as it is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let mut out = String::from(content);

    // Space-style hard break: normalise to exactly two trailing spaces, but
    // never resurrect them on a line that had no real content.
    if line.ends_with("  ") && !content.is_empty() {
        out.push_str("  ");
    }
    out
}
216
217pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
218    // For sentence-per-line mode, always process regardless of length
219    if options.sentence_per_line {
220        let elements = parse_markdown_elements(line);
221        return reflow_elements_sentence_per_line(&elements);
222    }
223
224    // Quick check: if line is already short enough, return as-is
225    if line.chars().count() <= options.line_length {
226        return vec![line.to_string()];
227    }
228
229    // Parse the markdown to identify elements
230    let elements = parse_markdown_elements(line);
231
232    // Reflow the elements into lines
233    reflow_elements(&elements, options)
234}
235
/// One inline unit of a parsed Markdown line.
///
/// Except for `Text`, `HtmlTag` and `HtmlEntity` (which hold their source text
/// verbatim), each variant stores only the inner content; the surrounding
/// markup is re-added by the `Display` impl and accounted for by `len`.
#[derive(Debug, Clone)]
enum Element {
    /// Plain, freely wrappable text
    Text(String),
    /// Inline link: `[text](url)`
    Link { text: String, url: String },
    /// Full reference link: `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[ref]`
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`
    Strikethrough(String),
    /// Wiki-style link: `[[wiki]]` or `[[wiki|text]]`
    WikiLink(String),
    /// Inline math: `$math$`
    InlineMath(String),
    /// Display math: `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode: `:emoji:`
    EmojiShortcode(String),
    /// HTML tag such as `<tag>`, `</tag>` or `<tag/>`, brackets included
    HtmlTag(String),
    /// HTML entity such as `&nbsp;` or `&#123;`, stored whole
    HtmlEntity(String),
    /// Inline code: `` `code` ``
    Code(String),
    /// Bold: `**text**`
    Bold(String),
    /// Italic: `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Render the element back into its Markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Stored verbatim, so written out without any added delimiters.
            Self::Text(raw) | Self::HtmlTag(raw) | Self::HtmlEntity(raw) => f.write_str(raw),

            // Links and images
            Self::Link { text, url } => write!(f, "[{text}]({url})"),
            Self::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Self::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Self::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Self::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Self::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Self::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
            Self::FootnoteReference { note } => write!(f, "[^{note}]"),
            Self::WikiLink(target) => write!(f, "[[{target}]]"),

            // Delimited spans
            Self::Strikethrough(inner) => write!(f, "~~{inner}~~"),
            Self::InlineMath(inner) => write!(f, "${inner}$"),
            Self::DisplayMath(inner) => write!(f, "$${inner}$$"),
            Self::EmojiShortcode(inner) => write!(f, ":{inner}:"),
            Self::Code(inner) => write!(f, "`{inner}`"),
            Self::Bold(inner) => write!(f, "**{inner}**"),
            Self::Italic(inner) => write!(f, "*{inner}*"),
        }
    }
}

impl Element {
    /// Length of the rendered element in characters (not bytes).
    ///
    /// Computed as the character count of the stored content plus the fixed
    /// number of markup characters the `Display` impl adds for that variant.
    fn len(&self) -> usize {
        fn count(s: &str) -> usize {
            s.chars().count()
        }

        // (characters of stored content, characters of surrounding markup)
        let (content, markup) = match self {
            Self::Text(raw) | Self::HtmlTag(raw) | Self::HtmlEntity(raw) => (count(raw), 0),
            Self::Link { text, url } => (count(text) + count(url), 4), // [ ]( )
            Self::ReferenceLink { text, reference } => (count(text) + count(reference), 4), // [ ][ ]
            Self::EmptyReferenceLink { text } => (count(text), 4),     // [ ][]
            Self::ShortcutReference { reference } => (count(reference), 2), // [ ]
            Self::InlineImage { alt, url } => (count(alt) + count(url), 5), // ![ ]( )
            Self::ReferenceImage { alt, reference } => (count(alt) + count(reference), 5), // ![ ][ ]
            Self::EmptyReferenceImage { alt } => (count(alt), 5),      // ![ ][]
            Self::FootnoteReference { note } => (count(note), 3),      // [^ ]
            Self::Strikethrough(inner) => (count(inner), 4),           // ~~ ~~
            Self::WikiLink(inner) => (count(inner), 4),                // [[ ]]
            Self::InlineMath(inner) => (count(inner), 2),              // $ $
            Self::DisplayMath(inner) => (count(inner), 4),             // $$ $$
            Self::EmojiShortcode(inner) => (count(inner), 2),          // : :
            Self::Code(inner) => (count(inner), 2),                    // ` `
            Self::Bold(inner) => (count(inner), 4),                    // ** **
            Self::Italic(inner) => (count(inner), 2),                  // * *
        };
        content + markup
    }
}
330
331/// Parse markdown elements from text preserving the raw syntax
332///
333/// Detection order is critical:
334/// 1. Inline links [text](url) - must be detected first to avoid conflicts
335/// 2. Reference links [text][ref] - detected before shortcut references
336/// 3. Empty reference links [text][] - a special case of reference links
337/// 4. Shortcut reference links [ref] - detected last to avoid false positives
338/// 5. Other elements (code, bold, italic) - processed normally
339fn parse_markdown_elements(text: &str) -> Vec<Element> {
340    let mut elements = Vec::new();
341    let mut remaining = text;
342
343    while !remaining.is_empty() {
344        // Find the earliest occurrence of any markdown pattern
345        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
346
347        // Check for images first (they start with ! so should be detected before links)
348        // Inline images - ![alt](url)
349        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
350            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
351        {
352            earliest_match = Some((m.start(), "inline_image", m));
353        }
354
355        // Reference images - ![alt][ref]
356        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
357            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
358        {
359            earliest_match = Some((m.start(), "ref_image", m));
360        }
361
362        // Check for footnote references - [^note]
363        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
364            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
365        {
366            earliest_match = Some((m.start(), "footnote_ref", m));
367        }
368
369        // Check for inline links - [text](url)
370        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
371            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
372        {
373            earliest_match = Some((m.start(), "inline_link", m));
374        }
375
376        // Check for reference links - [text][ref]
377        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
378            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
379        {
380            earliest_match = Some((m.start(), "ref_link", m));
381        }
382
383        // Check for shortcut reference links - [ref]
384        // Only check if we haven't found an earlier pattern that would conflict
385        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
386            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
387        {
388            earliest_match = Some((m.start(), "shortcut_ref", m));
389        }
390
391        // Check for wiki-style links - [[wiki]]
392        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
393            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
394        {
395            earliest_match = Some((m.start(), "wiki_link", m));
396        }
397
398        // Check for display math first (before inline) - $$math$$
399        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
400            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
401        {
402            earliest_match = Some((m.start(), "display_math", m));
403        }
404
405        // Check for inline math - $math$
406        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
407            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
408        {
409            earliest_match = Some((m.start(), "inline_math", m));
410        }
411
412        // Check for strikethrough - ~~text~~
413        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
414            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
415        {
416            earliest_match = Some((m.start(), "strikethrough", m));
417        }
418
419        // Check for emoji shortcodes - :emoji:
420        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
421            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
422        {
423            earliest_match = Some((m.start(), "emoji", m));
424        }
425
426        // Check for HTML entities - &nbsp; etc
427        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
428            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
429        {
430            earliest_match = Some((m.start(), "html_entity", m));
431        }
432
433        // Check for HTML tags - <tag> </tag> <tag/>
434        // But exclude autolinks like <https://...> or <mailto:...>
435        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
436            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
437        {
438            // Check if this is an autolink (starts with protocol or mailto:)
439            let matched_text = &remaining[m.start()..m.end()];
440            let is_autolink = matched_text.starts_with("<http://")
441                || matched_text.starts_with("<https://")
442                || matched_text.starts_with("<mailto:")
443                || matched_text.starts_with("<ftp://")
444                || matched_text.starts_with("<ftps://");
445
446            if !is_autolink {
447                earliest_match = Some((m.start(), "html_tag", m));
448            }
449        }
450
451        // Find earliest non-link special characters
452        let mut next_special = remaining.len();
453        let mut special_type = "";
454
455        if let Some(pos) = remaining.find('`')
456            && pos < next_special
457        {
458            next_special = pos;
459            special_type = "code";
460        }
461        if let Some(pos) = remaining.find("**")
462            && pos < next_special
463        {
464            next_special = pos;
465            special_type = "bold";
466        }
467        if let Some(pos) = remaining.find('*')
468            && pos < next_special
469            && !remaining[pos..].starts_with("**")
470        {
471            next_special = pos;
472            special_type = "italic";
473        }
474
475        // Determine which pattern to process first
476        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
477            pos < next_special
478        } else {
479            false
480        };
481
482        if should_process_markdown_link {
483            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
484
485            // Add any text before the match
486            if pos > 0 {
487                elements.push(Element::Text(remaining[..pos].to_string()));
488            }
489
490            // Process the matched pattern
491            match pattern_type {
492                "inline_image" => {
493                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
494                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
495                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
496                        elements.push(Element::InlineImage {
497                            alt: alt.to_string(),
498                            url: url.to_string(),
499                        });
500                        remaining = &remaining[match_obj.end()..];
501                    } else {
502                        elements.push(Element::Text("!".to_string()));
503                        remaining = &remaining[1..];
504                    }
505                }
506                "ref_image" => {
507                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
508                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
509                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
510
511                        if reference.is_empty() {
512                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
513                        } else {
514                            elements.push(Element::ReferenceImage {
515                                alt: alt.to_string(),
516                                reference: reference.to_string(),
517                            });
518                        }
519                        remaining = &remaining[match_obj.end()..];
520                    } else {
521                        elements.push(Element::Text("!".to_string()));
522                        remaining = &remaining[1..];
523                    }
524                }
525                "footnote_ref" => {
526                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
527                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
528                        elements.push(Element::FootnoteReference { note: note.to_string() });
529                        remaining = &remaining[match_obj.end()..];
530                    } else {
531                        elements.push(Element::Text("[".to_string()));
532                        remaining = &remaining[1..];
533                    }
534                }
535                "inline_link" => {
536                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
537                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
538                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
539                        elements.push(Element::Link {
540                            text: text.to_string(),
541                            url: url.to_string(),
542                        });
543                        remaining = &remaining[match_obj.end()..];
544                    } else {
545                        // Fallback - shouldn't happen
546                        elements.push(Element::Text("[".to_string()));
547                        remaining = &remaining[1..];
548                    }
549                }
550                "ref_link" => {
551                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
552                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
553                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
554
555                        if reference.is_empty() {
556                            // Empty reference link [text][]
557                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
558                        } else {
559                            // Regular reference link [text][ref]
560                            elements.push(Element::ReferenceLink {
561                                text: text.to_string(),
562                                reference: reference.to_string(),
563                            });
564                        }
565                        remaining = &remaining[match_obj.end()..];
566                    } else {
567                        // Fallback - shouldn't happen
568                        elements.push(Element::Text("[".to_string()));
569                        remaining = &remaining[1..];
570                    }
571                }
572                "shortcut_ref" => {
573                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
574                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
575                        elements.push(Element::ShortcutReference {
576                            reference: reference.to_string(),
577                        });
578                        remaining = &remaining[match_obj.end()..];
579                    } else {
580                        // Fallback - shouldn't happen
581                        elements.push(Element::Text("[".to_string()));
582                        remaining = &remaining[1..];
583                    }
584                }
585                "wiki_link" => {
586                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
587                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
588                        elements.push(Element::WikiLink(content.to_string()));
589                        remaining = &remaining[match_obj.end()..];
590                    } else {
591                        elements.push(Element::Text("[[".to_string()));
592                        remaining = &remaining[2..];
593                    }
594                }
595                "display_math" => {
596                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
597                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
598                        elements.push(Element::DisplayMath(math.to_string()));
599                        remaining = &remaining[match_obj.end()..];
600                    } else {
601                        elements.push(Element::Text("$$".to_string()));
602                        remaining = &remaining[2..];
603                    }
604                }
605                "inline_math" => {
606                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
607                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
608                        elements.push(Element::InlineMath(math.to_string()));
609                        remaining = &remaining[match_obj.end()..];
610                    } else {
611                        elements.push(Element::Text("$".to_string()));
612                        remaining = &remaining[1..];
613                    }
614                }
615                "strikethrough" => {
616                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
617                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
618                        elements.push(Element::Strikethrough(text.to_string()));
619                        remaining = &remaining[match_obj.end()..];
620                    } else {
621                        elements.push(Element::Text("~~".to_string()));
622                        remaining = &remaining[2..];
623                    }
624                }
625                "emoji" => {
626                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
627                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
628                        elements.push(Element::EmojiShortcode(emoji.to_string()));
629                        remaining = &remaining[match_obj.end()..];
630                    } else {
631                        elements.push(Element::Text(":".to_string()));
632                        remaining = &remaining[1..];
633                    }
634                }
635                "html_entity" => {
636                    // HTML entities are captured whole
637                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
638                    remaining = &remaining[match_obj.end()..];
639                }
640                "html_tag" => {
641                    // HTML tags are captured whole
642                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
643                    remaining = &remaining[match_obj.end()..];
644                }
645                _ => {
646                    // Unknown pattern, treat as text
647                    elements.push(Element::Text("[".to_string()));
648                    remaining = &remaining[1..];
649                }
650            }
651        } else {
652            // Process non-link special characters
653
654            // Add any text before the special character
655            if next_special > 0 && next_special < remaining.len() {
656                elements.push(Element::Text(remaining[..next_special].to_string()));
657                remaining = &remaining[next_special..];
658            }
659
660            // Process the special element
661            match special_type {
662                "code" => {
663                    // Find end of code
664                    if let Some(code_end) = remaining[1..].find('`') {
665                        let code = &remaining[1..1 + code_end];
666                        elements.push(Element::Code(code.to_string()));
667                        remaining = &remaining[1 + code_end + 1..];
668                    } else {
669                        // No closing backtick, treat as text
670                        elements.push(Element::Text(remaining.to_string()));
671                        break;
672                    }
673                }
674                "bold" => {
675                    // Check for bold text
676                    if let Some(bold_end) = remaining[2..].find("**") {
677                        let bold_text = &remaining[2..2 + bold_end];
678                        elements.push(Element::Bold(bold_text.to_string()));
679                        remaining = &remaining[2 + bold_end + 2..];
680                    } else {
681                        // No closing **, treat as text
682                        elements.push(Element::Text("**".to_string()));
683                        remaining = &remaining[2..];
684                    }
685                }
686                "italic" => {
687                    // Check for italic text
688                    if let Some(italic_end) = remaining[1..].find('*') {
689                        let italic_text = &remaining[1..1 + italic_end];
690                        elements.push(Element::Italic(italic_text.to_string()));
691                        remaining = &remaining[1 + italic_end + 1..];
692                    } else {
693                        // No closing *, treat as text
694                        elements.push(Element::Text("*".to_string()));
695                        remaining = &remaining[1..];
696                    }
697                }
698                _ => {
699                    // No special elements found, add all remaining text
700                    elements.push(Element::Text(remaining.to_string()));
701                    break;
702                }
703            }
704        }
705    }
706
707    elements
708}
709
710/// Reflow elements for sentence-per-line mode
711fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
712    let mut lines = Vec::new();
713    let mut current_line = String::new();
714
715    for element in elements.iter() {
716        let element_str = format!("{element}");
717
718        // For text elements, split into sentences
719        if let Element::Text(text) = element {
720            // Simply append text - it already has correct spacing from tokenization
721            let combined = format!("{current_line}{text}");
722            let sentences = split_into_sentences(&combined);
723
724            if sentences.len() > 1 {
725                // We found sentence boundaries
726                for (i, sentence) in sentences.iter().enumerate() {
727                    if i == 0 {
728                        // First sentence might continue from previous elements
729                        // But check if it ends with an abbreviation
730                        let trimmed = sentence.trim();
731                        let ends_with_sentence_punct =
732                            trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
733                        let ends_with_abbreviation = if ends_with_sentence_punct {
734                            // Strip the final punctuation before checking abbreviations
735                            let without_punct = trimmed
736                                .trim_end_matches('.')
737                                .trim_end_matches('!')
738                                .trim_end_matches('?');
739                            let ignored_words = [
740                                "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
741                                "Jr",
742                            ];
743                            ignored_words
744                                .iter()
745                                .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
746                        } else {
747                            false
748                        };
749
750                        if ends_with_abbreviation {
751                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
752                            current_line = sentence.to_string();
753                        } else {
754                            // Normal case - emit the first sentence
755                            lines.push(sentence.to_string());
756                            current_line.clear();
757                        }
758                    } else if i == sentences.len() - 1 {
759                        // Last sentence: check if it's complete or incomplete
760                        let trimmed = sentence.trim();
761                        let ends_with_sentence_punct =
762                            trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
763
764                        // Check if it ends with an abbreviation
765                        let ends_with_abbreviation = if ends_with_sentence_punct {
766                            // Strip the final punctuation before checking abbreviations
767                            let without_punct = trimmed
768                                .trim_end_matches('.')
769                                .trim_end_matches('!')
770                                .trim_end_matches('?');
771                            let ignored_words = [
772                                "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr",
773                                "Jr",
774                            ];
775                            ignored_words
776                                .iter()
777                                .any(|word| without_punct.to_lowercase().ends_with(&word.to_lowercase()))
778                        } else {
779                            false
780                        };
781
782                        if ends_with_sentence_punct && !ends_with_abbreviation {
783                            // Complete sentence - emit it immediately
784                            lines.push(sentence.to_string());
785                            current_line.clear();
786                        } else {
787                            // Incomplete sentence - save for next iteration
788                            current_line = sentence.to_string();
789                        }
790                    } else {
791                        // Complete sentences in the middle
792                        lines.push(sentence.to_string());
793                    }
794                }
795            } else {
796                // No sentence boundary found, continue accumulating
797                current_line = combined;
798            }
799        } else {
800            // Non-text elements (Code, Bold, Italic, etc.)
801            // Add space before element if needed (unless it's after an opening paren/bracket)
802            if !current_line.is_empty()
803                && !current_line.ends_with(' ')
804                && !current_line.ends_with('(')
805                && !current_line.ends_with('[')
806            {
807                current_line.push(' ');
808            }
809            current_line.push_str(&element_str);
810        }
811    }
812
813    // Add any remaining content
814    if !current_line.is_empty() {
815        lines.push(current_line.trim().to_string());
816    }
817    lines
818}
819
820/// Reflow elements into lines that fit within the line length
821fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
822    let mut lines = Vec::new();
823    let mut current_line = String::new();
824    let mut current_length = 0;
825
826    for element in elements {
827        let element_str = format!("{element}");
828        let element_len = element.len();
829
830        // For text elements that might need breaking
831        if let Element::Text(text) = element {
832            // If this is a text element, always process it word by word
833            let words: Vec<&str> = text.split_whitespace().collect();
834
835            for word in words {
836                let word_len = word.chars().count();
837                if current_length > 0 && current_length + 1 + word_len > options.line_length {
838                    // Start a new line
839                    lines.push(current_line.trim().to_string());
840                    current_line = word.to_string();
841                    current_length = word_len;
842                } else {
843                    // Add word to current line
844                    if current_length > 0 {
845                        current_line.push(' ');
846                        current_length += 1;
847                    }
848                    current_line.push_str(word);
849                    current_length += word_len;
850                }
851            }
852        } else {
853            // For non-text elements (code, links, references), treat as atomic units
854            // These should never be broken across lines
855            if current_length > 0 && current_length + 1 + element_len > options.line_length {
856                // Start a new line
857                lines.push(current_line.trim().to_string());
858                current_line = element_str;
859                current_length = element_len;
860            } else {
861                // Add element to current line
862                if current_length > 0 {
863                    current_line.push(' ');
864                    current_length += 1;
865                }
866                current_line.push_str(&element_str);
867                current_length += element_len;
868            }
869        }
870    }
871
872    // Don't forget the last line
873    if !current_line.is_empty() {
874        lines.push(current_line.trim_end().to_string());
875    }
876
877    lines
878}
879
880/// Reflow markdown content preserving structure
881pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
882    let lines: Vec<&str> = content.lines().collect();
883    let mut result = Vec::new();
884    let mut i = 0;
885
886    while i < lines.len() {
887        let line = lines[i];
888        let trimmed = line.trim();
889
890        // Preserve empty lines
891        if trimmed.is_empty() {
892            result.push(String::new());
893            i += 1;
894            continue;
895        }
896
897        // Preserve headings as-is
898        if trimmed.starts_with('#') {
899            result.push(line.to_string());
900            i += 1;
901            continue;
902        }
903
904        // Preserve fenced code blocks
905        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
906            result.push(line.to_string());
907            i += 1;
908            // Copy lines until closing fence
909            while i < lines.len() {
910                result.push(lines[i].to_string());
911                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
912                    i += 1;
913                    break;
914                }
915                i += 1;
916            }
917            continue;
918        }
919
920        // Preserve indented code blocks (4+ spaces or 1+ tab)
921        if line.starts_with("    ") || line.starts_with("\t") {
922            // Collect all consecutive indented lines
923            result.push(line.to_string());
924            i += 1;
925            while i < lines.len() {
926                let next_line = lines[i];
927                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
928                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
929                    result.push(next_line.to_string());
930                    i += 1;
931                } else {
932                    break;
933                }
934            }
935            continue;
936        }
937
938        // Preserve block quotes (but reflow their content)
939        if trimmed.starts_with('>') {
940            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
941            let quote_content = &line[quote_prefix.len()..].trim_start();
942
943            let reflowed = reflow_line(quote_content, options);
944            for reflowed_line in reflowed.iter() {
945                result.push(format!("{quote_prefix} {reflowed_line}"));
946            }
947            i += 1;
948            continue;
949        }
950
951        // Preserve horizontal rules first (before checking for lists)
952        if is_horizontal_rule(trimmed) {
953            result.push(line.to_string());
954            i += 1;
955            continue;
956        }
957
958        // Preserve lists (but not horizontal rules)
959        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
960            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
961            || trimmed.starts_with('+')
962            || is_numbered_list_item(trimmed)
963        {
964            // Find the list marker and preserve indentation
965            let indent = line.len() - line.trim_start().len();
966            let indent_str = " ".repeat(indent);
967
968            // For numbered lists, find the period and the space after it
969            // For bullet lists, find the marker and the space after it
970            let mut marker_end = indent;
971            let mut content_start = indent;
972
973            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
974                // Numbered list: find the period
975                if let Some(period_pos) = line[indent..].find('.') {
976                    marker_end = indent + period_pos + 1; // Include the period
977                    content_start = marker_end;
978                    // Skip any spaces after the period to find content start
979                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
980                        content_start += 1;
981                    }
982                }
983            } else {
984                // Bullet list: marker is single character
985                marker_end = indent + 1; // Just the marker character
986                content_start = marker_end;
987                // Skip any spaces after the marker
988                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
989                    content_start += 1;
990                }
991            }
992
993            let marker = &line[indent..marker_end];
994
995            // Collect all content for this list item (including continuation lines)
996            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
997            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
998            i += 1;
999
1000            // Collect continuation lines (indented lines that are part of this list item)
1001            while i < lines.len() {
1002                let next_line = lines[i];
1003                let next_trimmed = next_line.trim();
1004
1005                // Stop if we hit an empty line or another list item or special block
1006                if next_trimmed.is_empty()
1007                    || next_trimmed.starts_with('#')
1008                    || next_trimmed.starts_with("```")
1009                    || next_trimmed.starts_with("~~~")
1010                    || next_trimmed.starts_with('>')
1011                    || next_trimmed.starts_with('|')
1012                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1013                    || is_horizontal_rule(next_trimmed)
1014                    || (next_trimmed.starts_with('-')
1015                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1016                    || (next_trimmed.starts_with('*')
1017                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1018                    || (next_trimmed.starts_with('+')
1019                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1020                    || is_numbered_list_item(next_trimmed)
1021                    || is_definition_list_item(next_trimmed)
1022                {
1023                    break;
1024                }
1025
1026                // Check if this line is indented (continuation of list item)
1027                let next_indent = next_line.len() - next_line.trim_start().len();
1028                if next_indent >= content_start {
1029                    // This is a continuation line - add its content
1030                    // Preserve hard breaks while trimming excessive whitespace
1031                    let trimmed_start = next_line.trim_start();
1032                    list_content.push(trim_preserving_hard_break(trimmed_start));
1033                    i += 1;
1034                } else {
1035                    // Not indented enough, not part of this list item
1036                    break;
1037                }
1038            }
1039
1040            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
1041            // Hard breaks should prevent joining with the next line
1042            let combined_content = if options.preserve_breaks {
1043                list_content[0].clone()
1044            } else {
1045                // Check if any lines have hard breaks - if so, preserve the structure
1046                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1047                if has_hard_breaks {
1048                    // Don't join lines with hard breaks - keep them separate with newlines
1049                    list_content.join("\n")
1050                } else {
1051                    // No hard breaks, safe to join with spaces
1052                    list_content.join(" ")
1053                }
1054            };
1055
1056            // Calculate the proper indentation for continuation lines
1057            let trimmed_marker = marker;
1058            let continuation_spaces = content_start;
1059
1060            // Adjust line length to account for list marker and space
1061            let prefix_length = indent + trimmed_marker.len() + 1;
1062
1063            // Create adjusted options with reduced line length
1064            let adjusted_options = ReflowOptions {
1065                line_length: options.line_length.saturating_sub(prefix_length),
1066                ..options.clone()
1067            };
1068
1069            let reflowed = reflow_line(&combined_content, &adjusted_options);
1070            for (j, reflowed_line) in reflowed.iter().enumerate() {
1071                if j == 0 {
1072                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1073                } else {
1074                    // Continuation lines aligned with text after marker
1075                    let continuation_indent = " ".repeat(continuation_spaces);
1076                    result.push(format!("{continuation_indent}{reflowed_line}"));
1077                }
1078            }
1079            continue;
1080        }
1081
1082        // Preserve tables
1083        if trimmed.contains('|') {
1084            result.push(line.to_string());
1085            i += 1;
1086            continue;
1087        }
1088
1089        // Preserve reference definitions
1090        if trimmed.starts_with('[') && line.contains("]:") {
1091            result.push(line.to_string());
1092            i += 1;
1093            continue;
1094        }
1095
1096        // Preserve definition list items (extended markdown)
1097        if is_definition_list_item(trimmed) {
1098            result.push(line.to_string());
1099            i += 1;
1100            continue;
1101        }
1102
1103        // Check if this is a single line that doesn't need processing
1104        let mut is_single_line_paragraph = true;
1105        if i + 1 < lines.len() {
1106            let next_line = lines[i + 1];
1107            let next_trimmed = next_line.trim();
1108            // Check if next line starts a new block
1109            if !next_trimmed.is_empty()
1110                && !next_trimmed.starts_with('#')
1111                && !next_trimmed.starts_with("```")
1112                && !next_trimmed.starts_with("~~~")
1113                && !next_trimmed.starts_with('>')
1114                && !next_trimmed.starts_with('|')
1115                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1116                && !is_horizontal_rule(next_trimmed)
1117                && !(next_trimmed.starts_with('-')
1118                    && !is_horizontal_rule(next_trimmed)
1119                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1120                && !(next_trimmed.starts_with('*')
1121                    && !is_horizontal_rule(next_trimmed)
1122                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1123                && !(next_trimmed.starts_with('+')
1124                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1125                && !is_numbered_list_item(next_trimmed)
1126            {
1127                is_single_line_paragraph = false;
1128            }
1129        }
1130
1131        // If it's a single line that fits, just add it as-is
1132        if is_single_line_paragraph && line.chars().count() <= options.line_length {
1133            result.push(line.to_string());
1134            i += 1;
1135            continue;
1136        }
1137
1138        // For regular paragraphs, collect consecutive lines
1139        let mut paragraph_parts = Vec::new();
1140        let mut current_part = vec![line];
1141        i += 1;
1142
1143        // If preserve_breaks is true, treat each line separately
1144        if options.preserve_breaks {
1145            // Don't collect consecutive lines - just reflow this single line
1146            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1147                Some("\\")
1148            } else if line.ends_with("  ") {
1149                Some("  ")
1150            } else {
1151                None
1152            };
1153            let reflowed = reflow_line(line, options);
1154
1155            // Preserve hard breaks (two trailing spaces or backslash)
1156            if let Some(break_marker) = hard_break_type {
1157                if !reflowed.is_empty() {
1158                    let mut reflowed_with_break = reflowed;
1159                    let last_idx = reflowed_with_break.len() - 1;
1160                    if !has_hard_break(&reflowed_with_break[last_idx]) {
1161                        reflowed_with_break[last_idx].push_str(break_marker);
1162                    }
1163                    result.extend(reflowed_with_break);
1164                }
1165            } else {
1166                result.extend(reflowed);
1167            }
1168        } else {
1169            // Original behavior: collect consecutive lines into a paragraph
1170            while i < lines.len() {
1171                let prev_line = if !current_part.is_empty() {
1172                    current_part.last().unwrap()
1173                } else {
1174                    ""
1175                };
1176                let next_line = lines[i];
1177                let next_trimmed = next_line.trim();
1178
1179                // Stop at empty lines or special blocks
1180                if next_trimmed.is_empty()
1181                    || next_trimmed.starts_with('#')
1182                    || next_trimmed.starts_with("```")
1183                    || next_trimmed.starts_with("~~~")
1184                    || next_trimmed.starts_with('>')
1185                    || next_trimmed.starts_with('|')
1186                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1187                    || is_horizontal_rule(next_trimmed)
1188                    || (next_trimmed.starts_with('-')
1189                        && !is_horizontal_rule(next_trimmed)
1190                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1191                    || (next_trimmed.starts_with('*')
1192                        && !is_horizontal_rule(next_trimmed)
1193                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1194                    || (next_trimmed.starts_with('+')
1195                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1196                    || is_numbered_list_item(next_trimmed)
1197                    || is_definition_list_item(next_trimmed)
1198                {
1199                    break;
1200                }
1201
1202                // Check if previous line ends with hard break (two spaces or backslash)
1203                if has_hard_break(prev_line) {
1204                    // Start a new part after hard break
1205                    paragraph_parts.push(current_part.join(" "));
1206                    current_part = vec![next_line];
1207                } else {
1208                    current_part.push(next_line);
1209                }
1210                i += 1;
1211            }
1212
1213            // Add the last part
1214            if !current_part.is_empty() {
1215                if current_part.len() == 1 {
1216                    // Single line, don't add trailing space
1217                    paragraph_parts.push(current_part[0].to_string());
1218                } else {
1219                    paragraph_parts.push(current_part.join(" "));
1220                }
1221            }
1222
1223            // Reflow each part separately, preserving hard breaks
1224            for (j, part) in paragraph_parts.iter().enumerate() {
1225                let reflowed = reflow_line(part, options);
1226                result.extend(reflowed);
1227
1228                // Preserve hard break by ensuring last line of part ends with hard break marker
1229                // Use two spaces as the default hard break format for reflows
1230                if j < paragraph_parts.len() - 1 && !result.is_empty() {
1231                    let last_idx = result.len() - 1;
1232                    if !has_hard_break(&result[last_idx]) {
1233                        result[last_idx].push_str("  ");
1234                    }
1235                }
1236            }
1237        }
1238    }
1239
1240    // Preserve trailing newline if the original content had one
1241    let result_text = result.join("\n");
1242    if content.ends_with('\n') && !result_text.ends_with('\n') {
1243        format!("{result_text}\n")
1244    } else {
1245        result_text
1246    }
1247}
1248
/// Information about a reflowed paragraph: the byte range it occupies in the
/// original content and the text that should replace that range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Ending byte offset of the paragraph in the original content (exclusive)
    pub end_byte: usize,
    /// The reflowed text for this paragraph
    pub reflowed_text: String,
}
1259
1260/// Reflow a single paragraph at the specified line number
1261///
1262/// This function finds the paragraph containing the given line number,
1263/// reflows it according to the specified line length, and returns
1264/// information about the paragraph location and its reflowed text.
1265///
1266/// # Arguments
1267///
1268/// * `content` - The full document content
1269/// * `line_number` - The 1-based line number within the paragraph to reflow
1270/// * `line_length` - The target line length for reflowing
1271///
1272/// # Returns
1273///
1274/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
1275/// or `None` if the line number is out of bounds or the content at that
1276/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
1277pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1278    if line_number == 0 {
1279        return None;
1280    }
1281
1282    let lines: Vec<&str> = content.lines().collect();
1283
1284    // Check if line number is valid (1-based)
1285    if line_number > lines.len() {
1286        return None;
1287    }
1288
1289    let target_idx = line_number - 1; // Convert to 0-based
1290    let target_line = lines[target_idx];
1291    let trimmed = target_line.trim();
1292
1293    // Don't reflow special blocks
1294    if trimmed.is_empty()
1295        || trimmed.starts_with('#')
1296        || trimmed.starts_with("```")
1297        || trimmed.starts_with("~~~")
1298        || target_line.starts_with("    ")
1299        || target_line.starts_with('\t')
1300        || trimmed.starts_with('>')
1301        || trimmed.contains('|') // Tables
1302        || (trimmed.starts_with('[') && target_line.contains("]:")) // Reference definitions
1303        || is_horizontal_rule(trimmed)
1304        || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1305            && !is_horizontal_rule(trimmed)
1306            && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1307        || is_numbered_list_item(trimmed)
1308        || is_definition_list_item(trimmed)
1309    {
1310        return None;
1311    }
1312
1313    // Find paragraph start - scan backward until blank line or special block
1314    let mut para_start = target_idx;
1315    while para_start > 0 {
1316        let prev_idx = para_start - 1;
1317        let prev_line = lines[prev_idx];
1318        let prev_trimmed = prev_line.trim();
1319
1320        // Stop at blank line or special blocks
1321        if prev_trimmed.is_empty()
1322            || prev_trimmed.starts_with('#')
1323            || prev_trimmed.starts_with("```")
1324            || prev_trimmed.starts_with("~~~")
1325            || prev_line.starts_with("    ")
1326            || prev_line.starts_with('\t')
1327            || prev_trimmed.starts_with('>')
1328            || prev_trimmed.contains('|')
1329            || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1330            || is_horizontal_rule(prev_trimmed)
1331            || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1332                && !is_horizontal_rule(prev_trimmed)
1333                && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1334            || is_numbered_list_item(prev_trimmed)
1335            || is_definition_list_item(prev_trimmed)
1336        {
1337            break;
1338        }
1339
1340        para_start = prev_idx;
1341    }
1342
1343    // Find paragraph end - scan forward until blank line or special block
1344    let mut para_end = target_idx;
1345    while para_end + 1 < lines.len() {
1346        let next_idx = para_end + 1;
1347        let next_line = lines[next_idx];
1348        let next_trimmed = next_line.trim();
1349
1350        // Stop at blank line or special blocks
1351        if next_trimmed.is_empty()
1352            || next_trimmed.starts_with('#')
1353            || next_trimmed.starts_with("```")
1354            || next_trimmed.starts_with("~~~")
1355            || next_line.starts_with("    ")
1356            || next_line.starts_with('\t')
1357            || next_trimmed.starts_with('>')
1358            || next_trimmed.contains('|')
1359            || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1360            || is_horizontal_rule(next_trimmed)
1361            || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1362                && !is_horizontal_rule(next_trimmed)
1363                && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1364            || is_numbered_list_item(next_trimmed)
1365            || is_definition_list_item(next_trimmed)
1366        {
1367            break;
1368        }
1369
1370        para_end = next_idx;
1371    }
1372
1373    // Extract paragraph lines
1374    let paragraph_lines = &lines[para_start..=para_end];
1375
1376    // Calculate byte offsets
1377    let mut start_byte = 0;
1378    for line in lines.iter().take(para_start) {
1379        start_byte += line.len() + 1; // +1 for newline
1380    }
1381
1382    let mut end_byte = start_byte;
1383    for line in paragraph_lines.iter() {
1384        end_byte += line.len() + 1; // +1 for newline
1385    }
1386
1387    // Track whether the byte range includes a trailing newline
1388    // (it doesn't if this is the last line and the file doesn't end with newline)
1389    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1390
1391    // Adjust end_byte if the last line doesn't have a newline
1392    if !includes_trailing_newline {
1393        end_byte -= 1;
1394    }
1395
1396    // Join paragraph lines and reflow
1397    let paragraph_text = paragraph_lines.join("\n");
1398
1399    // Create reflow options
1400    let options = ReflowOptions {
1401        line_length,
1402        break_on_sentences: true,
1403        preserve_breaks: false,
1404        sentence_per_line: false,
1405    };
1406
1407    // Reflow the paragraph using reflow_markdown to handle it properly
1408    let reflowed = reflow_markdown(&paragraph_text, &options);
1409
1410    // Ensure reflowed text matches whether the byte range includes a trailing newline
1411    // This is critical: if the range includes a newline, the replacement must too,
1412    // otherwise the next line will get appended to the reflowed paragraph
1413    let reflowed_text = if includes_trailing_newline {
1414        // Range includes newline - ensure reflowed text has one
1415        if reflowed.ends_with('\n') {
1416            reflowed
1417        } else {
1418            format!("{reflowed}\n")
1419        }
1420    } else {
1421        // Range doesn't include newline - ensure reflowed text doesn't have one
1422        if reflowed.ends_with('\n') {
1423            reflowed.trim_end_matches('\n').to_string()
1424        } else {
1425            reflowed
1426        }
1427    };
1428
1429    Some(ParagraphReflow {
1430        start_byte,
1431        end_byte,
1432        reflowed_text,
1433    })
1434}
1435
1436#[cfg(test)]
1437mod tests {
1438    use super::*;
1439
1440    #[test]
1441    fn test_list_item_trailing_whitespace_removal() {
1442        // Test for issue #76 - hard breaks (2 trailing spaces) should be preserved
1443        // and prevent reflowing
1444        let input = "1. First line with trailing spaces   \n    Second line with trailing spaces  \n    Third line\n";
1445
1446        let options = ReflowOptions {
1447            line_length: 999999,
1448            break_on_sentences: true, // MD013 uses true by default
1449            preserve_breaks: false,
1450            sentence_per_line: false,
1451        };
1452
1453        let result = reflow_markdown(input, &options);
1454
1455        // Should not contain 3+ consecutive spaces (which would indicate
1456        // trailing whitespace became mid-line whitespace)
1457        assert!(
1458            !result.contains("   "),
1459            "Result should not contain 3+ consecutive spaces: {result:?}"
1460        );
1461
1462        // Hard breaks should be preserved (exactly 2 trailing spaces)
1463        assert!(result.contains("  \n"), "Hard breaks should be preserved: {result:?}");
1464
1465        // Should NOT be reflowed into a single line because hard breaks are present
1466        // The content should maintain its line structure
1467        assert!(
1468            result.lines().count() >= 2,
1469            "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1470            result.lines().count()
1471        );
1472    }
1473
1474    #[test]
1475    fn test_reflow_simple_text() {
1476        let options = ReflowOptions {
1477            line_length: 20,
1478            ..Default::default()
1479        };
1480
1481        let input = "This is a very long line that needs to be wrapped";
1482        let result = reflow_line(input, &options);
1483
1484        assert_eq!(result.len(), 3);
1485        assert!(result[0].chars().count() <= 20);
1486        assert!(result[1].chars().count() <= 20);
1487        assert!(result[2].chars().count() <= 20);
1488    }
1489
1490    #[test]
1491    fn test_preserve_inline_code() {
1492        let options = ReflowOptions {
1493            line_length: 30,
1494            ..Default::default()
1495        };
1496
1497        let result = reflow_line("This line has `inline code` that should be preserved", &options);
1498        // Verify inline code is not broken
1499        let joined = result.join(" ");
1500        assert!(joined.contains("`inline code`"));
1501    }
1502
1503    #[test]
1504    fn test_preserve_links() {
1505        let options = ReflowOptions {
1506            line_length: 40,
1507            ..Default::default()
1508        };
1509
1510        let text = "Check out [this link](https://example.com/very/long/url) for more info";
1511        let result = reflow_line(text, &options);
1512
1513        // Verify link is preserved intact
1514        let joined = result.join(" ");
1515        assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1516    }
1517
1518    #[test]
1519    fn test_reference_link_patterns_fixed() {
1520        let options = ReflowOptions {
1521            line_length: 30,
1522            break_on_sentences: true,
1523            preserve_breaks: false,
1524            sentence_per_line: false,
1525        };
1526
1527        // Test cases that verify reference links are preserved as atomic units
1528        let test_cases = vec![
1529            // Reference link: [text][ref] - should be preserved intact
1530            ("Check out [text][ref] for details", vec!["[text][ref]"]),
1531            // Empty reference: [text][] - should be preserved intact
1532            ("See [text][] for info", vec!["[text][]"]),
1533            // Shortcut reference: [homepage] - should be preserved intact
1534            ("Visit [homepage] today", vec!["[homepage]"]),
1535            // Multiple reference links in one line
1536            (
1537                "Links: [first][ref1] and [second][ref2] here",
1538                vec!["[first][ref1]", "[second][ref2]"],
1539            ),
1540            // Mixed inline and reference links
1541            (
1542                "See [inline](url) and [reference][ref] links",
1543                vec!["[inline](url)", "[reference][ref]"],
1544            ),
1545        ];
1546
1547        for (input, expected_patterns) in test_cases {
1548            println!("\nTesting: {input}");
1549            let result = reflow_line(input, &options);
1550            let joined = result.join(" ");
1551            println!("Result:  {joined}");
1552
1553            // Verify all expected patterns are preserved
1554            for expected_pattern in expected_patterns {
1555                assert!(
1556                    joined.contains(expected_pattern),
1557                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1558                );
1559            }
1560
1561            // Verify no broken patterns exist (spaces inside brackets)
1562            assert!(
1563                !joined.contains("[ ") || !joined.contains("] ["),
1564                "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
1565            );
1566        }
1567    }
1568
1569    #[test]
1570    fn test_sentence_detection_basic() {
1571        // Test basic sentence detection
1572        assert!(is_sentence_boundary("Hello. World", 5));
1573        assert!(is_sentence_boundary("Test! Another", 4));
1574        assert!(is_sentence_boundary("Question? Answer", 8));
1575
1576        // Test non-boundaries
1577        assert!(!is_sentence_boundary("Hello world", 5));
1578        assert!(!is_sentence_boundary("Test.com", 4));
1579        assert!(!is_sentence_boundary("3.14 pi", 1));
1580    }
1581
1582    #[test]
1583    fn test_sentence_detection_abbreviations() {
1584        // Common abbreviations should not be treated as sentence boundaries
1585        assert!(!is_sentence_boundary("Mr. Smith", 2));
1586        assert!(!is_sentence_boundary("Dr. Jones", 2));
1587        assert!(!is_sentence_boundary("e.g. example", 3));
1588        assert!(!is_sentence_boundary("i.e. that is", 3));
1589        assert!(!is_sentence_boundary("etc. items", 3));
1590
1591        // But sentence after abbreviation should be a boundary
1592        assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1593    }
1594
1595    #[test]
1596    fn test_split_into_sentences() {
1597        let text = "First sentence. Second sentence. Third one!";
1598        let sentences = split_into_sentences(text);
1599        assert_eq!(sentences.len(), 3);
1600        assert_eq!(sentences[0], "First sentence.");
1601        assert_eq!(sentences[1], "Second sentence.");
1602        assert_eq!(sentences[2], "Third one!");
1603
1604        // Test with abbreviations
1605        let text2 = "Mr. Smith met Dr. Jones.";
1606        let sentences2 = split_into_sentences(text2);
1607        assert_eq!(sentences2.len(), 1);
1608        assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1609
1610        // Test single sentence
1611        let text3 = "This is a single sentence.";
1612        let sentences3 = split_into_sentences(text3);
1613        assert_eq!(sentences3.len(), 1);
1614        assert_eq!(sentences3[0], "This is a single sentence.");
1615    }
1616
1617    #[test]
1618    fn test_sentence_per_line_reflow() {
1619        let options = ReflowOptions {
1620            line_length: 80,
1621            break_on_sentences: true,
1622            preserve_breaks: false,
1623            sentence_per_line: true,
1624        };
1625
1626        // Test basic sentence splitting
1627        let input = "First sentence. Second sentence. Third sentence.";
1628        let result = reflow_line(input, &options);
1629        assert_eq!(result.len(), 3);
1630        assert_eq!(result[0], "First sentence.");
1631        assert_eq!(result[1], "Second sentence.");
1632        assert_eq!(result[2], "Third sentence.");
1633
1634        // Test with markdown elements
1635        let input2 = "This has **bold**. And [a link](url).";
1636        let result2 = reflow_line(input2, &options);
1637        assert_eq!(result2.len(), 2);
1638        assert_eq!(result2[0], "This has **bold**.");
1639        assert_eq!(result2[1], "And [a link](url).");
1640    }
1641
1642    #[test]
1643    fn test_sentence_per_line_with_backticks() {
1644        let options = ReflowOptions {
1645            line_length: 80,
1646            break_on_sentences: true,
1647            preserve_breaks: false,
1648            sentence_per_line: true,
1649        };
1650
1651        let input = "This sentence has `code` in it. And this has `more code` too.";
1652        let result = reflow_line(input, &options);
1653        assert_eq!(result.len(), 2);
1654        assert_eq!(result[0], "This sentence has `code` in it.");
1655        assert_eq!(result[1], "And this has `more code` too.");
1656    }
1657
1658    #[test]
1659    fn test_sentence_per_line_with_backticks_in_parens() {
1660        let options = ReflowOptions {
1661            line_length: 80,
1662            break_on_sentences: true,
1663            preserve_breaks: false,
1664            sentence_per_line: true,
1665        };
1666
1667        let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1668        let result = reflow_line(input, &options);
1669        assert_eq!(result.len(), 2);
1670        assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1671        assert_eq!(result[1], "Next sentence.");
1672    }
1673
1674    #[test]
1675    fn test_sentence_per_line_with_questions_exclamations() {
1676        let options = ReflowOptions {
1677            line_length: 80,
1678            break_on_sentences: true,
1679            preserve_breaks: false,
1680            sentence_per_line: true,
1681        };
1682
1683        let input = "Is this a question? Yes it is! And a statement.";
1684        let result = reflow_line(input, &options);
1685        assert_eq!(result.len(), 3);
1686        assert_eq!(result[0], "Is this a question?");
1687        assert_eq!(result[1], "Yes it is!");
1688        assert_eq!(result[2], "And a statement.");
1689    }
1690
1691    #[test]
1692    fn test_split_sentences_issue_124() {
1693        // Test the actual text from issue #124
1694        let text = "If you are sure that all data structures exposed in a `PyModule` are thread-safe, then pass `gil_used = false` as a parameter to the `pymodule` procedural macro declaring the module or call `PyModule::gil_used` on a `PyModule` instance.  For example:";
1695
1696        let sentences = split_into_sentences(text);
1697
1698        // This should detect 2 sentences:
1699        // 1. "If you are sure ... on a `PyModule` instance."
1700        // 2. "For example:"
1701        assert_eq!(sentences.len(), 2, "Should detect 2 sentences in the text");
1702    }
1703
1704    #[test]
1705    fn test_reference_link_edge_cases() {
1706        let options = ReflowOptions {
1707            line_length: 40,
1708            break_on_sentences: true,
1709            preserve_breaks: false,
1710            sentence_per_line: false,
1711        };
1712
1713        // Test cases for edge cases and potential conflicts
1714        let test_cases = vec![
1715            // Escaped brackets should be treated as regular text
1716            ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1717            // Nested brackets in reference links
1718            (
1719                "Link [text with [nested] content][ref]",
1720                vec!["[text with [nested] content][ref]"],
1721            ),
1722            // Reference link followed by inline link
1723            (
1724                "First [ref][link] then [inline](url)",
1725                vec!["[ref][link]", "[inline](url)"],
1726            ),
1727            // Shortcut reference that might conflict with other patterns
1728            ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1729            // Empty reference with complex text
1730            (
1731                "Complex [text with *emphasis*][] reference",
1732                vec!["[text with *emphasis*][]"],
1733            ),
1734        ];
1735
1736        for (input, expected_patterns) in test_cases {
1737            println!("\nTesting edge case: {input}");
1738            let result = reflow_line(input, &options);
1739            let joined = result.join(" ");
1740            println!("Result: {joined}");
1741
1742            // Verify all expected patterns are preserved
1743            for expected_pattern in expected_patterns {
1744                assert!(
1745                    joined.contains(expected_pattern),
1746                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1747                );
1748            }
1749        }
1750    }
1751
1752    #[test]
1753    fn test_reflow_with_emphasis() {
1754        let options = ReflowOptions {
1755            line_length: 25,
1756            ..Default::default()
1757        };
1758
1759        let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1760
1761        // Verify emphasis markers are preserved
1762        let joined = result.join(" ");
1763        assert!(joined.contains("*emphasized*"));
1764        assert!(joined.contains("**strong**"));
1765    }
1766
1767    #[test]
1768    fn test_image_patterns_preserved() {
1769        let options = ReflowOptions {
1770            line_length: 30,
1771            ..Default::default()
1772        };
1773
1774        // Test cases for image patterns
1775        let test_cases = vec![
1776            // Inline image
1777            (
1778                "Check out ![alt text](image.png) for details",
1779                vec!["![alt text](image.png)"],
1780            ),
1781            // Reference image
1782            ("See ![image][ref] for info", vec!["![image][ref]"]),
1783            // Empty reference image
1784            ("Visit ![homepage][] today", vec!["![homepage][]"]),
1785            // Multiple images
1786            (
1787                "Images: ![first](a.png) and ![second][ref2]",
1788                vec!["![first](a.png)", "![second][ref2]"],
1789            ),
1790        ];
1791
1792        for (input, expected_patterns) in test_cases {
1793            println!("\nTesting: {input}");
1794            let result = reflow_line(input, &options);
1795            let joined = result.join(" ");
1796            println!("Result:  {joined}");
1797
1798            for expected_pattern in expected_patterns {
1799                assert!(
1800                    joined.contains(expected_pattern),
1801                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1802                );
1803            }
1804        }
1805    }
1806
1807    #[test]
1808    fn test_extended_markdown_patterns() {
1809        let options = ReflowOptions {
1810            line_length: 40,
1811            ..Default::default()
1812        };
1813
1814        let test_cases = vec![
1815            // Strikethrough
1816            ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1817            // Wiki links
1818            (
1819                "Check [[wiki link]] and [[page|display]]",
1820                vec!["[[wiki link]]", "[[page|display]]"],
1821            ),
1822            // Math
1823            (
1824                "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1825                vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1826            ),
1827            // Emoji
1828            ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1829            // HTML tags
1830            (
1831                "Text with <span>tag</span> and <br/>",
1832                vec!["<span>", "</span>", "<br/>"],
1833            ),
1834            // HTML entities
1835            ("Non-breaking&nbsp;space and em&mdash;dash", vec!["&nbsp;", "&mdash;"]),
1836        ];
1837
1838        for (input, expected_patterns) in test_cases {
1839            let result = reflow_line(input, &options);
1840            let joined = result.join(" ");
1841
1842            for pattern in expected_patterns {
1843                assert!(
1844                    joined.contains(pattern),
1845                    "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1846                );
1847            }
1848        }
1849    }
1850
1851    #[test]
1852    fn test_complex_mixed_patterns() {
1853        let options = ReflowOptions {
1854            line_length: 50,
1855            ..Default::default()
1856        };
1857
1858        // Test that multiple pattern types work together
1859        let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";
1860        let result = reflow_line(input, &options);
1861        let joined = result.join(" ");
1862
1863        // All patterns should be preserved
1864        assert!(joined.contains("**bold**"));
1865        assert!(joined.contains("`code`"));
1866        assert!(joined.contains("[link](url)"));
1867        assert!(joined.contains("![image](img)"));
1868        assert!(joined.contains("~~strike~~"));
1869        assert!(joined.contains("$math$"));
1870        assert!(joined.contains(":emoji:"));
1871        assert!(joined.contains("<tag>"));
1872    }
1873
1874    #[test]
1875    fn test_footnote_patterns_preserved() {
1876        let options = ReflowOptions {
1877            line_length: 40,
1878            ..Default::default()
1879        };
1880
1881        let test_cases = vec![
1882            // Single footnote
1883            ("This has a footnote[^1] reference", vec!["[^1]"]),
1884            // Multiple footnotes
1885            ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1886            // Long footnote name
1887            ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1888        ];
1889
1890        for (input, expected_patterns) in test_cases {
1891            let result = reflow_line(input, &options);
1892            let joined = result.join(" ");
1893
1894            for expected_pattern in expected_patterns {
1895                assert!(
1896                    joined.contains(expected_pattern),
1897                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1898                );
1899            }
1900        }
1901    }
1902
1903    #[test]
1904    fn test_reflow_markdown_numbered_lists() {
1905        // Test for issue #83: numbered lists with proper formatting
1906        let options = ReflowOptions {
1907            line_length: 50,
1908            ..Default::default()
1909        };
1910
1911        let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
19122. Short item
19133. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1914
1915        let result = reflow_markdown(content, &options);
1916
1917        // Define exact expected output
1918        let expected = r#"1. List `manifest` to find the manifest with the
1919   largest ID. Say it's
1920   `00000000000000000002.manifest` in this
1921   example.
19222. Short item
19233. Another long item that definitely exceeds the
1924   fifty character limit and needs wrapping"#;
1925
1926        assert_eq!(
1927            result, expected,
1928            "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1929        );
1930    }
1931
1932    #[test]
1933    fn test_reflow_markdown_bullet_lists() {
1934        let options = ReflowOptions {
1935            line_length: 40,
1936            ..Default::default()
1937        };
1938
1939        let content = r#"- First bullet point with a very long line that needs wrapping
1940* Second bullet using asterisk
1941+ Third bullet using plus sign
1942- Short one"#;
1943
1944        let result = reflow_markdown(content, &options);
1945
1946        // Define exact expected output - each bullet type preserved with proper indentation
1947        let expected = r#"- First bullet point with a very long
1948  line that needs wrapping
1949* Second bullet using asterisk
1950+ Third bullet using plus sign
1951- Short one"#;
1952
1953        assert_eq!(
1954            result, expected,
1955            "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1956        );
1957    }
1958
1959    #[test]
1960    fn test_ie_abbreviation_split_debug() {
1961        let input = "This results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.";
1962
1963        let options = ReflowOptions {
1964            line_length: 80,
1965            break_on_sentences: true,
1966            preserve_breaks: false,
1967            sentence_per_line: true,
1968        };
1969
1970        let result = reflow_line(input, &options);
1971
1972        // Should be 1 sentence, not split after "i.e."
1973        assert_eq!(result.len(), 1, "Should not split after i.e. abbreviation");
1974    }
1975
1976    #[test]
1977    fn test_ie_abbreviation_paragraph() {
1978        // Test the full paragraph from the file that's causing the issue
1979        let input = "The `pyo3(transparent)` attribute can be used on structs with exactly one field.\nThis results in extracting directly from the input object, i.e. `obj.extract()`, rather than trying to access an item or attribute.\nThis behaviour is enabled per default for newtype structs and tuple-variants with a single field.";
1980
1981        let options = ReflowOptions {
1982            line_length: 80,
1983            break_on_sentences: true,
1984            preserve_breaks: false,
1985            sentence_per_line: true,
1986        };
1987
1988        let result = reflow_markdown(input, &options);
1989
1990        // Should be 3 sentences, not 4 (shouldn't split after "i.e.")
1991        let line_count = result.lines().count();
1992        assert_eq!(line_count, 3, "Should have 3 sentences, not {line_count}");
1993
1994        // Verify the second line contains the full sentence
1995        let lines: Vec<&str> = result.lines().collect();
1996        assert!(lines.len() >= 2, "Should have at least 2 lines");
1997        assert!(lines[1].contains("i.e."), "Second line should contain 'i.e.'");
1998        assert!(
1999            lines[1].contains("`obj.extract()`"),
2000            "Second line should contain the code span"
2001        );
2002        assert!(
2003            lines[1].contains("attribute."),
2004            "Second line should end with 'attribute.'"
2005        );
2006    }
2007
2008    #[test]
2009    fn test_definition_list_preservation() {
2010        let options = ReflowOptions {
2011            line_length: 80,
2012            break_on_sentences: true,
2013            preserve_breaks: false,
2014            sentence_per_line: true,
2015        };
2016
2017        let content = "Term\n: Definition text here.";
2018        let result = reflow_markdown(content, &options);
2019
2020        // Should NOT join into "Term : Definition text here."
2021        assert_eq!(result, "Term\n: Definition text here.");
2022    }
2023
2024    #[test]
2025    fn test_definition_list_multiline() {
2026        let options = ReflowOptions {
2027            line_length: 80,
2028            break_on_sentences: true,
2029            preserve_breaks: false,
2030            sentence_per_line: true,
2031        };
2032
2033        let content = "Term\n: First sentence of definition. Second sentence.";
2034        let result = reflow_markdown(content, &options);
2035
2036        // Term line should stay separate
2037        assert!(result.starts_with("Term\n"));
2038        // Definition list item (line starting with ": ") should be preserved as-is
2039        // We don't split sentences within definition list items
2040        assert!(result.contains("\n: First sentence of definition. Second sentence."));
2041    }
2042
2043    #[test]
2044    fn test_definition_list_multiple() {
2045        let options = ReflowOptions {
2046            line_length: 80,
2047            sentence_per_line: true,
2048            ..Default::default()
2049        };
2050
2051        let content = "Term 1\n: Definition 1\n: Another definition for term 1\n\nTerm 2\n: Definition 2";
2052        let result = reflow_markdown(content, &options);
2053
2054        // All definition lines should preserve ": " at start
2055        assert!(result.lines().filter(|l| l.trim_start().starts_with(": ")).count() >= 3);
2056    }
2057
2058    #[test]
2059    fn test_definition_list_with_paragraphs() {
2060        let options = ReflowOptions {
2061            line_length: 0, // No line length constraint
2062            break_on_sentences: true,
2063            preserve_breaks: false,
2064            sentence_per_line: true,
2065        };
2066
2067        let content = "Regular paragraph. With multiple sentences.\n\nTerm\n: Definition.\n\nAnother paragraph.";
2068        let result = reflow_markdown(content, &options);
2069
2070        // Paragraph should be reflowed (sentences on separate lines)
2071        assert!(result.contains("Regular paragraph."));
2072        assert!(result.contains("\nWith multiple sentences."));
2073        // Definition list should be preserved
2074        assert!(result.contains("Term\n: Definition."));
2075        // Another paragraph should be preserved (single sentence, stays as is)
2076        assert!(result.contains("Another paragraph."));
2077    }
2078
2079    #[test]
2080    fn test_definition_list_edge_cases() {
2081        let options = ReflowOptions::default();
2082
2083        // Indented definition
2084        let content1 = "Term\n  : Indented definition";
2085        let result1 = reflow_markdown(content1, &options);
2086        assert!(result1.contains("\n  : Indented definition"));
2087
2088        // Multiple spaces after colon
2089        let content2 = "Term\n:   Definition";
2090        let result2 = reflow_markdown(content2, &options);
2091        assert!(result2.contains("\n:   Definition"));
2092
2093        // Tab after colon
2094        let content3 = "Term\n:\tDefinition";
2095        let result3 = reflow_markdown(content3, &options);
2096        assert!(result3.contains("\n:\tDefinition"));
2097    }
2098}
rumdl_lib/utils/text_reflow.rs

rumdl_lib/utils/
text_reflow.rs