rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::regex_cache::{
7    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options for reflowing text.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone` so option sets can be
/// logged and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target line length (compared against a line's `char` count by `reflow_line`)
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// Defaults: 80-character lines, sentence-aware breaking enabled, existing breaks
    /// not preserved, sentence-per-line mode disabled.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
34
/// Detect whether the character at index `pos` ends a sentence.
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
///
/// `pos` is a **character** index into `text` (the unit `split_into_sentences` counts in),
/// not a byte offset. It is translated to a byte offset internally, so text containing
/// multi-byte characters is handled without slicing in the middle of a character.
///
/// Returns `true` only when all of the following hold:
/// * the character at `pos` is `.`, `!` or `?`;
/// * it is followed by a single space and then an uppercase character;
/// * the word immediately before it is not a known abbreviation (`Dr`, `e.g`, `etc`, ...).
///   The abbreviation must be a whole word: "Dr." is skipped, while "complex." (which merely
///   ends in "ex") is still a boundary.
///
/// Returns `false` when `pos` is out of range or fewer than two characters follow it.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    // Lower-cased abbreviations whose trailing period does not end a sentence.
    // List similar to sentences-per-line.
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "mr", "mrs", "dr", "ms", "prof", "sr", "jr",
    ];

    // Walk to the `pos`-th character, remembering its byte offset for slicing below.
    let mut rest = text.char_indices().skip(pos);
    let Some((byte_pos, punctuation)) = rest.next() else {
        return false;
    };
    if !matches!(punctuation, '.' | '!' | '?') {
        return false;
    }

    // Must be followed by a space and then an uppercase character (new sentence indicator).
    let (Some((_, separator)), Some((_, next_start))) = (rest.next(), rest.next()) else {
        return false;
    };
    if separator != ' ' || !next_start.is_uppercase() {
        return false;
    }

    let before = &text[..byte_pos];

    // Only the last whitespace-delimited word can be the abbreviation. Requiring a
    // non-alphanumeric character (or nothing) in front of the matched suffix keeps
    // "(e.g" and "*Dr" recognised while rejecting ordinary words such as "complex".
    let last_word = before
        .rsplit(char::is_whitespace)
        .next()
        .unwrap_or("")
        .to_lowercase();
    let is_abbreviation = ABBREVIATIONS.iter().any(|&abbr| {
        last_word
            .strip_suffix(abbr)
            .is_some_and(|stem| !stem.ends_with(char::is_alphanumeric))
    });
    if is_abbreviation {
        return false;
    }

    // Digit before the period and a numeric character after the space. Because the
    // character after the space must also be uppercase, this only triggers for code
    // points that are both (e.g. Roman-numeral characters); plain decimals such as
    // "3.14" never reach this point since the period is not followed by a space.
    let digit_before = before.chars().next_back().is_some_and(char::is_numeric);
    !(digit_before && next_start.is_numeric())
}
81
82/// Split text into sentences
83pub fn split_into_sentences(text: &str) -> Vec<String> {
84    let mut sentences = Vec::new();
85    let mut current_sentence = String::new();
86    let mut chars = text.chars().peekable();
87    let mut pos = 0;
88
89    while let Some(c) = chars.next() {
90        current_sentence.push(c);
91
92        if is_sentence_boundary(text, pos) {
93            // Include the space after sentence if it exists
94            if chars.peek() == Some(&' ') {
95                chars.next();
96                pos += 1;
97            }
98
99            sentences.push(current_sentence.trim().to_string());
100            current_sentence.clear();
101        }
102
103        pos += 1;
104    }
105
106    // Add any remaining text as the last sentence
107    if !current_sentence.trim().is_empty() {
108        sentences.push(current_sentence.trim().to_string());
109    }
110
111    sentences
112}
113
/// Check whether `line` is a horizontal rule such as `---`, `___`, `***` or `- - -`.
///
/// The line must start with `-`, `_` or `*`, consist solely of that marker character
/// and spaces, and contain the marker at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the rest of the line must repeat.
    let marker = match line.chars().next() {
        Some(ch @ ('-' | '_' | '*')) => ch,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
142
/// Check whether `line` starts like a numbered list item (e.g. `"1. "`, `"10. "`).
///
/// Everything before the first `.` must be one or more numeric characters, and the
/// period must be followed by a space or be the last character of the line.
fn is_numbered_list_item(line: &str) -> bool {
    // No period at all means there is no list marker.
    let Some((number, after_period)) = line.split_once('.') else {
        return false;
    };

    if number.is_empty() || !number.chars().all(char::is_numeric) {
        return false;
    }

    matches!(after_period.chars().next(), None | Some(' '))
}
165
/// Trim trailing whitespace while preserving hard breaks (exactly 2 trailing spaces).
///
/// Hard breaks in Markdown are indicated by 2 trailing spaces before a newline. A single
/// trailing `\r` (from a CRLF line ending) is removed first. If what is left ends with at
/// least two spaces and has non-whitespace content, the result is that content followed
/// by exactly two spaces; otherwise all trailing whitespace is removed.
fn trim_preserving_hard_break(s: &str) -> String {
    // Handle Windows line endings before looking at the trailing spaces.
    let line = match s.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => s,
    };

    let content = line.trim_end();
    // A whitespace-only line never carries a hard break.
    let keeps_hard_break = !content.is_empty() && line.ends_with("  ");

    let mut trimmed = String::with_capacity(content.len() + 2);
    trimmed.push_str(content);
    if keeps_hard_break {
        trimmed.push_str("  ");
    }
    trimmed
}
188
189pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
190    // For sentence-per-line mode, always process regardless of length
191    if options.sentence_per_line {
192        let elements = parse_markdown_elements(line);
193        return reflow_elements_sentence_per_line(&elements);
194    }
195
196    // Quick check: if line is already short enough, return as-is
197    if line.chars().count() <= options.line_length {
198        return vec![line.to_string()];
199    }
200
201    // Parse the markdown to identify elements
202    let elements = parse_markdown_elements(line);
203
204    // Reflow the elements into lines
205    reflow_elements(&elements, options)
206}
207
/// A single piece of inline content found in a line of markdown.
///
/// Each variant stores only the inner content; the surrounding syntax is re-added by the
/// `Display` implementation and accounted for by [`Element::len`].
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// Inline link: `[text](url)`
    Link { text: String, url: String },
    /// Reference link: `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Empty reference link: `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[ref]`
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Reference image: `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Empty reference image: `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`
    Strikethrough(String),
    /// Wiki-style link: `[[wiki]]` or `[[wiki|text]]`
    WikiLink(String),
    /// Inline math: `$math$`
    InlineMath(String),
    /// Display math: `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode: `:emoji:`
    EmojiShortcode(String),
    /// HTML tag, stored with its angle brackets: `<tag>`, `</tag>` or `<tag/>`
    HtmlTag(String),
    /// HTML entity, stored whole: `&nbsp;` or `&#123;`
    HtmlEntity(String),
    /// Inline code: `` `code` ``
    Code(String),
    /// Bold: `**text**`
    Bold(String),
    /// Italic: `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Write the element back out in its markdown source form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Stored verbatim, so there is no syntax to add back.
            Self::Text(raw) | Self::HtmlTag(raw) | Self::HtmlEntity(raw) => f.write_str(raw),

            // Links
            Self::Link { text, url } => write!(f, "[{text}]({url})"),
            Self::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Self::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Self::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Self::FootnoteReference { note } => write!(f, "[^{note}]"),
            Self::WikiLink(target) => write!(f, "[[{target}]]"),

            // Images
            Self::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Self::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Self::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),

            // Delimited spans
            Self::Strikethrough(inner) => write!(f, "~~{inner}~~"),
            Self::InlineMath(inner) => write!(f, "${inner}$"),
            Self::DisplayMath(inner) => write!(f, "$${inner}$$"),
            Self::EmojiShortcode(inner) => write!(f, ":{inner}:"),
            Self::Code(inner) => write!(f, "`{inner}`"),
            Self::Bold(inner) => write!(f, "**{inner}**"),
            Self::Italic(inner) => write!(f, "*{inner}*"),
        }
    }
}

impl Element {
    /// Length of the rendered element in characters (not bytes): the stored content plus
    /// the markdown syntax that `Display` wraps around it.
    fn len(&self) -> usize {
        fn chars(s: &str) -> usize {
            s.chars().count()
        }

        // (characters of stored content, characters of surrounding syntax)
        let (content, syntax) = match self {
            Self::Text(s) | Self::HtmlTag(s) | Self::HtmlEntity(s) => (chars(s), 0),
            Self::Link { text, url } => (chars(text) + chars(url), 4), // [](  )
            Self::ReferenceLink { text, reference } => (chars(text) + chars(reference), 4), // [][]
            Self::EmptyReferenceLink { text } => (chars(text), 4),     // [][]
            Self::ShortcutReference { reference } => (chars(reference), 2), // []
            Self::InlineImage { alt, url } => (chars(alt) + chars(url), 5), // ![]()
            Self::ReferenceImage { alt, reference } => (chars(alt) + chars(reference), 5), // ![][]
            Self::EmptyReferenceImage { alt } => (chars(alt), 5),      // ![][]
            Self::FootnoteReference { note } => (chars(note), 3),      // [^]
            // Two-character delimiter on each side: ~~ ~~, [[ ]], $$ $$, ** **
            Self::Strikethrough(s) | Self::WikiLink(s) | Self::DisplayMath(s) | Self::Bold(s) => (chars(s), 4),
            // One-character delimiter on each side: $ $, : :, ` `, * *
            Self::InlineMath(s) | Self::EmojiShortcode(s) | Self::Code(s) | Self::Italic(s) => (chars(s), 2),
        };

        content + syntax
    }
}
302
303/// Parse markdown elements from text preserving the raw syntax
304///
305/// Detection order is critical:
306/// 1. Inline links [text](url) - must be detected first to avoid conflicts
307/// 2. Reference links [text][ref] - detected before shortcut references
308/// 3. Empty reference links [text][] - a special case of reference links
309/// 4. Shortcut reference links [ref] - detected last to avoid false positives
310/// 5. Other elements (code, bold, italic) - processed normally
311fn parse_markdown_elements(text: &str) -> Vec<Element> {
312    let mut elements = Vec::new();
313    let mut remaining = text;
314
315    while !remaining.is_empty() {
316        // Find the earliest occurrence of any markdown pattern
317        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
318
319        // Check for images first (they start with ! so should be detected before links)
320        // Inline images - ![alt](url)
321        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
322            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
323        {
324            earliest_match = Some((m.start(), "inline_image", m));
325        }
326
327        // Reference images - ![alt][ref]
328        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
329            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
330        {
331            earliest_match = Some((m.start(), "ref_image", m));
332        }
333
334        // Check for footnote references - [^note]
335        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
336            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
337        {
338            earliest_match = Some((m.start(), "footnote_ref", m));
339        }
340
341        // Check for inline links - [text](url)
342        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
343            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
344        {
345            earliest_match = Some((m.start(), "inline_link", m));
346        }
347
348        // Check for reference links - [text][ref]
349        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
350            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
351        {
352            earliest_match = Some((m.start(), "ref_link", m));
353        }
354
355        // Check for shortcut reference links - [ref]
356        // Only check if we haven't found an earlier pattern that would conflict
357        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
358            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
359        {
360            earliest_match = Some((m.start(), "shortcut_ref", m));
361        }
362
363        // Check for wiki-style links - [[wiki]]
364        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
365            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
366        {
367            earliest_match = Some((m.start(), "wiki_link", m));
368        }
369
370        // Check for display math first (before inline) - $$math$$
371        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
372            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
373        {
374            earliest_match = Some((m.start(), "display_math", m));
375        }
376
377        // Check for inline math - $math$
378        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
379            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
380        {
381            earliest_match = Some((m.start(), "inline_math", m));
382        }
383
384        // Check for strikethrough - ~~text~~
385        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
386            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
387        {
388            earliest_match = Some((m.start(), "strikethrough", m));
389        }
390
391        // Check for emoji shortcodes - :emoji:
392        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
393            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
394        {
395            earliest_match = Some((m.start(), "emoji", m));
396        }
397
398        // Check for HTML entities - &nbsp; etc
399        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
400            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
401        {
402            earliest_match = Some((m.start(), "html_entity", m));
403        }
404
405        // Check for HTML tags - <tag> </tag> <tag/>
406        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
407            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
408        {
409            earliest_match = Some((m.start(), "html_tag", m));
410        }
411
412        // Find earliest non-link special characters
413        let mut next_special = remaining.len();
414        let mut special_type = "";
415
416        if let Some(pos) = remaining.find('`')
417            && pos < next_special
418        {
419            next_special = pos;
420            special_type = "code";
421        }
422        if let Some(pos) = remaining.find("**")
423            && pos < next_special
424        {
425            next_special = pos;
426            special_type = "bold";
427        }
428        if let Some(pos) = remaining.find('*')
429            && pos < next_special
430            && !remaining[pos..].starts_with("**")
431        {
432            next_special = pos;
433            special_type = "italic";
434        }
435
436        // Determine which pattern to process first
437        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
438            pos < next_special
439        } else {
440            false
441        };
442
443        if should_process_markdown_link {
444            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
445
446            // Add any text before the match
447            if pos > 0 {
448                elements.push(Element::Text(remaining[..pos].to_string()));
449            }
450
451            // Process the matched pattern
452            match pattern_type {
453                "inline_image" => {
454                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
455                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
456                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
457                        elements.push(Element::InlineImage {
458                            alt: alt.to_string(),
459                            url: url.to_string(),
460                        });
461                        remaining = &remaining[match_obj.end()..];
462                    } else {
463                        elements.push(Element::Text("!".to_string()));
464                        remaining = &remaining[1..];
465                    }
466                }
467                "ref_image" => {
468                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
469                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
470                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
471
472                        if reference.is_empty() {
473                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
474                        } else {
475                            elements.push(Element::ReferenceImage {
476                                alt: alt.to_string(),
477                                reference: reference.to_string(),
478                            });
479                        }
480                        remaining = &remaining[match_obj.end()..];
481                    } else {
482                        elements.push(Element::Text("!".to_string()));
483                        remaining = &remaining[1..];
484                    }
485                }
486                "footnote_ref" => {
487                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
488                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
489                        elements.push(Element::FootnoteReference { note: note.to_string() });
490                        remaining = &remaining[match_obj.end()..];
491                    } else {
492                        elements.push(Element::Text("[".to_string()));
493                        remaining = &remaining[1..];
494                    }
495                }
496                "inline_link" => {
497                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
498                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
499                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
500                        elements.push(Element::Link {
501                            text: text.to_string(),
502                            url: url.to_string(),
503                        });
504                        remaining = &remaining[match_obj.end()..];
505                    } else {
506                        // Fallback - shouldn't happen
507                        elements.push(Element::Text("[".to_string()));
508                        remaining = &remaining[1..];
509                    }
510                }
511                "ref_link" => {
512                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
513                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
514                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
515
516                        if reference.is_empty() {
517                            // Empty reference link [text][]
518                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
519                        } else {
520                            // Regular reference link [text][ref]
521                            elements.push(Element::ReferenceLink {
522                                text: text.to_string(),
523                                reference: reference.to_string(),
524                            });
525                        }
526                        remaining = &remaining[match_obj.end()..];
527                    } else {
528                        // Fallback - shouldn't happen
529                        elements.push(Element::Text("[".to_string()));
530                        remaining = &remaining[1..];
531                    }
532                }
533                "shortcut_ref" => {
534                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
535                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
536                        elements.push(Element::ShortcutReference {
537                            reference: reference.to_string(),
538                        });
539                        remaining = &remaining[match_obj.end()..];
540                    } else {
541                        // Fallback - shouldn't happen
542                        elements.push(Element::Text("[".to_string()));
543                        remaining = &remaining[1..];
544                    }
545                }
546                "wiki_link" => {
547                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
548                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
549                        elements.push(Element::WikiLink(content.to_string()));
550                        remaining = &remaining[match_obj.end()..];
551                    } else {
552                        elements.push(Element::Text("[[".to_string()));
553                        remaining = &remaining[2..];
554                    }
555                }
556                "display_math" => {
557                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
558                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
559                        elements.push(Element::DisplayMath(math.to_string()));
560                        remaining = &remaining[match_obj.end()..];
561                    } else {
562                        elements.push(Element::Text("$$".to_string()));
563                        remaining = &remaining[2..];
564                    }
565                }
566                "inline_math" => {
567                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
568                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
569                        elements.push(Element::InlineMath(math.to_string()));
570                        remaining = &remaining[match_obj.end()..];
571                    } else {
572                        elements.push(Element::Text("$".to_string()));
573                        remaining = &remaining[1..];
574                    }
575                }
576                "strikethrough" => {
577                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
578                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
579                        elements.push(Element::Strikethrough(text.to_string()));
580                        remaining = &remaining[match_obj.end()..];
581                    } else {
582                        elements.push(Element::Text("~~".to_string()));
583                        remaining = &remaining[2..];
584                    }
585                }
586                "emoji" => {
587                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
588                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
589                        elements.push(Element::EmojiShortcode(emoji.to_string()));
590                        remaining = &remaining[match_obj.end()..];
591                    } else {
592                        elements.push(Element::Text(":".to_string()));
593                        remaining = &remaining[1..];
594                    }
595                }
596                "html_entity" => {
597                    // HTML entities are captured whole
598                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
599                    remaining = &remaining[match_obj.end()..];
600                }
601                "html_tag" => {
602                    // HTML tags are captured whole
603                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
604                    remaining = &remaining[match_obj.end()..];
605                }
606                _ => {
607                    // Unknown pattern, treat as text
608                    elements.push(Element::Text("[".to_string()));
609                    remaining = &remaining[1..];
610                }
611            }
612        } else {
613            // Process non-link special characters
614
615            // Add any text before the special character
616            if next_special > 0 && next_special < remaining.len() {
617                elements.push(Element::Text(remaining[..next_special].to_string()));
618                remaining = &remaining[next_special..];
619            }
620
621            // Process the special element
622            match special_type {
623                "code" => {
624                    // Find end of code
625                    if let Some(code_end) = remaining[1..].find('`') {
626                        let code = &remaining[1..1 + code_end];
627                        elements.push(Element::Code(code.to_string()));
628                        remaining = &remaining[1 + code_end + 1..];
629                    } else {
630                        // No closing backtick, treat as text
631                        elements.push(Element::Text(remaining.to_string()));
632                        break;
633                    }
634                }
635                "bold" => {
636                    // Check for bold text
637                    if let Some(bold_end) = remaining[2..].find("**") {
638                        let bold_text = &remaining[2..2 + bold_end];
639                        elements.push(Element::Bold(bold_text.to_string()));
640                        remaining = &remaining[2 + bold_end + 2..];
641                    } else {
642                        // No closing **, treat as text
643                        elements.push(Element::Text("**".to_string()));
644                        remaining = &remaining[2..];
645                    }
646                }
647                "italic" => {
648                    // Check for italic text
649                    if let Some(italic_end) = remaining[1..].find('*') {
650                        let italic_text = &remaining[1..1 + italic_end];
651                        elements.push(Element::Italic(italic_text.to_string()));
652                        remaining = &remaining[1 + italic_end + 1..];
653                    } else {
654                        // No closing *, treat as text
655                        elements.push(Element::Text("*".to_string()));
656                        remaining = &remaining[1..];
657                    }
658                }
659                _ => {
660                    // No special elements found, add all remaining text
661                    elements.push(Element::Text(remaining.to_string()));
662                    break;
663                }
664            }
665        }
666    }
667
668    elements
669}
670
671/// Reflow elements for sentence-per-line mode
672fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
673    let mut lines = Vec::new();
674    let mut current_line = String::new();
675
676    for element in elements {
677        let element_str = format!("{element}");
678
679        // For text elements, split into sentences
680        if let Element::Text(text) = element {
681            // Simply append text - it already has correct spacing from tokenization
682            let combined = format!("{current_line}{text}");
683            let sentences = split_into_sentences(&combined);
684
685            if sentences.len() > 1 {
686                // We found sentence boundaries
687                for (i, sentence) in sentences.iter().enumerate() {
688                    if i == 0 {
689                        // First sentence might continue from previous elements
690                        lines.push(sentence.to_string());
691                    } else if i == sentences.len() - 1 {
692                        // Last sentence might continue to next elements
693                        current_line = sentence.to_string();
694                    } else {
695                        // Complete sentences in the middle
696                        lines.push(sentence.to_string());
697                    }
698                }
699            } else {
700                // No sentence boundary found, continue accumulating
701                current_line = combined;
702            }
703        } else {
704            // Non-text elements (Code, Bold, Italic, etc.)
705            // Add space before element if needed (unless it's after an opening paren/bracket)
706            if !current_line.is_empty()
707                && !current_line.ends_with(' ')
708                && !current_line.ends_with('(')
709                && !current_line.ends_with('[')
710            {
711                current_line.push(' ');
712            }
713            current_line.push_str(&element_str);
714        }
715    }
716
717    // Add any remaining content
718    if !current_line.is_empty() {
719        lines.push(current_line.trim().to_string());
720    }
721
722    lines
723}
724
725/// Reflow elements into lines that fit within the line length
726fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
727    let mut lines = Vec::new();
728    let mut current_line = String::new();
729    let mut current_length = 0;
730
731    for element in elements {
732        let element_str = format!("{element}");
733        let element_len = element.len();
734
735        // For text elements that might need breaking
736        if let Element::Text(text) = element {
737            // If this is a text element, always process it word by word
738            let words: Vec<&str> = text.split_whitespace().collect();
739
740            for word in words {
741                let word_len = word.chars().count();
742                if current_length > 0 && current_length + 1 + word_len > options.line_length {
743                    // Start a new line
744                    lines.push(current_line.trim().to_string());
745                    current_line = word.to_string();
746                    current_length = word_len;
747                } else {
748                    // Add word to current line
749                    if current_length > 0 {
750                        current_line.push(' ');
751                        current_length += 1;
752                    }
753                    current_line.push_str(word);
754                    current_length += word_len;
755                }
756            }
757        } else {
758            // For non-text elements (code, links, references), treat as atomic units
759            // These should never be broken across lines
760            if current_length > 0 && current_length + 1 + element_len > options.line_length {
761                // Start a new line
762                lines.push(current_line.trim().to_string());
763                current_line = element_str;
764                current_length = element_len;
765            } else {
766                // Add element to current line
767                if current_length > 0 {
768                    current_line.push(' ');
769                    current_length += 1;
770                }
771                current_line.push_str(&element_str);
772                current_length += element_len;
773            }
774        }
775    }
776
777    // Don't forget the last line
778    if !current_line.is_empty() {
779        lines.push(current_line.trim_end().to_string());
780    }
781
782    lines
783}
784
785/// Reflow markdown content preserving structure
786pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
787    let lines: Vec<&str> = content.lines().collect();
788    let mut result = Vec::new();
789    let mut i = 0;
790
791    while i < lines.len() {
792        let line = lines[i];
793        let trimmed = line.trim();
794
795        // Preserve empty lines
796        if trimmed.is_empty() {
797            result.push(String::new());
798            i += 1;
799            continue;
800        }
801
802        // Preserve headings as-is
803        if trimmed.starts_with('#') {
804            result.push(line.to_string());
805            i += 1;
806            continue;
807        }
808
809        // Preserve fenced code blocks
810        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
811            result.push(line.to_string());
812            i += 1;
813            // Copy lines until closing fence
814            while i < lines.len() {
815                result.push(lines[i].to_string());
816                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
817                    i += 1;
818                    break;
819                }
820                i += 1;
821            }
822            continue;
823        }
824
825        // Preserve indented code blocks (4+ spaces or 1+ tab)
826        if line.starts_with("    ") || line.starts_with("\t") {
827            // Collect all consecutive indented lines
828            result.push(line.to_string());
829            i += 1;
830            while i < lines.len() {
831                let next_line = lines[i];
832                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
833                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
834                    result.push(next_line.to_string());
835                    i += 1;
836                } else {
837                    break;
838                }
839            }
840            continue;
841        }
842
843        // Preserve block quotes (but reflow their content)
844        if trimmed.starts_with('>') {
845            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
846            let quote_content = &line[quote_prefix.len()..].trim_start();
847
848            let reflowed = reflow_line(quote_content, options);
849            for reflowed_line in reflowed.iter() {
850                result.push(format!("{quote_prefix} {reflowed_line}"));
851            }
852            i += 1;
853            continue;
854        }
855
856        // Preserve horizontal rules first (before checking for lists)
857        if is_horizontal_rule(trimmed) {
858            result.push(line.to_string());
859            i += 1;
860            continue;
861        }
862
863        // Preserve lists (but not horizontal rules)
864        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
865            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
866            || trimmed.starts_with('+')
867            || is_numbered_list_item(trimmed)
868        {
869            // Find the list marker and preserve indentation
870            let indent = line.len() - line.trim_start().len();
871            let indent_str = " ".repeat(indent);
872
873            // For numbered lists, find the period and the space after it
874            // For bullet lists, find the marker and the space after it
875            let mut marker_end = indent;
876            let mut content_start = indent;
877
878            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
879                // Numbered list: find the period
880                if let Some(period_pos) = line[indent..].find('.') {
881                    marker_end = indent + period_pos + 1; // Include the period
882                    content_start = marker_end;
883                    // Skip any spaces after the period to find content start
884                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
885                        content_start += 1;
886                    }
887                }
888            } else {
889                // Bullet list: marker is single character
890                marker_end = indent + 1; // Just the marker character
891                content_start = marker_end;
892                // Skip any spaces after the marker
893                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
894                    content_start += 1;
895                }
896            }
897
898            let marker = &line[indent..marker_end];
899
900            // Collect all content for this list item (including continuation lines)
901            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
902            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
903            i += 1;
904
905            // Collect continuation lines (indented lines that are part of this list item)
906            while i < lines.len() {
907                let next_line = lines[i];
908                let next_trimmed = next_line.trim();
909
910                // Stop if we hit an empty line or another list item or special block
911                if next_trimmed.is_empty()
912                    || next_trimmed.starts_with('#')
913                    || next_trimmed.starts_with("```")
914                    || next_trimmed.starts_with("~~~")
915                    || next_trimmed.starts_with('>')
916                    || next_trimmed.starts_with('|')
917                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
918                    || is_horizontal_rule(next_trimmed)
919                    || (next_trimmed.starts_with('-')
920                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
921                    || (next_trimmed.starts_with('*')
922                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
923                    || (next_trimmed.starts_with('+')
924                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
925                    || is_numbered_list_item(next_trimmed)
926                {
927                    break;
928                }
929
930                // Check if this line is indented (continuation of list item)
931                let next_indent = next_line.len() - next_line.trim_start().len();
932                if next_indent >= content_start {
933                    // This is a continuation line - add its content
934                    // Preserve hard breaks while trimming excessive whitespace
935                    let trimmed_start = next_line.trim_start();
936                    list_content.push(trim_preserving_hard_break(trimmed_start));
937                    i += 1;
938                } else {
939                    // Not indented enough, not part of this list item
940                    break;
941                }
942            }
943
944            // Join content, but respect hard breaks (lines ending with 2 spaces)
945            // Hard breaks should prevent joining with the next line
946            let combined_content = if options.preserve_breaks {
947                list_content[0].clone()
948            } else {
949                // Check if any lines have hard breaks - if so, preserve the structure
950                let has_hard_breaks = list_content.iter().any(|line| line.ends_with("  "));
951                if has_hard_breaks {
952                    // Don't join lines with hard breaks - keep them separate with newlines
953                    list_content.join("\n")
954                } else {
955                    // No hard breaks, safe to join with spaces
956                    list_content.join(" ")
957                }
958            };
959
960            // Calculate the proper indentation for continuation lines
961            let trimmed_marker = marker;
962            let continuation_spaces = content_start;
963
964            // Adjust line length to account for list marker and space
965            let prefix_length = indent + trimmed_marker.len() + 1;
966
967            // Create adjusted options with reduced line length
968            let adjusted_options = ReflowOptions {
969                line_length: options.line_length.saturating_sub(prefix_length),
970                ..options.clone()
971            };
972
973            let reflowed = reflow_line(&combined_content, &adjusted_options);
974            for (j, reflowed_line) in reflowed.iter().enumerate() {
975                if j == 0 {
976                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
977                } else {
978                    // Continuation lines aligned with text after marker
979                    let continuation_indent = " ".repeat(continuation_spaces);
980                    result.push(format!("{continuation_indent}{reflowed_line}"));
981                }
982            }
983            continue;
984        }
985
986        // Preserve tables
987        if trimmed.contains('|') {
988            result.push(line.to_string());
989            i += 1;
990            continue;
991        }
992
993        // Preserve reference definitions
994        if trimmed.starts_with('[') && line.contains("]:") {
995            result.push(line.to_string());
996            i += 1;
997            continue;
998        }
999
1000        // Check if this is a single line that doesn't need processing
1001        let mut is_single_line_paragraph = true;
1002        if i + 1 < lines.len() {
1003            let next_line = lines[i + 1];
1004            let next_trimmed = next_line.trim();
1005            // Check if next line starts a new block
1006            if !next_trimmed.is_empty()
1007                && !next_trimmed.starts_with('#')
1008                && !next_trimmed.starts_with("```")
1009                && !next_trimmed.starts_with("~~~")
1010                && !next_trimmed.starts_with('>')
1011                && !next_trimmed.starts_with('|')
1012                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1013                && !is_horizontal_rule(next_trimmed)
1014                && !(next_trimmed.starts_with('-')
1015                    && !is_horizontal_rule(next_trimmed)
1016                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017                && !(next_trimmed.starts_with('*')
1018                    && !is_horizontal_rule(next_trimmed)
1019                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1020                && !(next_trimmed.starts_with('+')
1021                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1022                && !is_numbered_list_item(next_trimmed)
1023            {
1024                is_single_line_paragraph = false;
1025            }
1026        }
1027
1028        // If it's a single line that fits, just add it as-is
1029        if is_single_line_paragraph && line.chars().count() <= options.line_length {
1030            result.push(line.to_string());
1031            i += 1;
1032            continue;
1033        }
1034
1035        // For regular paragraphs, collect consecutive lines
1036        let mut paragraph_parts = Vec::new();
1037        let mut current_part = vec![line];
1038        i += 1;
1039
1040        // If preserve_breaks is true, treat each line separately
1041        if options.preserve_breaks {
1042            // Don't collect consecutive lines - just reflow this single line
1043            let has_hard_break = line.ends_with("  ");
1044            let reflowed = reflow_line(line, options);
1045
1046            // Preserve hard breaks (two trailing spaces)
1047            if has_hard_break && !reflowed.is_empty() {
1048                let mut reflowed_with_break = reflowed;
1049                let last_idx = reflowed_with_break.len() - 1;
1050                if !reflowed_with_break[last_idx].ends_with("  ") {
1051                    reflowed_with_break[last_idx].push_str("  ");
1052                }
1053                result.extend(reflowed_with_break);
1054            } else {
1055                result.extend(reflowed);
1056            }
1057        } else {
1058            // Original behavior: collect consecutive lines into a paragraph
1059            while i < lines.len() {
1060                let prev_line = if !current_part.is_empty() {
1061                    current_part.last().unwrap()
1062                } else {
1063                    ""
1064                };
1065                let next_line = lines[i];
1066                let next_trimmed = next_line.trim();
1067
1068                // Stop at empty lines or special blocks
1069                if next_trimmed.is_empty()
1070                    || next_trimmed.starts_with('#')
1071                    || next_trimmed.starts_with("```")
1072                    || next_trimmed.starts_with("~~~")
1073                    || next_trimmed.starts_with('>')
1074                    || next_trimmed.starts_with('|')
1075                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1076                    || is_horizontal_rule(next_trimmed)
1077                    || (next_trimmed.starts_with('-')
1078                        && !is_horizontal_rule(next_trimmed)
1079                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1080                    || (next_trimmed.starts_with('*')
1081                        && !is_horizontal_rule(next_trimmed)
1082                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1083                    || (next_trimmed.starts_with('+')
1084                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1085                    || is_numbered_list_item(next_trimmed)
1086                {
1087                    break;
1088                }
1089
1090                // Check if previous line ends with hard break (two spaces)
1091                if prev_line.ends_with("  ") {
1092                    // Start a new part after hard break
1093                    paragraph_parts.push(current_part.join(" "));
1094                    current_part = vec![next_line];
1095                } else {
1096                    current_part.push(next_line);
1097                }
1098                i += 1;
1099            }
1100
1101            // Add the last part
1102            if !current_part.is_empty() {
1103                if current_part.len() == 1 {
1104                    // Single line, don't add trailing space
1105                    paragraph_parts.push(current_part[0].to_string());
1106                } else {
1107                    paragraph_parts.push(current_part.join(" "));
1108                }
1109            }
1110
1111            // Reflow each part separately, preserving hard breaks
1112            for (j, part) in paragraph_parts.iter().enumerate() {
1113                let reflowed = reflow_line(part, options);
1114                result.extend(reflowed);
1115
1116                // Preserve hard break by ensuring last line of part ends with two spaces
1117                if j < paragraph_parts.len() - 1 && !result.is_empty() {
1118                    let last_idx = result.len() - 1;
1119                    if !result[last_idx].ends_with("  ") {
1120                        result[last_idx].push_str("  ");
1121                    }
1122                }
1123            }
1124        }
1125    }
1126
1127    // Preserve trailing newline if the original content had one
1128    let result_text = result.join("\n");
1129    if content.ends_with('\n') && !result_text.ends_with('\n') {
1130        format!("{result_text}\n")
1131    } else {
1132        result_text
1133    }
1134}
1135
1136#[cfg(test)]
1137mod tests {
1138    use super::*;
1139
1140    #[test]
1141    fn test_list_item_trailing_whitespace_removal() {
1142        // Test for issue #76 - hard breaks (2 trailing spaces) should be preserved
1143        // and prevent reflowing
1144        let input = "1. First line with trailing spaces   \n    Second line with trailing spaces  \n    Third line\n";
1145
1146        let options = ReflowOptions {
1147            line_length: 999999,
1148            break_on_sentences: true, // MD013 uses true by default
1149            preserve_breaks: false,
1150            sentence_per_line: false,
1151        };
1152
1153        let result = reflow_markdown(input, &options);
1154
1155        eprintln!("Input: {input:?}");
1156        eprintln!("Result: {result:?}");
1157
1158        // Should not contain 3+ consecutive spaces (which would indicate
1159        // trailing whitespace became mid-line whitespace)
1160        assert!(
1161            !result.contains("   "),
1162            "Result should not contain 3+ consecutive spaces: {result:?}"
1163        );
1164
1165        // Hard breaks should be preserved (exactly 2 trailing spaces)
1166        assert!(result.contains("  \n"), "Hard breaks should be preserved: {result:?}");
1167
1168        // Should NOT be reflowed into a single line because hard breaks are present
1169        // The content should maintain its line structure
1170        assert!(
1171            result.lines().count() >= 2,
1172            "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1173            result.lines().count()
1174        );
1175    }
1176
1177    #[test]
1178    fn test_reflow_simple_text() {
1179        let options = ReflowOptions {
1180            line_length: 20,
1181            ..Default::default()
1182        };
1183
1184        let input = "This is a very long line that needs to be wrapped";
1185        let result = reflow_line(input, &options);
1186
1187        assert_eq!(result.len(), 3);
1188        assert!(result[0].chars().count() <= 20);
1189        assert!(result[1].chars().count() <= 20);
1190        assert!(result[2].chars().count() <= 20);
1191    }
1192
1193    #[test]
1194    fn test_preserve_inline_code() {
1195        let options = ReflowOptions {
1196            line_length: 30,
1197            ..Default::default()
1198        };
1199
1200        let result = reflow_line("This line has `inline code` that should be preserved", &options);
1201        // Verify inline code is not broken
1202        let joined = result.join(" ");
1203        assert!(joined.contains("`inline code`"));
1204    }
1205
1206    #[test]
1207    fn test_preserve_links() {
1208        let options = ReflowOptions {
1209            line_length: 40,
1210            ..Default::default()
1211        };
1212
1213        let text = "Check out [this link](https://example.com/very/long/url) for more info";
1214        let result = reflow_line(text, &options);
1215
1216        // Verify link is preserved intact
1217        let joined = result.join(" ");
1218        assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1219    }
1220
1221    #[test]
1222    fn test_reference_link_patterns_fixed() {
1223        let options = ReflowOptions {
1224            line_length: 30,
1225            break_on_sentences: true,
1226            preserve_breaks: false,
1227            sentence_per_line: false,
1228        };
1229
1230        // Test cases that verify reference links are preserved as atomic units
1231        let test_cases = vec![
1232            // Reference link: [text][ref] - should be preserved intact
1233            ("Check out [text][ref] for details", vec!["[text][ref]"]),
1234            // Empty reference: [text][] - should be preserved intact
1235            ("See [text][] for info", vec!["[text][]"]),
1236            // Shortcut reference: [homepage] - should be preserved intact
1237            ("Visit [homepage] today", vec!["[homepage]"]),
1238            // Multiple reference links in one line
1239            (
1240                "Links: [first][ref1] and [second][ref2] here",
1241                vec!["[first][ref1]", "[second][ref2]"],
1242            ),
1243            // Mixed inline and reference links
1244            (
1245                "See [inline](url) and [reference][ref] links",
1246                vec!["[inline](url)", "[reference][ref]"],
1247            ),
1248        ];
1249
1250        for (input, expected_patterns) in test_cases {
1251            println!("\nTesting: {input}");
1252            let result = reflow_line(input, &options);
1253            let joined = result.join(" ");
1254            println!("Result:  {joined}");
1255
1256            // Verify all expected patterns are preserved
1257            for expected_pattern in expected_patterns {
1258                assert!(
1259                    joined.contains(expected_pattern),
1260                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1261                );
1262            }
1263
1264            // Verify no broken patterns exist (spaces inside brackets)
1265            assert!(
1266                !joined.contains("[ ") || !joined.contains("] ["),
1267                "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
1268            );
1269        }
1270    }
1271
1272    #[test]
1273    fn test_sentence_detection_basic() {
1274        // Test basic sentence detection
1275        assert!(is_sentence_boundary("Hello. World", 5));
1276        assert!(is_sentence_boundary("Test! Another", 4));
1277        assert!(is_sentence_boundary("Question? Answer", 8));
1278
1279        // Test non-boundaries
1280        assert!(!is_sentence_boundary("Hello world", 5));
1281        assert!(!is_sentence_boundary("Test.com", 4));
1282        assert!(!is_sentence_boundary("3.14 pi", 1));
1283    }
1284
1285    #[test]
1286    fn test_sentence_detection_abbreviations() {
1287        // Common abbreviations should not be treated as sentence boundaries
1288        assert!(!is_sentence_boundary("Mr. Smith", 2));
1289        assert!(!is_sentence_boundary("Dr. Jones", 2));
1290        assert!(!is_sentence_boundary("e.g. example", 3));
1291        assert!(!is_sentence_boundary("i.e. that is", 3));
1292        assert!(!is_sentence_boundary("etc. items", 3));
1293
1294        // But sentence after abbreviation should be a boundary
1295        assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
1296    }
1297
1298    #[test]
1299    fn test_split_into_sentences() {
1300        let text = "First sentence. Second sentence. Third one!";
1301        let sentences = split_into_sentences(text);
1302        assert_eq!(sentences.len(), 3);
1303        assert_eq!(sentences[0], "First sentence.");
1304        assert_eq!(sentences[1], "Second sentence.");
1305        assert_eq!(sentences[2], "Third one!");
1306
1307        // Test with abbreviations
1308        let text2 = "Mr. Smith met Dr. Jones.";
1309        let sentences2 = split_into_sentences(text2);
1310        assert_eq!(sentences2.len(), 1);
1311        assert_eq!(sentences2[0], "Mr. Smith met Dr. Jones.");
1312
1313        // Test single sentence
1314        let text3 = "This is a single sentence.";
1315        let sentences3 = split_into_sentences(text3);
1316        assert_eq!(sentences3.len(), 1);
1317        assert_eq!(sentences3[0], "This is a single sentence.");
1318    }
1319
1320    #[test]
1321    fn test_sentence_per_line_reflow() {
1322        let options = ReflowOptions {
1323            line_length: 80,
1324            break_on_sentences: true,
1325            preserve_breaks: false,
1326            sentence_per_line: true,
1327        };
1328
1329        // Test basic sentence splitting
1330        let input = "First sentence. Second sentence. Third sentence.";
1331        let result = reflow_line(input, &options);
1332        assert_eq!(result.len(), 3);
1333        assert_eq!(result[0], "First sentence.");
1334        assert_eq!(result[1], "Second sentence.");
1335        assert_eq!(result[2], "Third sentence.");
1336
1337        // Test with markdown elements
1338        let input2 = "This has **bold**. And [a link](url).";
1339        let result2 = reflow_line(input2, &options);
1340        assert_eq!(result2.len(), 2);
1341        assert_eq!(result2[0], "This has **bold**.");
1342        assert_eq!(result2[1], "And [a link](url).");
1343    }
1344
1345    #[test]
1346    fn test_sentence_per_line_with_backticks() {
1347        let options = ReflowOptions {
1348            line_length: 80,
1349            break_on_sentences: true,
1350            preserve_breaks: false,
1351            sentence_per_line: true,
1352        };
1353
1354        let input = "This sentence has `code` in it. And this has `more code` too.";
1355        let result = reflow_line(input, &options);
1356        assert_eq!(result.len(), 2);
1357        assert_eq!(result[0], "This sentence has `code` in it.");
1358        assert_eq!(result[1], "And this has `more code` too.");
1359    }
1360
1361    #[test]
1362    fn test_sentence_per_line_with_backticks_in_parens() {
1363        let options = ReflowOptions {
1364            line_length: 80,
1365            break_on_sentences: true,
1366            preserve_breaks: false,
1367            sentence_per_line: true,
1368        };
1369
1370        let input = "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.";
1371        let result = reflow_line(input, &options);
1372        assert_eq!(result.len(), 2);
1373        assert_eq!(result[0], "Configure in (`.rumdl.toml` or `pyproject.toml`).");
1374        assert_eq!(result[1], "Next sentence.");
1375    }
1376
1377    #[test]
1378    fn test_sentence_per_line_with_questions_exclamations() {
1379        let options = ReflowOptions {
1380            line_length: 80,
1381            break_on_sentences: true,
1382            preserve_breaks: false,
1383            sentence_per_line: true,
1384        };
1385
1386        let input = "Is this a question? Yes it is! And a statement.";
1387        let result = reflow_line(input, &options);
1388        assert_eq!(result.len(), 3);
1389        assert_eq!(result[0], "Is this a question?");
1390        assert_eq!(result[1], "Yes it is!");
1391        assert_eq!(result[2], "And a statement.");
1392    }
1393
1394    #[test]
1395    fn test_reference_link_edge_cases() {
1396        let options = ReflowOptions {
1397            line_length: 40,
1398            break_on_sentences: true,
1399            preserve_breaks: false,
1400            sentence_per_line: false,
1401        };
1402
1403        // Test cases for edge cases and potential conflicts
1404        let test_cases = vec![
1405            // Escaped brackets should be treated as regular text
1406            ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
1407            // Nested brackets in reference links
1408            (
1409                "Link [text with [nested] content][ref]",
1410                vec!["[text with [nested] content][ref]"],
1411            ),
1412            // Reference link followed by inline link
1413            (
1414                "First [ref][link] then [inline](url)",
1415                vec!["[ref][link]", "[inline](url)"],
1416            ),
1417            // Shortcut reference that might conflict with other patterns
1418            ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
1419            // Empty reference with complex text
1420            (
1421                "Complex [text with *emphasis*][] reference",
1422                vec!["[text with *emphasis*][]"],
1423            ),
1424        ];
1425
1426        for (input, expected_patterns) in test_cases {
1427            println!("\nTesting edge case: {input}");
1428            let result = reflow_line(input, &options);
1429            let joined = result.join(" ");
1430            println!("Result: {joined}");
1431
1432            // Verify all expected patterns are preserved
1433            for expected_pattern in expected_patterns {
1434                assert!(
1435                    joined.contains(expected_pattern),
1436                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1437                );
1438            }
1439        }
1440    }
1441
1442    #[test]
1443    fn test_reflow_with_emphasis() {
1444        let options = ReflowOptions {
1445            line_length: 25,
1446            ..Default::default()
1447        };
1448
1449        let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);
1450
1451        // Verify emphasis markers are preserved
1452        let joined = result.join(" ");
1453        assert!(joined.contains("*emphasized*"));
1454        assert!(joined.contains("**strong**"));
1455    }
1456
1457    #[test]
1458    fn test_image_patterns_preserved() {
1459        let options = ReflowOptions {
1460            line_length: 30,
1461            ..Default::default()
1462        };
1463
1464        // Test cases for image patterns
1465        let test_cases = vec![
1466            // Inline image
1467            (
1468                "Check out ![alt text](image.png) for details",
1469                vec!["![alt text](image.png)"],
1470            ),
1471            // Reference image
1472            ("See ![image][ref] for info", vec!["![image][ref]"]),
1473            // Empty reference image
1474            ("Visit ![homepage][] today", vec!["![homepage][]"]),
1475            // Multiple images
1476            (
1477                "Images: ![first](a.png) and ![second][ref2]",
1478                vec!["![first](a.png)", "![second][ref2]"],
1479            ),
1480        ];
1481
1482        for (input, expected_patterns) in test_cases {
1483            println!("\nTesting: {input}");
1484            let result = reflow_line(input, &options);
1485            let joined = result.join(" ");
1486            println!("Result:  {joined}");
1487
1488            for expected_pattern in expected_patterns {
1489                assert!(
1490                    joined.contains(expected_pattern),
1491                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1492                );
1493            }
1494        }
1495    }
1496
1497    #[test]
1498    fn test_extended_markdown_patterns() {
1499        let options = ReflowOptions {
1500            line_length: 40,
1501            ..Default::default()
1502        };
1503
1504        let test_cases = vec![
1505            // Strikethrough
1506            ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
1507            // Wiki links
1508            (
1509                "Check [[wiki link]] and [[page|display]]",
1510                vec!["[[wiki link]]", "[[page|display]]"],
1511            ),
1512            // Math
1513            (
1514                "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
1515                vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
1516            ),
1517            // Emoji
1518            ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
1519            // HTML tags
1520            (
1521                "Text with <span>tag</span> and <br/>",
1522                vec!["<span>", "</span>", "<br/>"],
1523            ),
1524            // HTML entities
1525            ("Non-breaking&nbsp;space and em&mdash;dash", vec!["&nbsp;", "&mdash;"]),
1526        ];
1527
1528        for (input, expected_patterns) in test_cases {
1529            let result = reflow_line(input, &options);
1530            let joined = result.join(" ");
1531
1532            for pattern in expected_patterns {
1533                assert!(
1534                    joined.contains(pattern),
1535                    "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
1536                );
1537            }
1538        }
1539    }
1540
1541    #[test]
1542    fn test_complex_mixed_patterns() {
1543        let options = ReflowOptions {
1544            line_length: 50,
1545            ..Default::default()
1546        };
1547
1548        // Test that multiple pattern types work together
1549        let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";
1550        let result = reflow_line(input, &options);
1551        let joined = result.join(" ");
1552
1553        // All patterns should be preserved
1554        assert!(joined.contains("**bold**"));
1555        assert!(joined.contains("`code`"));
1556        assert!(joined.contains("[link](url)"));
1557        assert!(joined.contains("![image](img)"));
1558        assert!(joined.contains("~~strike~~"));
1559        assert!(joined.contains("$math$"));
1560        assert!(joined.contains(":emoji:"));
1561        assert!(joined.contains("<tag>"));
1562    }
1563
1564    #[test]
1565    fn test_footnote_patterns_preserved() {
1566        let options = ReflowOptions {
1567            line_length: 40,
1568            ..Default::default()
1569        };
1570
1571        let test_cases = vec![
1572            // Single footnote
1573            ("This has a footnote[^1] reference", vec!["[^1]"]),
1574            // Multiple footnotes
1575            ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
1576            // Long footnote name
1577            ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
1578        ];
1579
1580        for (input, expected_patterns) in test_cases {
1581            let result = reflow_line(input, &options);
1582            let joined = result.join(" ");
1583
1584            for expected_pattern in expected_patterns {
1585                assert!(
1586                    joined.contains(expected_pattern),
1587                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
1588                );
1589            }
1590        }
1591    }
1592
1593    #[test]
1594    fn test_reflow_markdown_numbered_lists() {
1595        // Test for issue #83: numbered lists with proper formatting
1596        let options = ReflowOptions {
1597            line_length: 50,
1598            ..Default::default()
1599        };
1600
1601        let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
16022. Short item
16033. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;
1604
1605        let result = reflow_markdown(content, &options);
1606
1607        // Define exact expected output
1608        let expected = r#"1. List `manifest` to find the manifest with the
1609   largest ID. Say it's
1610   `00000000000000000002.manifest` in this
1611   example.
16122. Short item
16133. Another long item that definitely exceeds the
1614   fifty character limit and needs wrapping"#;
1615
1616        assert_eq!(
1617            result, expected,
1618            "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
1619        );
1620    }
1621
1622    #[test]
1623    fn test_reflow_markdown_bullet_lists() {
1624        let options = ReflowOptions {
1625            line_length: 40,
1626            ..Default::default()
1627        };
1628
1629        let content = r#"- First bullet point with a very long line that needs wrapping
1630* Second bullet using asterisk
1631+ Third bullet using plus sign
1632- Short one"#;
1633
1634        let result = reflow_markdown(content, &options);
1635
1636        // Define exact expected output - each bullet type preserved with proper indentation
1637        let expected = r#"- First bullet point with a very long
1638  line that needs wrapping
1639* Second bullet using asterisk
1640+ Third bullet using plus sign
1641- Short one"#;
1642
1643        assert_eq!(
1644            result, expected,
1645            "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
1646        );
1647    }
1648}