rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::regex_cache::{
7    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options controlling how text is reflowed.
///
/// Use [`Default::default`] for the standard configuration and override
/// individual fields with struct-update syntax where needed.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Target line length, measured in characters (not bytes)
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
}
21
22impl Default for ReflowOptions {
23    fn default() -> Self {
24        Self {
25            line_length: 80,
26            break_on_sentences: true,
27            preserve_breaks: false,
28        }
29    }
30}
31
/// Returns `true` when `line` is a horizontal rule: three or more of the same
/// marker (`-`, `_` or `*`), optionally separated by spaces, and nothing else.
///
/// The line must begin with the marker itself, so callers are expected to
/// pass an already-trimmed line.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the whole line must use.
    let marker = match line.chars().next() {
        Some(c) if matches!(c, '-' | '_' | '*') => c,
        _ => return false,
    };

    // Single pass: count markers, tolerate spaces, reject anything else.
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
60
/// Check if a line is a numbered list item (e.g., "1. ", "10. ").
///
/// A numbered list item is one or more ASCII digits, followed by a period,
/// followed by either a space or the end of the line. Only ASCII digits
/// (`0`-`9`) count: other Unicode numerics such as `²` or `١` are not valid
/// Markdown list markers and are rejected.
///
/// The line must start with the marker, so callers are expected to pass a
/// line with leading indentation already removed.
fn is_numbered_list_item(line: &str) -> bool {
    // Strip the leading run of digits; if nothing was stripped there is no marker.
    let after_digits = line.trim_start_matches(|c: char| c.is_ascii_digit());
    if after_digits.len() == line.len() {
        return false;
    }

    // The digits must be followed by '.', then a space or the end of the line.
    match after_digits.strip_prefix('.') {
        Some(rest) => rest.is_empty() || rest.starts_with(' '),
        None => false,
    }
}
83
84/// Reflow a single line of markdown text to fit within the specified line length
85pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
86    // Quick check: if line is already short enough, return as-is
87    if line.chars().count() <= options.line_length {
88        return vec![line.to_string()];
89    }
90
91    // Parse the markdown to identify elements
92    let elements = parse_markdown_elements(line);
93
94    // Reflow the elements into lines
95    reflow_elements(&elements, options)
96}
97
/// One parsed unit of inline markdown.
///
/// Every variant except [`Element::Text`] is laid out as an indivisible unit
/// when reflowing. Variants store only the inner content; the surrounding
/// markdown syntax is re-added when the element is formatted.
#[derive(Debug, Clone)]
enum Element {
    /// Ordinary prose; the only variant that may be split between words
    Text(String),
    /// Inline link: `[text](url)`
    Link { text: String, url: String },
    /// Full reference link: `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[ref]`
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`
    FootnoteReference { note: String },
    /// Struck-through text: `~~text~~`
    Strikethrough(String),
    /// Wiki-style link: `[[wiki]]` or `[[wiki|text]]`
    WikiLink(String),
    /// Inline math: `$math$`
    InlineMath(String),
    /// Display math: `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode: `:emoji:`
    EmojiShortcode(String),
    /// Raw HTML tag, stored with its angle brackets: `<tag>`, `</tag>` or `<tag/>`
    HtmlTag(String),
    /// Raw HTML entity, stored whole: `&nbsp;` or `&#123;`
    HtmlEntity(String),
    /// Inline code span: `` `code` ``
    Code(String),
    /// Strong emphasis: `**text**`
    Bold(String),
    /// Emphasis: `*text*`
    Italic(String),
}
140
141impl std::fmt::Display for Element {
142    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
143        match self {
144            Element::Text(s) => write!(f, "{s}"),
145            Element::Link { text, url } => write!(f, "[{text}]({url})"),
146            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
147            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
148            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
149            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
150            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
151            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
152            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
153            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
154            Element::WikiLink(s) => write!(f, "[[{s}]]"),
155            Element::InlineMath(s) => write!(f, "${s}$"),
156            Element::DisplayMath(s) => write!(f, "$${s}$$"),
157            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
158            Element::HtmlTag(s) => write!(f, "{s}"),
159            Element::HtmlEntity(s) => write!(f, "{s}"),
160            Element::Code(s) => write!(f, "`{s}`"),
161            Element::Bold(s) => write!(f, "**{s}**"),
162            Element::Italic(s) => write!(f, "*{s}*"),
163        }
164    }
165}
166
167impl Element {
168    fn len(&self) -> usize {
169        match self {
170            Element::Text(s) => s.chars().count(),
171            Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, // [text](url)
172            Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, // [text][ref]
173            Element::EmptyReferenceLink { text } => text.chars().count() + 4, // [text][]
174            Element::ShortcutReference { reference } => reference.chars().count() + 2, // [ref]
175            Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, // ![alt](url)
176            Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, // ![alt][ref]
177            Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, // ![alt][]
178            Element::FootnoteReference { note } => note.chars().count() + 3, // [^note]
179            Element::Strikethrough(s) => s.chars().count() + 4,              // ~~text~~
180            Element::WikiLink(s) => s.chars().count() + 4,                   // [[wiki]]
181            Element::InlineMath(s) => s.chars().count() + 2,                 // $math$
182            Element::DisplayMath(s) => s.chars().count() + 4,                // $$math$$
183            Element::EmojiShortcode(s) => s.chars().count() + 2,             // :emoji:
184            Element::HtmlTag(s) => s.chars().count(),                        // <tag> - already includes brackets
185            Element::HtmlEntity(s) => s.chars().count(),                     // &nbsp; - already complete
186            Element::Code(s) => s.chars().count() + 2,                       // `code`
187            Element::Bold(s) => s.chars().count() + 4,                       // **text**
188            Element::Italic(s) => s.chars().count() + 2,                     // *text*
189        }
190    }
191}
192
193/// Parse markdown elements from text preserving the raw syntax
194///
195/// Detection order is critical:
196/// 1. Inline links [text](url) - must be detected first to avoid conflicts
197/// 2. Reference links [text][ref] - detected before shortcut references
198/// 3. Empty reference links [text][] - a special case of reference links
199/// 4. Shortcut reference links [ref] - detected last to avoid false positives
200/// 5. Other elements (code, bold, italic) - processed normally
201fn parse_markdown_elements(text: &str) -> Vec<Element> {
202    let mut elements = Vec::new();
203    let mut remaining = text;
204
205    while !remaining.is_empty() {
206        // Find the earliest occurrence of any markdown pattern
207        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
208
209        // Check for images first (they start with ! so should be detected before links)
210        // Inline images - ![alt](url)
211        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
212            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
213        {
214            earliest_match = Some((m.start(), "inline_image", m));
215        }
216
217        // Reference images - ![alt][ref]
218        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
219            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
220        {
221            earliest_match = Some((m.start(), "ref_image", m));
222        }
223
224        // Check for footnote references - [^note]
225        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
226            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
227        {
228            earliest_match = Some((m.start(), "footnote_ref", m));
229        }
230
231        // Check for inline links - [text](url)
232        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
233            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
234        {
235            earliest_match = Some((m.start(), "inline_link", m));
236        }
237
238        // Check for reference links - [text][ref]
239        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
240            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
241        {
242            earliest_match = Some((m.start(), "ref_link", m));
243        }
244
245        // Check for shortcut reference links - [ref]
246        // Only check if we haven't found an earlier pattern that would conflict
247        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
248            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
249        {
250            earliest_match = Some((m.start(), "shortcut_ref", m));
251        }
252
253        // Check for wiki-style links - [[wiki]]
254        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
255            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
256        {
257            earliest_match = Some((m.start(), "wiki_link", m));
258        }
259
260        // Check for display math first (before inline) - $$math$$
261        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
262            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
263        {
264            earliest_match = Some((m.start(), "display_math", m));
265        }
266
267        // Check for inline math - $math$
268        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
269            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
270        {
271            earliest_match = Some((m.start(), "inline_math", m));
272        }
273
274        // Check for strikethrough - ~~text~~
275        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
276            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
277        {
278            earliest_match = Some((m.start(), "strikethrough", m));
279        }
280
281        // Check for emoji shortcodes - :emoji:
282        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
283            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
284        {
285            earliest_match = Some((m.start(), "emoji", m));
286        }
287
288        // Check for HTML entities - &nbsp; etc
289        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
290            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
291        {
292            earliest_match = Some((m.start(), "html_entity", m));
293        }
294
295        // Check for HTML tags - <tag> </tag> <tag/>
296        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
297            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
298        {
299            earliest_match = Some((m.start(), "html_tag", m));
300        }
301
302        // Find earliest non-link special characters
303        let mut next_special = remaining.len();
304        let mut special_type = "";
305
306        if let Some(pos) = remaining.find('`')
307            && pos < next_special
308        {
309            next_special = pos;
310            special_type = "code";
311        }
312        if let Some(pos) = remaining.find("**")
313            && pos < next_special
314        {
315            next_special = pos;
316            special_type = "bold";
317        }
318        if let Some(pos) = remaining.find('*')
319            && pos < next_special
320            && !remaining[pos..].starts_with("**")
321        {
322            next_special = pos;
323            special_type = "italic";
324        }
325
326        // Determine which pattern to process first
327        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
328            pos < next_special
329        } else {
330            false
331        };
332
333        if should_process_markdown_link {
334            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
335
336            // Add any text before the match
337            if pos > 0 {
338                elements.push(Element::Text(remaining[..pos].to_string()));
339            }
340
341            // Process the matched pattern
342            match pattern_type {
343                "inline_image" => {
344                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
345                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
346                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
347                        elements.push(Element::InlineImage {
348                            alt: alt.to_string(),
349                            url: url.to_string(),
350                        });
351                        remaining = &remaining[match_obj.end()..];
352                    } else {
353                        elements.push(Element::Text("!".to_string()));
354                        remaining = &remaining[1..];
355                    }
356                }
357                "ref_image" => {
358                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
359                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
360                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
361
362                        if reference.is_empty() {
363                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
364                        } else {
365                            elements.push(Element::ReferenceImage {
366                                alt: alt.to_string(),
367                                reference: reference.to_string(),
368                            });
369                        }
370                        remaining = &remaining[match_obj.end()..];
371                    } else {
372                        elements.push(Element::Text("!".to_string()));
373                        remaining = &remaining[1..];
374                    }
375                }
376                "footnote_ref" => {
377                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
378                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
379                        elements.push(Element::FootnoteReference { note: note.to_string() });
380                        remaining = &remaining[match_obj.end()..];
381                    } else {
382                        elements.push(Element::Text("[".to_string()));
383                        remaining = &remaining[1..];
384                    }
385                }
386                "inline_link" => {
387                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
388                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
389                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
390                        elements.push(Element::Link {
391                            text: text.to_string(),
392                            url: url.to_string(),
393                        });
394                        remaining = &remaining[match_obj.end()..];
395                    } else {
396                        // Fallback - shouldn't happen
397                        elements.push(Element::Text("[".to_string()));
398                        remaining = &remaining[1..];
399                    }
400                }
401                "ref_link" => {
402                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
403                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
404                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
405
406                        if reference.is_empty() {
407                            // Empty reference link [text][]
408                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
409                        } else {
410                            // Regular reference link [text][ref]
411                            elements.push(Element::ReferenceLink {
412                                text: text.to_string(),
413                                reference: reference.to_string(),
414                            });
415                        }
416                        remaining = &remaining[match_obj.end()..];
417                    } else {
418                        // Fallback - shouldn't happen
419                        elements.push(Element::Text("[".to_string()));
420                        remaining = &remaining[1..];
421                    }
422                }
423                "shortcut_ref" => {
424                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
425                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
426                        elements.push(Element::ShortcutReference {
427                            reference: reference.to_string(),
428                        });
429                        remaining = &remaining[match_obj.end()..];
430                    } else {
431                        // Fallback - shouldn't happen
432                        elements.push(Element::Text("[".to_string()));
433                        remaining = &remaining[1..];
434                    }
435                }
436                "wiki_link" => {
437                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
438                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
439                        elements.push(Element::WikiLink(content.to_string()));
440                        remaining = &remaining[match_obj.end()..];
441                    } else {
442                        elements.push(Element::Text("[[".to_string()));
443                        remaining = &remaining[2..];
444                    }
445                }
446                "display_math" => {
447                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
448                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
449                        elements.push(Element::DisplayMath(math.to_string()));
450                        remaining = &remaining[match_obj.end()..];
451                    } else {
452                        elements.push(Element::Text("$$".to_string()));
453                        remaining = &remaining[2..];
454                    }
455                }
456                "inline_math" => {
457                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
458                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
459                        elements.push(Element::InlineMath(math.to_string()));
460                        remaining = &remaining[match_obj.end()..];
461                    } else {
462                        elements.push(Element::Text("$".to_string()));
463                        remaining = &remaining[1..];
464                    }
465                }
466                "strikethrough" => {
467                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
468                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
469                        elements.push(Element::Strikethrough(text.to_string()));
470                        remaining = &remaining[match_obj.end()..];
471                    } else {
472                        elements.push(Element::Text("~~".to_string()));
473                        remaining = &remaining[2..];
474                    }
475                }
476                "emoji" => {
477                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
478                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
479                        elements.push(Element::EmojiShortcode(emoji.to_string()));
480                        remaining = &remaining[match_obj.end()..];
481                    } else {
482                        elements.push(Element::Text(":".to_string()));
483                        remaining = &remaining[1..];
484                    }
485                }
486                "html_entity" => {
487                    // HTML entities are captured whole
488                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
489                    remaining = &remaining[match_obj.end()..];
490                }
491                "html_tag" => {
492                    // HTML tags are captured whole
493                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
494                    remaining = &remaining[match_obj.end()..];
495                }
496                _ => {
497                    // Unknown pattern, treat as text
498                    elements.push(Element::Text("[".to_string()));
499                    remaining = &remaining[1..];
500                }
501            }
502        } else {
503            // Process non-link special characters
504
505            // Add any text before the special character
506            if next_special > 0 && next_special < remaining.len() {
507                elements.push(Element::Text(remaining[..next_special].to_string()));
508                remaining = &remaining[next_special..];
509            }
510
511            // Process the special element
512            match special_type {
513                "code" => {
514                    // Find end of code
515                    if let Some(code_end) = remaining[1..].find('`') {
516                        let code = &remaining[1..1 + code_end];
517                        elements.push(Element::Code(code.to_string()));
518                        remaining = &remaining[1 + code_end + 1..];
519                    } else {
520                        // No closing backtick, treat as text
521                        elements.push(Element::Text(remaining.to_string()));
522                        break;
523                    }
524                }
525                "bold" => {
526                    // Check for bold text
527                    if let Some(bold_end) = remaining[2..].find("**") {
528                        let bold_text = &remaining[2..2 + bold_end];
529                        elements.push(Element::Bold(bold_text.to_string()));
530                        remaining = &remaining[2 + bold_end + 2..];
531                    } else {
532                        // No closing **, treat as text
533                        elements.push(Element::Text("**".to_string()));
534                        remaining = &remaining[2..];
535                    }
536                }
537                "italic" => {
538                    // Check for italic text
539                    if let Some(italic_end) = remaining[1..].find('*') {
540                        let italic_text = &remaining[1..1 + italic_end];
541                        elements.push(Element::Italic(italic_text.to_string()));
542                        remaining = &remaining[1 + italic_end + 1..];
543                    } else {
544                        // No closing *, treat as text
545                        elements.push(Element::Text("*".to_string()));
546                        remaining = &remaining[1..];
547                    }
548                }
549                _ => {
550                    // No special elements found, add all remaining text
551                    elements.push(Element::Text(remaining.to_string()));
552                    break;
553                }
554            }
555        }
556    }
557
558    elements
559}
560
561/// Reflow elements into lines that fit within the line length
562fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
563    let mut lines = Vec::new();
564    let mut current_line = String::new();
565    let mut current_length = 0;
566
567    for element in elements {
568        let element_str = format!("{element}");
569        let element_len = element.len();
570
571        // For text elements that might need breaking
572        if let Element::Text(text) = element {
573            // If this is a text element, always process it word by word
574            let words: Vec<&str> = text.split_whitespace().collect();
575
576            for word in words {
577                let word_len = word.chars().count();
578                if current_length > 0 && current_length + 1 + word_len > options.line_length {
579                    // Start a new line
580                    lines.push(current_line.trim().to_string());
581                    current_line = word.to_string();
582                    current_length = word_len;
583                } else {
584                    // Add word to current line
585                    if current_length > 0 {
586                        current_line.push(' ');
587                        current_length += 1;
588                    }
589                    current_line.push_str(word);
590                    current_length += word_len;
591                }
592            }
593        } else {
594            // For non-text elements (code, links, references), treat as atomic units
595            // These should never be broken across lines
596            if current_length > 0 && current_length + 1 + element_len > options.line_length {
597                // Start a new line
598                lines.push(current_line.trim().to_string());
599                current_line = element_str;
600                current_length = element_len;
601            } else {
602                // Add element to current line
603                if current_length > 0 {
604                    current_line.push(' ');
605                    current_length += 1;
606                }
607                current_line.push_str(&element_str);
608                current_length += element_len;
609            }
610        }
611    }
612
613    // Don't forget the last line
614    if !current_line.is_empty() {
615        lines.push(current_line.trim_end().to_string());
616    }
617
618    lines
619}
620
621/// Reflow markdown content preserving structure
622pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
623    let lines: Vec<&str> = content.lines().collect();
624    let mut result = Vec::new();
625    let mut i = 0;
626
627    while i < lines.len() {
628        let line = lines[i];
629        let trimmed = line.trim();
630
631        // Preserve empty lines
632        if trimmed.is_empty() {
633            result.push(String::new());
634            i += 1;
635            continue;
636        }
637
638        // Preserve headings as-is
639        if trimmed.starts_with('#') {
640            result.push(line.to_string());
641            i += 1;
642            continue;
643        }
644
645        // Preserve fenced code blocks
646        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
647            result.push(line.to_string());
648            i += 1;
649            // Copy lines until closing fence
650            while i < lines.len() {
651                result.push(lines[i].to_string());
652                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
653                    i += 1;
654                    break;
655                }
656                i += 1;
657            }
658            continue;
659        }
660
661        // Preserve indented code blocks (4+ spaces or 1+ tab)
662        if line.starts_with("    ") || line.starts_with("\t") {
663            // Collect all consecutive indented lines
664            result.push(line.to_string());
665            i += 1;
666            while i < lines.len() {
667                let next_line = lines[i];
668                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
669                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
670                    result.push(next_line.to_string());
671                    i += 1;
672                } else {
673                    break;
674                }
675            }
676            continue;
677        }
678
679        // Preserve block quotes (but reflow their content)
680        if trimmed.starts_with('>') {
681            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
682            let quote_content = &line[quote_prefix.len()..].trim_start();
683
684            let reflowed = reflow_line(quote_content, options);
685            for reflowed_line in reflowed.iter() {
686                result.push(format!("{quote_prefix} {reflowed_line}"));
687            }
688            i += 1;
689            continue;
690        }
691
692        // Preserve horizontal rules first (before checking for lists)
693        if is_horizontal_rule(trimmed) {
694            result.push(line.to_string());
695            i += 1;
696            continue;
697        }
698
699        // Preserve lists (but not horizontal rules)
700        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
701            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
702            || trimmed.starts_with('+')
703            || is_numbered_list_item(trimmed)
704        {
705            // Find the list marker and preserve indentation
706            let indent = line.len() - line.trim_start().len();
707            let indent_str = " ".repeat(indent);
708
709            // For numbered lists, find the period and the space after it
710            // For bullet lists, find the marker and the space after it
711            let mut marker_end = indent;
712            let mut content_start = indent;
713
714            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
715                // Numbered list: find the period
716                if let Some(period_pos) = line[indent..].find('.') {
717                    marker_end = indent + period_pos + 1; // Include the period
718                    content_start = marker_end;
719                    // Skip any spaces after the period to find content start
720                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
721                        content_start += 1;
722                    }
723                }
724            } else {
725                // Bullet list: marker is single character
726                marker_end = indent + 1; // Just the marker character
727                content_start = marker_end;
728                // Skip any spaces after the marker
729                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
730                    content_start += 1;
731                }
732            }
733
734            let marker = &line[indent..marker_end];
735
736            // Collect all content for this list item (including continuation lines)
737            let mut list_content = vec![line[content_start..].to_string()];
738            i += 1;
739
740            // Collect continuation lines (indented lines that are part of this list item)
741            while i < lines.len() {
742                let next_line = lines[i];
743                let next_trimmed = next_line.trim();
744
745                // Stop if we hit an empty line or another list item or special block
746                if next_trimmed.is_empty()
747                    || next_trimmed.starts_with('#')
748                    || next_trimmed.starts_with("```")
749                    || next_trimmed.starts_with("~~~")
750                    || next_trimmed.starts_with('>')
751                    || next_trimmed.starts_with('|')
752                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
753                    || is_horizontal_rule(next_trimmed)
754                    || (next_trimmed.starts_with('-')
755                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
756                    || (next_trimmed.starts_with('*')
757                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
758                    || (next_trimmed.starts_with('+')
759                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
760                    || is_numbered_list_item(next_trimmed)
761                {
762                    break;
763                }
764
765                // Check if this line is indented (continuation of list item)
766                let next_indent = next_line.len() - next_line.trim_start().len();
767                if next_indent >= content_start {
768                    // This is a continuation line - add its content (trim only leading space)
769                    // We need to preserve trailing spaces for hard breaks
770                    list_content.push(next_line.trim_start().to_string());
771                    i += 1;
772                } else {
773                    // Not indented enough, not part of this list item
774                    break;
775                }
776            }
777
778            // Join all the content with spaces (if preserve_breaks is false)
779            let combined_content = if options.preserve_breaks {
780                list_content[0].clone()
781            } else {
782                list_content.join(" ")
783            };
784
785            // Calculate the proper indentation for continuation lines
786            let trimmed_marker = marker;
787            let continuation_spaces = content_start;
788
789            // Adjust line length to account for list marker and space
790            let prefix_length = indent + trimmed_marker.len() + 1;
791
792            // Create adjusted options with reduced line length
793            let adjusted_options = ReflowOptions {
794                line_length: options.line_length.saturating_sub(prefix_length),
795                ..options.clone()
796            };
797
798            let reflowed = reflow_line(&combined_content, &adjusted_options);
799            for (j, reflowed_line) in reflowed.iter().enumerate() {
800                if j == 0 {
801                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
802                } else {
803                    // Continuation lines aligned with text after marker
804                    let continuation_indent = " ".repeat(continuation_spaces);
805                    result.push(format!("{continuation_indent}{reflowed_line}"));
806                }
807            }
808            continue;
809        }
810
811        // Preserve tables
812        if trimmed.contains('|') {
813            result.push(line.to_string());
814            i += 1;
815            continue;
816        }
817
818        // Preserve reference definitions
819        if trimmed.starts_with('[') && line.contains("]:") {
820            result.push(line.to_string());
821            i += 1;
822            continue;
823        }
824
825        // Check if this is a single line that doesn't need processing
826        let mut is_single_line_paragraph = true;
827        if i + 1 < lines.len() {
828            let next_line = lines[i + 1];
829            let next_trimmed = next_line.trim();
830            // Check if next line starts a new block
831            if !next_trimmed.is_empty()
832                && !next_trimmed.starts_with('#')
833                && !next_trimmed.starts_with("```")
834                && !next_trimmed.starts_with("~~~")
835                && !next_trimmed.starts_with('>')
836                && !next_trimmed.starts_with('|')
837                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
838                && !is_horizontal_rule(next_trimmed)
839                && !(next_trimmed.starts_with('-')
840                    && !is_horizontal_rule(next_trimmed)
841                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
842                && !(next_trimmed.starts_with('*')
843                    && !is_horizontal_rule(next_trimmed)
844                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
845                && !(next_trimmed.starts_with('+')
846                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
847                && !is_numbered_list_item(next_trimmed)
848            {
849                is_single_line_paragraph = false;
850            }
851        }
852
853        // If it's a single line that fits, just add it as-is
854        if is_single_line_paragraph && line.chars().count() <= options.line_length {
855            result.push(line.to_string());
856            i += 1;
857            continue;
858        }
859
860        // For regular paragraphs, collect consecutive lines
861        let mut paragraph_parts = Vec::new();
862        let mut current_part = vec![line];
863        i += 1;
864
865        // If preserve_breaks is true, treat each line separately
866        if options.preserve_breaks {
867            // Don't collect consecutive lines - just reflow this single line
868            let has_hard_break = line.ends_with("  ");
869            let reflowed = reflow_line(line, options);
870
871            // Preserve hard breaks (two trailing spaces)
872            if has_hard_break && !reflowed.is_empty() {
873                let mut reflowed_with_break = reflowed;
874                let last_idx = reflowed_with_break.len() - 1;
875                if !reflowed_with_break[last_idx].ends_with("  ") {
876                    reflowed_with_break[last_idx].push_str("  ");
877                }
878                result.extend(reflowed_with_break);
879            } else {
880                result.extend(reflowed);
881            }
882        } else {
883            // Original behavior: collect consecutive lines into a paragraph
884            while i < lines.len() {
885                let prev_line = if !current_part.is_empty() {
886                    current_part.last().unwrap()
887                } else {
888                    ""
889                };
890                let next_line = lines[i];
891                let next_trimmed = next_line.trim();
892
893                // Stop at empty lines or special blocks
894                if next_trimmed.is_empty()
895                    || next_trimmed.starts_with('#')
896                    || next_trimmed.starts_with("```")
897                    || next_trimmed.starts_with("~~~")
898                    || next_trimmed.starts_with('>')
899                    || next_trimmed.starts_with('|')
900                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
901                    || is_horizontal_rule(next_trimmed)
902                    || (next_trimmed.starts_with('-')
903                        && !is_horizontal_rule(next_trimmed)
904                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
905                    || (next_trimmed.starts_with('*')
906                        && !is_horizontal_rule(next_trimmed)
907                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
908                    || (next_trimmed.starts_with('+')
909                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
910                    || is_numbered_list_item(next_trimmed)
911                {
912                    break;
913                }
914
915                // Check if previous line ends with hard break (two spaces)
916                if prev_line.ends_with("  ") {
917                    // Start a new part after hard break
918                    paragraph_parts.push(current_part.join(" "));
919                    current_part = vec![next_line];
920                } else {
921                    current_part.push(next_line);
922                }
923                i += 1;
924            }
925
926            // Add the last part
927            if !current_part.is_empty() {
928                if current_part.len() == 1 {
929                    // Single line, don't add trailing space
930                    paragraph_parts.push(current_part[0].to_string());
931                } else {
932                    paragraph_parts.push(current_part.join(" "));
933                }
934            }
935
936            // Reflow each part separately, preserving hard breaks
937            for (j, part) in paragraph_parts.iter().enumerate() {
938                let reflowed = reflow_line(part, options);
939                result.extend(reflowed);
940
941                // Preserve hard break by ensuring last line of part ends with two spaces
942                if j < paragraph_parts.len() - 1 && !result.is_empty() {
943                    let last_idx = result.len() - 1;
944                    if !result[last_idx].ends_with("  ") {
945                        result[last_idx].push_str("  ");
946                    }
947                }
948            }
949        }
950    }
951
952    // Preserve trailing newline if the original content had one
953    let result_text = result.join("\n");
954    if content.ends_with('\n') && !result_text.ends_with('\n') {
955        format!("{result_text}\n")
956    } else {
957        result_text
958    }
959}
960
#[cfg(test)]
mod tests {
    //! Unit tests for the reflow helpers.
    //!
    //! The `reflow_line` tests join the produced lines with a single space and
    //! check that each Markdown construct still appears verbatim, i.e. that it
    //! was never split across a line boundary. The `reflow_markdown` tests pin
    //! the exact expected output for list items.

    use super::*;

    /// Plain prose longer than the limit is wrapped into three lines, each of
    /// which fits within the configured length (measured in characters).
    #[test]
    fn test_reflow_simple_text() {
        let options = ReflowOptions {
            line_length: 20,
            ..Default::default()
        };

        let input = "This is a very long line that needs to be wrapped";
        let result = reflow_line(input, &options);

        assert_eq!(result.len(), 3);
        // Check every produced line rather than a fixed set of indices.
        for line in &result {
            assert!(
                line.chars().count() <= 20,
                "Wrapped line exceeds 20 characters: '{line}'"
            );
        }
    }

    /// An inline code span containing a space must survive wrapping intact.
    #[test]
    fn test_preserve_inline_code() {
        let options = ReflowOptions {
            line_length: 30,
            ..Default::default()
        };

        let result = reflow_line("This line has `inline code` that should be preserved", &options);
        // Verify inline code is not broken
        let joined = result.join(" ");
        assert!(joined.contains("`inline code`"));
    }

    /// An inline link (text plus URL) must survive wrapping intact.
    #[test]
    fn test_preserve_links() {
        let options = ReflowOptions {
            line_length: 40,
            ..Default::default()
        };

        let text = "Check out [this link](https://example.com/very/long/url) for more info";
        let result = reflow_line(text, &options);

        // Verify link is preserved intact
        let joined = result.join(" ");
        assert!(joined.contains("[this link](https://example.com/very/long/url)"));
    }

    /// Full, collapsed and shortcut reference links are kept as atomic units,
    /// and the output contains neither of the "broken link" markers.
    #[test]
    fn test_reference_link_patterns_fixed() {
        let options = ReflowOptions {
            line_length: 30,
            break_on_sentences: true,
            preserve_breaks: false,
        };

        // Test cases that verify reference links are preserved as atomic units
        let test_cases = vec![
            // Reference link: [text][ref] - should be preserved intact
            ("Check out [text][ref] for details", vec!["[text][ref]"]),
            // Empty reference: [text][] - should be preserved intact
            ("See [text][] for info", vec!["[text][]"]),
            // Shortcut reference: [homepage] - should be preserved intact
            ("Visit [homepage] today", vec!["[homepage]"]),
            // Multiple reference links in one line
            (
                "Links: [first][ref1] and [second][ref2] here",
                vec!["[first][ref1]", "[second][ref2]"],
            ),
            // Mixed inline and reference links
            (
                "See [inline](url) and [reference][ref] links",
                vec!["[inline](url)", "[reference][ref]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            println!("\nTesting: {input}");
            let result = reflow_line(input, &options);
            let joined = result.join(" ");
            println!("Result:  {joined}");

            // Verify all expected patterns are preserved
            for expected_pattern in expected_patterns {
                assert!(
                    joined.contains(expected_pattern),
                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
                );
            }

            // Verify no broken patterns exist (spaces inside brackets).
            // Both markers must be absent: joining the two checks with `||`
            // would only fail when *both* appear, letting e.g. "[text] [ref]"
            // slip through.
            assert!(
                !joined.contains("[ ") && !joined.contains("] ["),
                "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
            );
        }
    }

    /// Reference-link edge cases: escaped brackets, nested brackets, a
    /// reference link followed by an inline link, bracketed indices, and
    /// emphasis inside link text.
    #[test]
    fn test_reference_link_edge_cases() {
        let options = ReflowOptions {
            line_length: 40,
            break_on_sentences: true,
            preserve_breaks: false,
        };

        // Test cases for edge cases and potential conflicts
        let test_cases = vec![
            // Escaped brackets should be treated as regular text
            ("Text with \\[escaped\\] brackets", vec!["\\[escaped\\]"]),
            // Nested brackets in reference links
            (
                "Link [text with [nested] content][ref]",
                vec!["[text with [nested] content][ref]"],
            ),
            // Reference link followed by inline link
            (
                "First [ref][link] then [inline](url)",
                vec!["[ref][link]", "[inline](url)"],
            ),
            // Shortcut reference that might conflict with other patterns
            ("Array [0] and reference [link] here", vec!["[0]", "[link]"]),
            // Empty reference with complex text
            (
                "Complex [text with *emphasis*][] reference",
                vec!["[text with *emphasis*][]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            println!("\nTesting edge case: {input}");
            let result = reflow_line(input, &options);
            let joined = result.join(" ");
            println!("Result: {joined}");

            // Verify all expected patterns are preserved
            for expected_pattern in expected_patterns {
                assert!(
                    joined.contains(expected_pattern),
                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
                );
            }
        }
    }

    /// Emphasis (`*…*`) and strong (`**…**`) markers stay attached to their text.
    #[test]
    fn test_reflow_with_emphasis() {
        let options = ReflowOptions {
            line_length: 25,
            ..Default::default()
        };

        let result = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &options);

        // Verify emphasis markers are preserved
        let joined = result.join(" ");
        assert!(joined.contains("*emphasized*"));
        assert!(joined.contains("**strong**"));
    }

    /// Inline, reference and collapsed-reference images are kept intact.
    #[test]
    fn test_image_patterns_preserved() {
        let options = ReflowOptions {
            line_length: 30,
            ..Default::default()
        };

        // Test cases for image patterns
        let test_cases = vec![
            // Inline image
            (
                "Check out ![alt text](image.png) for details",
                vec!["![alt text](image.png)"],
            ),
            // Reference image
            ("See ![image][ref] for info", vec!["![image][ref]"]),
            // Empty reference image
            ("Visit ![homepage][] today", vec!["![homepage][]"]),
            // Multiple images
            (
                "Images: ![first](a.png) and ![second][ref2]",
                vec!["![first](a.png)", "![second][ref2]"],
            ),
        ];

        for (input, expected_patterns) in test_cases {
            println!("\nTesting: {input}");
            let result = reflow_line(input, &options);
            let joined = result.join(" ");
            println!("Result:  {joined}");

            for expected_pattern in expected_patterns {
                assert!(
                    joined.contains(expected_pattern),
                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
                );
            }
        }
    }

    /// Extended syntax is preserved: strikethrough, wiki links, inline and
    /// display math, emoji shortcodes, HTML tags and HTML entities.
    #[test]
    fn test_extended_markdown_patterns() {
        let options = ReflowOptions {
            line_length: 40,
            ..Default::default()
        };

        let test_cases = vec![
            // Strikethrough
            ("Text with ~~strikethrough~~ preserved", vec!["~~strikethrough~~"]),
            // Wiki links
            (
                "Check [[wiki link]] and [[page|display]]",
                vec!["[[wiki link]]", "[[page|display]]"],
            ),
            // Math
            (
                "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
                vec!["$x^2 + y^2$", "$$\\int f(x) dx$$"],
            ),
            // Emoji
            ("Use :smile: and :heart: emojis", vec![":smile:", ":heart:"]),
            // HTML tags
            (
                "Text with <span>tag</span> and <br/>",
                vec!["<span>", "</span>", "<br/>"],
            ),
            // HTML entities
            ("Non-breaking&nbsp;space and em&mdash;dash", vec!["&nbsp;", "&mdash;"]),
        ];

        for (input, expected_patterns) in test_cases {
            let result = reflow_line(input, &options);
            let joined = result.join(" ");

            for pattern in expected_patterns {
                assert!(
                    joined.contains(pattern),
                    "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
                );
            }
        }
    }

    /// Many different inline constructs on a single line are all preserved
    /// when that line has to be wrapped.
    #[test]
    fn test_complex_mixed_patterns() {
        let options = ReflowOptions {
            line_length: 50,
            ..Default::default()
        };

        // Test that multiple pattern types work together
        let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";
        let result = reflow_line(input, &options);
        let joined = result.join(" ");

        // All patterns should be preserved
        assert!(joined.contains("**bold**"));
        assert!(joined.contains("`code`"));
        assert!(joined.contains("[link](url)"));
        assert!(joined.contains("![image](img)"));
        assert!(joined.contains("~~strike~~"));
        assert!(joined.contains("$math$"));
        assert!(joined.contains(":emoji:"));
        assert!(joined.contains("<tag>"));
    }

    /// Footnote references (`[^name]`) are kept intact.
    #[test]
    fn test_footnote_patterns_preserved() {
        let options = ReflowOptions {
            line_length: 40,
            ..Default::default()
        };

        let test_cases = vec![
            // Single footnote
            ("This has a footnote[^1] reference", vec!["[^1]"]),
            // Multiple footnotes
            ("Text with [^first] and [^second] notes", vec!["[^first]", "[^second]"]),
            // Long footnote name
            ("Reference to [^long-footnote-name] here", vec!["[^long-footnote-name]"]),
        ];

        for (input, expected_patterns) in test_cases {
            let result = reflow_line(input, &options);
            let joined = result.join(" ");

            for expected_pattern in expected_patterns {
                assert!(
                    joined.contains(expected_pattern),
                    "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
                );
            }
        }
    }

    /// Numbered list items are wrapped with the marker kept on the first line
    /// and continuation lines indented to align with the item text.
    #[test]
    fn test_reflow_markdown_numbered_lists() {
        // Test for issue #83: numbered lists with proper formatting
        let options = ReflowOptions {
            line_length: 50,
            ..Default::default()
        };

        let content = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
2. Short item
3. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;

        let result = reflow_markdown(content, &options);

        // Define exact expected output
        let expected = r#"1. List `manifest` to find the manifest with the
   largest ID. Say it's
   `00000000000000000002.manifest` in this
   example.
2. Short item
3. Another long item that definitely exceeds the
   fifty character limit and needs wrapping"#;

        assert_eq!(
            result, expected,
            "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
        );
    }

    /// Bullet items keep their original marker (`-`, `*`, `+`); wrapped
    /// continuation lines are indented by two spaces.
    #[test]
    fn test_reflow_markdown_bullet_lists() {
        let options = ReflowOptions {
            line_length: 40,
            ..Default::default()
        };

        let content = r#"- First bullet point with a very long line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

        let result = reflow_markdown(content, &options);

        // Define exact expected output - each bullet type preserved with proper indentation
        let expected = r#"- First bullet point with a very long
  line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

        assert_eq!(
            result, expected,
            "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
        );
    }
}