rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::regex_cache::{
7    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options controlling how text is reflowed.
///
/// Construct with [`Default::default`] and override individual fields with
/// struct-update syntax, e.g. `ReflowOptions { line_length: 100, ..Default::default() }`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target line length, measured in `char`s
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
}
21
22impl Default for ReflowOptions {
23    fn default() -> Self {
24        Self {
25            line_length: 80,
26            break_on_sentences: true,
27            preserve_breaks: false,
28        }
29    }
30}
31
32/// Reflow a single line of markdown text to fit within the specified line length
33pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
34    // Quick check: if line is already short enough, return as-is
35    if line.chars().count() <= options.line_length {
36        return vec![line.to_string()];
37    }
38
39    // Parse the markdown to identify elements
40    let elements = parse_markdown_elements(line);
41
42    // Reflow the elements into lines
43    reflow_elements(&elements, options)
44}
45
/// One inline piece of a Markdown line.
///
/// Except for [`Element::Text`], [`Element::HtmlTag`] and
/// [`Element::HtmlEntity`], variants store only the inner payload; the
/// surrounding delimiters are added back when the element is displayed.
#[derive(Debug, Clone)]
enum Element {
    /// Ordinary prose; the only variant that may be split across lines
    Text(String),
    /// Inline link: `[text](url)`
    Link { text: String, url: String },
    /// Full reference link: `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[ref]`
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote marker: `[^note]`
    FootnoteReference { note: String },
    /// Struck-through text: `~~text~~`
    Strikethrough(String),
    /// Wiki-style link: `[[wiki]]` or `[[wiki|text]]`
    WikiLink(String),
    /// Inline math: `$math$`
    InlineMath(String),
    /// Display math: `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode: `:emoji:`
    EmojiShortcode(String),
    /// Raw HTML tag, stored verbatim: `<tag>`, `</tag>` or `<tag/>`
    HtmlTag(String),
    /// Raw HTML entity, stored verbatim: `&nbsp;` or `&#123;`
    HtmlEntity(String),
    /// Inline code span: `` `code` ``
    Code(String),
    /// Strong emphasis: `**text**`
    Bold(String),
    /// Emphasis: `*text*`
    Italic(String),
}
88
89impl std::fmt::Display for Element {
90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
91        match self {
92            Element::Text(s) => write!(f, "{s}"),
93            Element::Link { text, url } => write!(f, "[{text}]({url})"),
94            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
95            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
96            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
97            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
98            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
99            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
100            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
101            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
102            Element::WikiLink(s) => write!(f, "[[{s}]]"),
103            Element::InlineMath(s) => write!(f, "${s}$"),
104            Element::DisplayMath(s) => write!(f, "$${s}$$"),
105            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
106            Element::HtmlTag(s) => write!(f, "{s}"),
107            Element::HtmlEntity(s) => write!(f, "{s}"),
108            Element::Code(s) => write!(f, "`{s}`"),
109            Element::Bold(s) => write!(f, "**{s}**"),
110            Element::Italic(s) => write!(f, "*{s}*"),
111        }
112    }
113}
114
115impl Element {
116    fn len(&self) -> usize {
117        match self {
118            Element::Text(s) => s.chars().count(),
119            Element::Link { text, url } => text.chars().count() + url.chars().count() + 4, // [text](url)
120            Element::ReferenceLink { text, reference } => text.chars().count() + reference.chars().count() + 4, // [text][ref]
121            Element::EmptyReferenceLink { text } => text.chars().count() + 4, // [text][]
122            Element::ShortcutReference { reference } => reference.chars().count() + 2, // [ref]
123            Element::InlineImage { alt, url } => alt.chars().count() + url.chars().count() + 5, // ![alt](url)
124            Element::ReferenceImage { alt, reference } => alt.chars().count() + reference.chars().count() + 5, // ![alt][ref]
125            Element::EmptyReferenceImage { alt } => alt.chars().count() + 5, // ![alt][]
126            Element::FootnoteReference { note } => note.chars().count() + 3, // [^note]
127            Element::Strikethrough(s) => s.chars().count() + 4,              // ~~text~~
128            Element::WikiLink(s) => s.chars().count() + 4,                   // [[wiki]]
129            Element::InlineMath(s) => s.chars().count() + 2,                 // $math$
130            Element::DisplayMath(s) => s.chars().count() + 4,                // $$math$$
131            Element::EmojiShortcode(s) => s.chars().count() + 2,             // :emoji:
132            Element::HtmlTag(s) => s.chars().count(),                        // <tag> - already includes brackets
133            Element::HtmlEntity(s) => s.chars().count(),                     // &nbsp; - already complete
134            Element::Code(s) => s.chars().count() + 2,                       // `code`
135            Element::Bold(s) => s.chars().count() + 4,                       // **text**
136            Element::Italic(s) => s.chars().count() + 2,                     // *text*
137        }
138    }
139}
140
141/// Parse markdown elements from text preserving the raw syntax
142///
143/// Detection order is critical:
144/// 1. Inline links [text](url) - must be detected first to avoid conflicts
145/// 2. Reference links [text][ref] - detected before shortcut references
146/// 3. Empty reference links [text][] - a special case of reference links
147/// 4. Shortcut reference links [ref] - detected last to avoid false positives
148/// 5. Other elements (code, bold, italic) - processed normally
149fn parse_markdown_elements(text: &str) -> Vec<Element> {
150    let mut elements = Vec::new();
151    let mut remaining = text;
152
153    while !remaining.is_empty() {
154        // Find the earliest occurrence of any markdown pattern
155        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
156
157        // Check for images first (they start with ! so should be detected before links)
158        // Inline images - ![alt](url)
159        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
160            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
161        {
162            earliest_match = Some((m.start(), "inline_image", m));
163        }
164
165        // Reference images - ![alt][ref]
166        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
167            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
168        {
169            earliest_match = Some((m.start(), "ref_image", m));
170        }
171
172        // Check for footnote references - [^note]
173        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
174            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
175        {
176            earliest_match = Some((m.start(), "footnote_ref", m));
177        }
178
179        // Check for inline links - [text](url)
180        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
181            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
182        {
183            earliest_match = Some((m.start(), "inline_link", m));
184        }
185
186        // Check for reference links - [text][ref]
187        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
188            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
189        {
190            earliest_match = Some((m.start(), "ref_link", m));
191        }
192
193        // Check for shortcut reference links - [ref]
194        // Only check if we haven't found an earlier pattern that would conflict
195        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
196            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
197        {
198            earliest_match = Some((m.start(), "shortcut_ref", m));
199        }
200
201        // Check for wiki-style links - [[wiki]]
202        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
203            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
204        {
205            earliest_match = Some((m.start(), "wiki_link", m));
206        }
207
208        // Check for display math first (before inline) - $$math$$
209        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
210            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
211        {
212            earliest_match = Some((m.start(), "display_math", m));
213        }
214
215        // Check for inline math - $math$
216        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
217            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
218        {
219            earliest_match = Some((m.start(), "inline_math", m));
220        }
221
222        // Check for strikethrough - ~~text~~
223        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
224            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
225        {
226            earliest_match = Some((m.start(), "strikethrough", m));
227        }
228
229        // Check for emoji shortcodes - :emoji:
230        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
231            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
232        {
233            earliest_match = Some((m.start(), "emoji", m));
234        }
235
236        // Check for HTML entities - &nbsp; etc
237        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
238            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
239        {
240            earliest_match = Some((m.start(), "html_entity", m));
241        }
242
243        // Check for HTML tags - <tag> </tag> <tag/>
244        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
245            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
246        {
247            earliest_match = Some((m.start(), "html_tag", m));
248        }
249
250        // Find earliest non-link special characters
251        let mut next_special = remaining.len();
252        let mut special_type = "";
253
254        if let Some(pos) = remaining.find('`')
255            && pos < next_special
256        {
257            next_special = pos;
258            special_type = "code";
259        }
260        if let Some(pos) = remaining.find("**")
261            && pos < next_special
262        {
263            next_special = pos;
264            special_type = "bold";
265        }
266        if let Some(pos) = remaining.find('*')
267            && pos < next_special
268            && !remaining[pos..].starts_with("**")
269        {
270            next_special = pos;
271            special_type = "italic";
272        }
273
274        // Determine which pattern to process first
275        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
276            pos < next_special
277        } else {
278            false
279        };
280
281        if should_process_markdown_link {
282            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
283
284            // Add any text before the match
285            if pos > 0 {
286                elements.push(Element::Text(remaining[..pos].to_string()));
287            }
288
289            // Process the matched pattern
290            match pattern_type {
291                "inline_image" => {
292                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
293                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
294                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
295                        elements.push(Element::InlineImage {
296                            alt: alt.to_string(),
297                            url: url.to_string(),
298                        });
299                        remaining = &remaining[match_obj.end()..];
300                    } else {
301                        elements.push(Element::Text("!".to_string()));
302                        remaining = &remaining[1..];
303                    }
304                }
305                "ref_image" => {
306                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
307                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
308                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
309
310                        if reference.is_empty() {
311                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
312                        } else {
313                            elements.push(Element::ReferenceImage {
314                                alt: alt.to_string(),
315                                reference: reference.to_string(),
316                            });
317                        }
318                        remaining = &remaining[match_obj.end()..];
319                    } else {
320                        elements.push(Element::Text("!".to_string()));
321                        remaining = &remaining[1..];
322                    }
323                }
324                "footnote_ref" => {
325                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
326                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
327                        elements.push(Element::FootnoteReference { note: note.to_string() });
328                        remaining = &remaining[match_obj.end()..];
329                    } else {
330                        elements.push(Element::Text("[".to_string()));
331                        remaining = &remaining[1..];
332                    }
333                }
334                "inline_link" => {
335                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
336                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
337                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
338                        elements.push(Element::Link {
339                            text: text.to_string(),
340                            url: url.to_string(),
341                        });
342                        remaining = &remaining[match_obj.end()..];
343                    } else {
344                        // Fallback - shouldn't happen
345                        elements.push(Element::Text("[".to_string()));
346                        remaining = &remaining[1..];
347                    }
348                }
349                "ref_link" => {
350                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
351                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
352                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
353
354                        if reference.is_empty() {
355                            // Empty reference link [text][]
356                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
357                        } else {
358                            // Regular reference link [text][ref]
359                            elements.push(Element::ReferenceLink {
360                                text: text.to_string(),
361                                reference: reference.to_string(),
362                            });
363                        }
364                        remaining = &remaining[match_obj.end()..];
365                    } else {
366                        // Fallback - shouldn't happen
367                        elements.push(Element::Text("[".to_string()));
368                        remaining = &remaining[1..];
369                    }
370                }
371                "shortcut_ref" => {
372                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
373                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
374                        elements.push(Element::ShortcutReference {
375                            reference: reference.to_string(),
376                        });
377                        remaining = &remaining[match_obj.end()..];
378                    } else {
379                        // Fallback - shouldn't happen
380                        elements.push(Element::Text("[".to_string()));
381                        remaining = &remaining[1..];
382                    }
383                }
384                "wiki_link" => {
385                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
386                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
387                        elements.push(Element::WikiLink(content.to_string()));
388                        remaining = &remaining[match_obj.end()..];
389                    } else {
390                        elements.push(Element::Text("[[".to_string()));
391                        remaining = &remaining[2..];
392                    }
393                }
394                "display_math" => {
395                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
396                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
397                        elements.push(Element::DisplayMath(math.to_string()));
398                        remaining = &remaining[match_obj.end()..];
399                    } else {
400                        elements.push(Element::Text("$$".to_string()));
401                        remaining = &remaining[2..];
402                    }
403                }
404                "inline_math" => {
405                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
406                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
407                        elements.push(Element::InlineMath(math.to_string()));
408                        remaining = &remaining[match_obj.end()..];
409                    } else {
410                        elements.push(Element::Text("$".to_string()));
411                        remaining = &remaining[1..];
412                    }
413                }
414                "strikethrough" => {
415                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
416                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
417                        elements.push(Element::Strikethrough(text.to_string()));
418                        remaining = &remaining[match_obj.end()..];
419                    } else {
420                        elements.push(Element::Text("~~".to_string()));
421                        remaining = &remaining[2..];
422                    }
423                }
424                "emoji" => {
425                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
426                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
427                        elements.push(Element::EmojiShortcode(emoji.to_string()));
428                        remaining = &remaining[match_obj.end()..];
429                    } else {
430                        elements.push(Element::Text(":".to_string()));
431                        remaining = &remaining[1..];
432                    }
433                }
434                "html_entity" => {
435                    // HTML entities are captured whole
436                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
437                    remaining = &remaining[match_obj.end()..];
438                }
439                "html_tag" => {
440                    // HTML tags are captured whole
441                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
442                    remaining = &remaining[match_obj.end()..];
443                }
444                _ => {
445                    // Unknown pattern, treat as text
446                    elements.push(Element::Text("[".to_string()));
447                    remaining = &remaining[1..];
448                }
449            }
450        } else {
451            // Process non-link special characters
452
453            // Add any text before the special character
454            if next_special > 0 && next_special < remaining.len() {
455                elements.push(Element::Text(remaining[..next_special].to_string()));
456                remaining = &remaining[next_special..];
457            }
458
459            // Process the special element
460            match special_type {
461                "code" => {
462                    // Find end of code
463                    if let Some(code_end) = remaining[1..].find('`') {
464                        let code = &remaining[1..1 + code_end];
465                        elements.push(Element::Code(code.to_string()));
466                        remaining = &remaining[1 + code_end + 1..];
467                    } else {
468                        // No closing backtick, treat as text
469                        elements.push(Element::Text(remaining.to_string()));
470                        break;
471                    }
472                }
473                "bold" => {
474                    // Check for bold text
475                    if let Some(bold_end) = remaining[2..].find("**") {
476                        let bold_text = &remaining[2..2 + bold_end];
477                        elements.push(Element::Bold(bold_text.to_string()));
478                        remaining = &remaining[2 + bold_end + 2..];
479                    } else {
480                        // No closing **, treat as text
481                        elements.push(Element::Text("**".to_string()));
482                        remaining = &remaining[2..];
483                    }
484                }
485                "italic" => {
486                    // Check for italic text
487                    if let Some(italic_end) = remaining[1..].find('*') {
488                        let italic_text = &remaining[1..1 + italic_end];
489                        elements.push(Element::Italic(italic_text.to_string()));
490                        remaining = &remaining[1 + italic_end + 1..];
491                    } else {
492                        // No closing *, treat as text
493                        elements.push(Element::Text("*".to_string()));
494                        remaining = &remaining[1..];
495                    }
496                }
497                _ => {
498                    // No special elements found, add all remaining text
499                    elements.push(Element::Text(remaining.to_string()));
500                    break;
501                }
502            }
503        }
504    }
505
506    elements
507}
508
509/// Reflow elements into lines that fit within the line length
510fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
511    let mut lines = Vec::new();
512    let mut current_line = String::new();
513    let mut current_length = 0;
514
515    for element in elements {
516        let element_str = format!("{element}");
517        let element_len = element.len();
518
519        // For text elements that might need breaking
520        if let Element::Text(text) = element {
521            // If this is a text element, always process it word by word
522            let words: Vec<&str> = text.split_whitespace().collect();
523
524            for word in words {
525                let word_len = word.chars().count();
526                if current_length > 0 && current_length + 1 + word_len > options.line_length {
527                    // Start a new line
528                    lines.push(current_line.trim().to_string());
529                    current_line = word.to_string();
530                    current_length = word_len;
531                } else {
532                    // Add word to current line
533                    if current_length > 0 {
534                        current_line.push(' ');
535                        current_length += 1;
536                    }
537                    current_line.push_str(word);
538                    current_length += word_len;
539                }
540            }
541        } else {
542            // For non-text elements (code, links, references), treat as atomic units
543            // These should never be broken across lines
544            if current_length > 0 && current_length + 1 + element_len > options.line_length {
545                // Start a new line
546                lines.push(current_line.trim().to_string());
547                current_line = element_str;
548                current_length = element_len;
549            } else {
550                // Add element to current line
551                if current_length > 0 {
552                    current_line.push(' ');
553                    current_length += 1;
554                }
555                current_line.push_str(&element_str);
556                current_length += element_len;
557            }
558        }
559    }
560
561    // Don't forget the last line
562    if !current_line.is_empty() {
563        lines.push(current_line.trim_end().to_string());
564    }
565
566    lines
567}
568
569/// Reflow markdown content preserving structure
570pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
571    let lines: Vec<&str> = content.lines().collect();
572    let mut result = Vec::new();
573    let mut i = 0;
574
575    while i < lines.len() {
576        let line = lines[i];
577        let trimmed = line.trim();
578
579        // Preserve empty lines
580        if trimmed.is_empty() {
581            result.push(String::new());
582            i += 1;
583            continue;
584        }
585
586        // Preserve headings as-is
587        if trimmed.starts_with('#') {
588            result.push(line.to_string());
589            i += 1;
590            continue;
591        }
592
593        // Preserve code blocks
594        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
595            result.push(line.to_string());
596            i += 1;
597            // Copy lines until closing fence
598            while i < lines.len() {
599                result.push(lines[i].to_string());
600                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
601                    i += 1;
602                    break;
603                }
604                i += 1;
605            }
606            continue;
607        }
608
609        // Preserve block quotes (but reflow their content)
610        if trimmed.starts_with('>') {
611            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
612            let quote_content = &line[quote_prefix.len()..].trim_start();
613
614            let reflowed = reflow_line(quote_content, options);
615            for reflowed_line in reflowed.iter() {
616                result.push(format!("{quote_prefix} {reflowed_line}"));
617            }
618            i += 1;
619            continue;
620        }
621
622        // Preserve lists
623        if trimmed.starts_with('-')
624            || trimmed.starts_with('*')
625            || trimmed.starts_with('+')
626            || trimmed.chars().next().is_some_and(|c| c.is_numeric())
627        {
628            // Find the list marker and preserve indentation
629            let indent = line.len() - line.trim_start().len();
630            let indent_str = " ".repeat(indent);
631
632            // For numbered lists, find the period and the space after it
633            // For bullet lists, find the marker and the space after it
634            let mut marker_end = indent;
635            let mut content_start = indent;
636
637            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
638                // Numbered list: find the period
639                if let Some(period_pos) = line[indent..].find('.') {
640                    marker_end = indent + period_pos + 1; // Include the period
641                    content_start = marker_end;
642                    // Skip any spaces after the period to find content start
643                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
644                        content_start += 1;
645                    }
646                }
647            } else {
648                // Bullet list: marker is single character
649                marker_end = indent + 1; // Just the marker character
650                content_start = marker_end;
651                // Skip any spaces after the marker
652                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
653                    content_start += 1;
654                }
655            }
656
657            let marker = &line[indent..marker_end];
658            let content = &line[content_start..];
659
660            // Calculate the proper indentation for continuation lines
661            // We need to align with the text after the marker
662            let trimmed_marker = marker;
663            let continuation_spaces = content_start; // Use the actual content start position
664
665            // CRITICAL: Adjust line length to account for list marker and space
666            // For the first line, we need to account for: indent + marker + space
667            // The format is: "{indent_str}{trimmed_marker} {content}"
668            // So available width = line_length - indent - marker_length - 1 (for space)
669            let prefix_length = indent + trimmed_marker.len() + 1; // +1 for space after marker
670
671            // Create adjusted options with reduced line length
672            let adjusted_options = ReflowOptions {
673                line_length: options.line_length.saturating_sub(prefix_length),
674                ..options.clone()
675            };
676
677            let reflowed = reflow_line(content, &adjusted_options);
678            for (j, reflowed_line) in reflowed.iter().enumerate() {
679                if j == 0 {
680                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
681                } else {
682                    // Continuation lines aligned with text after marker
683                    let continuation_indent = " ".repeat(continuation_spaces);
684                    result.push(format!("{continuation_indent}{reflowed_line}"));
685                }
686            }
687            i += 1;
688            continue;
689        }
690
691        // Preserve tables
692        if trimmed.contains('|') {
693            result.push(line.to_string());
694            i += 1;
695            continue;
696        }
697
698        // Preserve reference definitions
699        if trimmed.starts_with('[') && line.contains("]:") {
700            result.push(line.to_string());
701            i += 1;
702            continue;
703        }
704
705        // Check if this is a single line that doesn't need processing
706        let mut is_single_line_paragraph = true;
707        if i + 1 < lines.len() {
708            let next_line = lines[i + 1];
709            let next_trimmed = next_line.trim();
710            // Check if next line starts a new block
711            if !next_trimmed.is_empty()
712                && !next_trimmed.starts_with('#')
713                && !next_trimmed.starts_with("```")
714                && !next_trimmed.starts_with("~~~")
715                && !next_trimmed.starts_with('>')
716                && !next_trimmed.starts_with('|')
717                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
718                && !next_trimmed.starts_with('-')
719                && !next_trimmed.starts_with('*')
720                && !next_trimmed.starts_with('+')
721                && !next_trimmed.chars().next().is_some_and(|c| c.is_numeric())
722            {
723                is_single_line_paragraph = false;
724            }
725        }
726
727        // If it's a single line that fits, just add it as-is
728        if is_single_line_paragraph && line.chars().count() <= options.line_length {
729            result.push(line.to_string());
730            i += 1;
731            continue;
732        }
733
734        // For regular paragraphs, collect consecutive lines
735        let mut paragraph_parts = Vec::new();
736        let mut current_part = vec![line];
737        i += 1;
738
739        while i < lines.len() {
740            let prev_line = if !current_part.is_empty() {
741                current_part.last().unwrap()
742            } else {
743                ""
744            };
745            let next_line = lines[i];
746            let next_trimmed = next_line.trim();
747
748            // Stop at empty lines or special blocks
749            if next_trimmed.is_empty()
750                || next_trimmed.starts_with('#')
751                || next_trimmed.starts_with("```")
752                || next_trimmed.starts_with("~~~")
753                || next_trimmed.starts_with('>')
754                || next_trimmed.starts_with('|')
755                || (next_trimmed.starts_with('[') && next_line.contains("]:"))
756                || next_trimmed.starts_with('-')
757                || next_trimmed.starts_with('*')
758                || next_trimmed.starts_with('+')
759                || next_trimmed.chars().next().is_some_and(|c| c.is_numeric())
760            {
761                break;
762            }
763
764            // Check if previous line ends with hard break (two spaces)
765            if prev_line.ends_with("  ") {
766                // Start a new part after hard break
767                paragraph_parts.push(current_part.join(" "));
768                current_part = vec![next_line];
769            } else {
770                current_part.push(next_line);
771            }
772            i += 1;
773        }
774
775        // Add the last part
776        if !current_part.is_empty() {
777            if current_part.len() == 1 {
778                // Single line, don't add trailing space
779                paragraph_parts.push(current_part[0].to_string());
780            } else {
781                paragraph_parts.push(current_part.join(" "));
782            }
783        }
784
785        // Reflow each part separately, preserving hard breaks
786        for (j, part) in paragraph_parts.iter().enumerate() {
787            let reflowed = reflow_line(part, options);
788            result.extend(reflowed);
789
790            // Preserve hard break by ensuring last line of part ends with two spaces
791            if j < paragraph_parts.len() - 1 && !result.is_empty() {
792                let last_idx = result.len() - 1;
793                if !result[last_idx].ends_with("  ") {
794                    result[last_idx].push_str("  ");
795                }
796            }
797        }
798    }
799
800    result.join("\n")
801}
802
803#[cfg(test)]
804mod tests {
805    use super::*;
806
/// Plain prose that exceeds the limit is wrapped into three lines, each of
/// which fits within the configured width.
#[test]
fn test_reflow_simple_text() {
    let max_width = 20;
    let wrapped = reflow_line(
        "This is a very long line that needs to be wrapped",
        &ReflowOptions {
            line_length: max_width,
            ..Default::default()
        },
    );

    // The line count is pinned first so the per-line width check below
    // covers every produced line.
    assert_eq!(wrapped.len(), 3);
    for line in wrapped.iter().take(3) {
        assert!(line.chars().count() <= max_width);
    }
}
822
/// After wrapping at 30 columns, the output rejoined with single spaces must
/// still contain the inline code span verbatim.
#[test]
fn test_preserve_inline_code() {
    let input = "This line has `inline code` that should be preserved";
    let options = ReflowOptions {
        line_length: 30,
        ..Default::default()
    };

    // Rejoin the wrapped lines and look for the untouched code span.
    let joined = reflow_line(input, &options).join(" ");
    assert!(joined.contains("`inline code`"));
}
835
/// After wrapping at 40 columns, the output rejoined with single spaces must
/// still contain the inline link (text and URL) verbatim.
#[test]
fn test_preserve_links() {
    let link = "[this link](https://example.com/very/long/url)";
    let wrapped = reflow_line(
        "Check out [this link](https://example.com/very/long/url) for more info",
        &ReflowOptions {
            line_length: 40,
            ..Default::default()
        },
    );

    // The link must appear exactly as written in the input.
    let joined = wrapped.join(" ");
    assert!(joined.contains(link));
}
850
/// Reference-style links (`[text][ref]`, `[text][]`, `[shortcut]`) must appear
/// verbatim in the reflowed output, and the output must not contain either of
/// the tell-tale fragments of a link that was split apart (`"[ "` or `"] ["`).
#[test]
fn test_reference_link_patterns_fixed() {
    let options = ReflowOptions {
        line_length: 30,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // Each case pairs an input line with the link fragments that must be
    // found unchanged once the wrapped lines are rejoined with single spaces.
    let test_cases = vec![
        // Reference link: [text][ref] - should be preserved intact
        ("Check out [text][ref] for details", vec!["[text][ref]"]),
        // Empty reference: [text][] - should be preserved intact
        ("See [text][] for info", vec!["[text][]"]),
        // Shortcut reference: [homepage] - should be preserved intact
        ("Visit [homepage] today", vec!["[homepage]"]),
        // Multiple reference links in one line
        (
            "Links: [first][ref1] and [second][ref2] here",
            vec!["[first][ref1]", "[second][ref2]"],
        ),
        // Mixed inline and reference links
        (
            "See [inline](url) and [reference][ref] links",
            vec!["[inline](url)", "[reference][ref]"],
        ),
    ];

    for (input, expected_patterns) in test_cases {
        println!("\nTesting: {input}");
        let result = reflow_line(input, &options);
        let joined = result.join(" ");
        println!("Result:  {joined}");

        // Verify all expected patterns are preserved
        for expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }

        // Verify no broken patterns exist. Either fragment alone indicates a
        // split link (e.g. "[text] [ref]" contains "] [" but not "[ "), so
        // both must be absent. None of the inputs above contains either
        // fragment, so any occurrence was introduced by the reflow.
        assert!(
            !joined.contains("[ ") && !joined.contains("] ["),
            "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
        );
    }
}
900
/// Bracket edge cases: escaped brackets, nested brackets inside a reference
/// link, adjacent reference and inline links, bracketed text that resembles a
/// shortcut reference, and an empty reference whose text contains emphasis.
/// Each listed fragment must appear verbatim in the rejoined output.
#[test]
fn test_reference_link_edge_cases() {
    let options = ReflowOptions {
        line_length: 40,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // (input line, fragments that must be found unchanged)
    let test_cases: [(&str, &[&str]); 5] = [
        // Escaped brackets should be treated as regular text
        ("Text with \\[escaped\\] brackets", &["\\[escaped\\]"]),
        // Nested brackets in reference links
        (
            "Link [text with [nested] content][ref]",
            &["[text with [nested] content][ref]"],
        ),
        // Reference link followed by inline link
        (
            "First [ref][link] then [inline](url)",
            &["[ref][link]", "[inline](url)"],
        ),
        // Shortcut reference that might conflict with other patterns
        ("Array [0] and reference [link] here", &["[0]", "[link]"]),
        // Empty reference with complex text
        (
            "Complex [text with *emphasis*][] reference",
            &["[text with *emphasis*][]"],
        ),
    ];

    for &(input, fragments) in &test_cases {
        println!("\nTesting edge case: {input}");
        let joined = reflow_line(input, &options).join(" ");
        println!("Result: {joined}");

        // Every fragment has to be present exactly as written in the input.
        for &expected_pattern in fragments {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
947
/// After wrapping at 25 columns, the rejoined output must still contain the
/// `*emphasized*` and `**strong**` spans with their markers.
#[test]
fn test_reflow_with_emphasis() {
    let input = "This is *emphasized* and **strong** text that needs wrapping";
    let options = ReflowOptions {
        line_length: 25,
        ..Default::default()
    };

    let joined = reflow_line(input, &options).join(" ");

    // Single-asterisk emphasis first, then the double-asterisk span.
    for marked_span in ["*emphasized*", "**strong**"] {
        assert!(joined.contains(marked_span));
    }
}
962
/// Inline, reference and empty-reference image syntax must appear verbatim in
/// the rejoined output after wrapping at 30 columns.
#[test]
fn test_image_patterns_preserved() {
    let options = ReflowOptions {
        line_length: 30,
        ..Default::default()
    };

    // (input line, image fragments that must be found unchanged)
    let test_cases: [(&str, &[&str]); 4] = [
        // Inline image
        (
            "Check out ![alt text](image.png) for details",
            &["![alt text](image.png)"],
        ),
        // Reference image
        ("See ![image][ref] for info", &["![image][ref]"]),
        // Empty reference image
        ("Visit ![homepage][] today", &["![homepage][]"]),
        // Multiple images
        (
            "Images: ![first](a.png) and ![second][ref2]",
            &["![first](a.png)", "![second][ref2]"],
        ),
    ];

    for &(input, fragments) in test_cases.iter() {
        println!("\nTesting: {input}");
        let joined = reflow_line(input, &options).join(" ");
        println!("Result:  {joined}");

        for &expected_pattern in fragments.iter() {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1002
/// Extended syntax (strikethrough, wiki links, math, emoji shortcodes, HTML
/// tags and HTML entities) must appear verbatim in the rejoined output when
/// reflowing with a 40-column limit.
#[test]
fn test_extended_markdown_patterns() {
    let options = ReflowOptions {
        line_length: 40,
        ..Default::default()
    };

    // (input line, fragments that must be found unchanged)
    let test_cases: [(&str, &[&str]); 6] = [
        // Strikethrough
        ("Text with ~~strikethrough~~ preserved", &["~~strikethrough~~"]),
        // Wiki links
        (
            "Check [[wiki link]] and [[page|display]]",
            &["[[wiki link]]", "[[page|display]]"],
        ),
        // Math
        (
            "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
            &["$x^2 + y^2$", "$$\\int f(x) dx$$"],
        ),
        // Emoji
        ("Use :smile: and :heart: emojis", &[":smile:", ":heart:"]),
        // HTML tags
        (
            "Text with <span>tag</span> and <br/>",
            &["<span>", "</span>", "<br/>"],
        ),
        // HTML entities
        ("Non-breaking&nbsp;space and em&mdash;dash", &["&nbsp;", "&mdash;"]),
    ];

    for &(input, fragments) in &test_cases {
        let joined = reflow_line(input, &options).join(" ");

        for &pattern in fragments {
            assert!(
                joined.contains(pattern),
                "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1046
/// A single line mixing many inline constructs is reflowed at 50 columns;
/// every construct must still be found verbatim in the rejoined output.
#[test]
fn test_complex_mixed_patterns() {
    let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";
    let options = ReflowOptions {
        line_length: 50,
        ..Default::default()
    };

    let joined = reflow_line(input, &options).join(" ");

    // One entry per pattern type present in the input, in input order.
    let must_survive = [
        "**bold**",
        "`code`",
        "[link](url)",
        "![image](img)",
        "~~strike~~",
        "$math$",
        ":emoji:",
        "<tag>",
    ];
    for fragment in must_survive {
        assert!(joined.contains(fragment));
    }
}
1069
/// Footnote references such as `[^1]` must appear verbatim in the rejoined
/// output when reflowing with a 40-column limit.
#[test]
fn test_footnote_patterns_preserved() {
    let options = ReflowOptions {
        line_length: 40,
        ..Default::default()
    };

    // (input line, footnote markers that must be found unchanged)
    let test_cases: [(&str, &[&str]); 3] = [
        // Single footnote
        ("This has a footnote[^1] reference", &["[^1]"]),
        // Multiple footnotes
        ("Text with [^first] and [^second] notes", &["[^first]", "[^second]"]),
        // Long footnote name
        ("Reference to [^long-footnote-name] here", &["[^long-footnote-name]"]),
    ];

    for (input, markers) in test_cases.iter().copied() {
        let joined = reflow_line(input, &options).join(" ");

        for expected_pattern in markers.iter().copied() {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1098
/// Regression test for issue #83: numbered list items are reflowed at 50
/// columns with the marker kept on the first line and continuation lines
/// indented by three spaces; short items are left as they are.
#[test]
fn test_reflow_markdown_numbered_lists() {
    let options = ReflowOptions {
        line_length: 50,
        ..Default::default()
    };

    // Input document, one list item per line.
    let source = [
        "1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.",
        "2. Short item",
        "3. Another long item that definitely exceeds the fifty character limit and needs wrapping",
    ]
    .join("\n");

    // Exact expected output, line by line.
    let wanted = [
        "1. List `manifest` to find the manifest with the",
        "   largest ID. Say it's",
        "   `00000000000000000002.manifest` in this",
        "   example.",
        "2. Short item",
        "3. Another long item that definitely exceeds the",
        "   fifty character limit and needs wrapping",
    ]
    .join("\n");

    let actual = reflow_markdown(&source, &options);

    assert_eq!(
        actual,
        wanted,
        "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}",
        expected = wanted,
        result = actual
    );
}
1127
/// Bullet items using `-`, `*` and `+` are reflowed at 40 columns: each marker
/// is kept as written, and the wrapped continuation line is indented by two
/// spaces.
#[test]
fn test_reflow_markdown_bullet_lists() {
    let options = ReflowOptions {
        line_length: 40,
        ..Default::default()
    };

    // Input document, one bullet per line; only the first exceeds the limit.
    let source = [
        "- First bullet point with a very long line that needs wrapping",
        "* Second bullet using asterisk",
        "+ Third bullet using plus sign",
        "- Short one",
    ]
    .join("\n");

    // Exact expected output - each bullet type preserved with proper indentation.
    let wanted = [
        "- First bullet point with a very long",
        "  line that needs wrapping",
        "* Second bullet using asterisk",
        "+ Third bullet using plus sign",
        "- Short one",
    ]
    .join("\n");

    let actual = reflow_markdown(&source, &options);

    assert_eq!(
        actual,
        wanted,
        "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}",
        expected = wanted,
        result = actual
    );
}
1154}