rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::regex_cache::{
7    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
8    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
9    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
10};
/// Options for reflowing text
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone` so option
/// sets can be logged and compared (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReflowOptions {
    /// Target line length, measured in characters (`char`s), not bytes
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    ///
    /// When set, `reflow_line` processes every line regardless of its length.
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// Default options: 80-character lines, sentence-aware breaking enabled,
    /// existing breaks not preserved, sentence-per-line mode disabled.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
34
/// Detect if a character position is a sentence boundary
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
///
/// `pos` is a **character** index into `text` (not a byte offset). The
/// position is a boundary when it holds `.`, `!` or `?`, is followed by a
/// single space and then an uppercase character, and the punctuation does not
/// terminate a known abbreviation such as "Dr." or "e.g.".
///
/// Returns `false` for any `pos` that leaves no room for the space and the
/// following character, including out-of-range positions.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    let chars: Vec<char> = text.chars().collect();
    is_sentence_boundary_in_chars(&chars, pos)
}

/// Boundary test on pre-collected characters, so callers scanning a whole
/// text only pay for the `char` collection once.
fn is_sentence_boundary_in_chars(chars: &[char], pos: usize) -> bool {
    // Abbreviation list similar to sentences-per-line (stored lowercase,
    // compared ASCII-case-insensitively).
    const ABBREVIATIONS: [&str; 14] = [
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "mr", "mrs", "dr", "ms", "prof", "sr", "jr",
    ];

    // Need the punctuation itself, the space after it, and the first
    // character of the next sentence.
    if pos.saturating_add(2) >= chars.len() {
        return false;
    }

    // Check for sentence-ending punctuation
    if !matches!(chars[pos], '.' | '!' | '?') {
        return false;
    }

    // Must be followed by a space and then an uppercase character
    // (new sentence indicator).
    if chars[pos + 1] != ' ' || !chars[pos + 2].is_uppercase() {
        return false;
    }

    // Everything before the punctuation, indexed by character so that
    // multi-byte text can never be sliced in the middle of a code point.
    let preceding = &chars[..pos];
    let last = match preceding.last() {
        Some(&c) => c,
        None => return true,
    };

    // An abbreviation only counts when it is a whole word: the character in
    // front of it must not be alphanumeric. Otherwise ordinary words such as
    // "items." or "complex." would be mistaken for "Ms." / "ex.".
    let ends_with_abbreviation = ABBREVIATIONS.iter().any(|abbr| {
        // Abbreviations are ASCII, so byte length equals character count.
        if preceding.len() < abbr.len() {
            return false;
        }
        let (head, tail) = preceding.split_at(preceding.len() - abbr.len());
        tail.iter()
            .zip(abbr.chars())
            .all(|(have, want)| have.eq_ignore_ascii_case(&want))
            && !head.last().is_some_and(|c| c.is_alphanumeric())
    });
    if ends_with_abbreviation {
        return false;
    }

    // Numbers on both sides of the punctuation are not a sentence break.
    // (Only reachable for characters that are both uppercase and numeric.)
    if last.is_numeric() && chars[pos + 2].is_numeric() {
        return false;
    }

    true
}

/// Split text into sentences
///
/// Each returned sentence is trimmed of surrounding whitespace and keeps its
/// terminating punctuation. Text after the last detected boundary is returned
/// as the final sentence unless it is empty or whitespace only.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    // Collect once; the boundary check works on character indices.
    let chars: Vec<char> = text.chars().collect();
    let mut sentences = Vec::new();
    let mut start = 0;
    let mut pos = 0;

    while pos < chars.len() {
        if is_sentence_boundary_in_chars(&chars, pos) {
            let sentence: String = chars[start..=pos].iter().collect();
            sentences.push(sentence.trim().to_string());
            // Skip the punctuation and the single space that follows it.
            pos += 2;
            start = pos;
        } else {
            pos += 1;
        }
    }

    // Add any remaining text as the last sentence
    let rest: String = chars[start..].iter().collect();
    let rest = rest.trim();
    if !rest.is_empty() {
        sentences.push(rest.to_string());
    }

    sentences
}
113
/// Check if a line is a horizontal rule (---, ___, ***)
///
/// The line must start with `-`, `_` or `*`, contain only that marker
/// character and spaces, and include at least three markers.
fn is_horizontal_rule(line: &str) -> bool {
    // The first character decides which marker the whole line must use.
    let marker = match line.chars().next() {
        Some(c @ ('-' | '_' | '*')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for c in line.chars() {
        if c == marker {
            marker_count += 1;
        } else if c != ' ' {
            // Anything other than the marker or a space disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
142
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// The line must start with one or more ASCII digits followed by a period,
/// and the period must be followed by a space or be the end of the line.
/// Only ASCII digits are accepted: Markdown ordered-list markers are arabic
/// digits, so other Unicode numerics such as "½" or "٣" do not start a list.
fn is_numbered_list_item(line: &str) -> bool {
    // ASCII digits are single bytes, so the count is also a valid byte offset.
    let digit_count = line.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return false;
    }

    let mut rest = line[digit_count..].chars();
    // After the digits: a period, then either nothing or a space.
    rest.next() == Some('.') && matches!(rest.next(), None | Some(' '))
}
165
/// Check if a line ends with a hard break (either two spaces or backslash)
///
/// A single trailing `\r` (from a CRLF line ending) is ignored before the
/// check. Two hard-break styles are recognised:
/// 1. Two or more trailing spaces
/// 2. A backslash at the end of the line
fn has_hard_break(line: &str) -> bool {
    let content = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    // Both markers are ASCII, so inspecting the trailing bytes is exact.
    matches!(content.as_bytes(), [.., b' ', b' '] | [.., b'\\'])
}
175
/// Trim trailing whitespace while preserving hard breaks (two trailing spaces or backslash)
///
/// A single trailing `\r` is dropped first so CRLF input behaves like LF input.
/// Then:
/// - a line ending in a backslash is returned unchanged;
/// - a line ending in two or more spaces is trimmed and given back exactly two
///   trailing spaces (or becomes empty if it was whitespace only);
/// - any other line simply loses its trailing whitespace.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break (mdformat style): keep the line exactly as-is.
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let mut trimmed = String::with_capacity(content.len() + 2);
    trimmed.push_str(content);

    // Traditional hard break: normalise to exactly two trailing spaces, but
    // never manufacture a break on a line that holds no content.
    if !content.is_empty() && line.ends_with("  ") {
        trimmed.push_str("  ");
    }

    trimmed
}
206
207pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
208    // For sentence-per-line mode, always process regardless of length
209    if options.sentence_per_line {
210        let elements = parse_markdown_elements(line);
211        return reflow_elements_sentence_per_line(&elements);
212    }
213
214    // Quick check: if line is already short enough, return as-is
215    if line.chars().count() <= options.line_length {
216        return vec![line.to_string()];
217    }
218
219    // Parse the markdown to identify elements
220    let elements = parse_markdown_elements(line);
221
222    // Reflow the elements into lines
223    reflow_elements(&elements, options)
224}
225
/// One inline piece of a Markdown line.
///
/// Every variant stores only the inner content of the construct; the
/// surrounding Markdown syntax is re-added by the `Display` implementation and
/// accounted for by `Element::len`.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// Inline link, rendered as `[text](url)`
    Link { text: String, url: String },
    /// Reference link, rendered as `[text][reference]`
    ReferenceLink { text: String, reference: String },
    /// Empty (collapsed) reference link, rendered as `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link, rendered as `[reference]`
    ShortcutReference { reference: String },
    /// Inline image, rendered as `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Reference image, rendered as `![alt][reference]`
    ReferenceImage { alt: String, reference: String },
    /// Empty (collapsed) reference image, rendered as `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference, rendered as `[^note]`
    FootnoteReference { note: String },
    /// Strikethrough text, rendered as `~~text~~`
    Strikethrough(String),
    /// Wiki-style link, rendered as `[[content]]` (content may be `wiki` or `wiki|text`)
    WikiLink(String),
    /// Inline math, rendered as `$math$`
    InlineMath(String),
    /// Display math, rendered as `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode, rendered as `:emoji:`
    EmojiShortcode(String),
    /// HTML tag stored verbatim, e.g. `<tag>`, `</tag>` or `<tag/>`
    HtmlTag(String),
    /// HTML entity stored verbatim, e.g. `&nbsp;` or `&#123;`
    HtmlEntity(String),
    /// Inline code, rendered as `` `code` ``
    Code(String),
    /// Bold text, rendered as `**text**`
    Bold(String),
    /// Italic text, rendered as `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Writes the element back out in its raw Markdown form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Stored verbatim: no syntax to add back.
            Element::Text(s) | Element::HtmlTag(s) | Element::HtmlEntity(s) => write!(f, "{s}"),

            // Symmetric delimiters around a single payload.
            Element::Code(s) => write!(f, "`{s}`"),
            Element::Italic(s) => write!(f, "*{s}*"),
            Element::Bold(s) => write!(f, "**{s}**"),
            Element::Strikethrough(s) => write!(f, "~~{s}~~"),
            Element::InlineMath(s) => write!(f, "${s}$"),
            Element::DisplayMath(s) => write!(f, "$${s}$$"),
            Element::EmojiShortcode(s) => write!(f, ":{s}:"),
            Element::WikiLink(s) => write!(f, "[[{s}]]"),

            // Links and references.
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),

            // Images.
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),
        }
    }
}

impl Element {
    /// Length of the element's rendered Markdown form, in characters.
    ///
    /// Computed as the character count of the stored content plus the fixed
    /// number of syntax characters the `Display` implementation adds.
    fn len(&self) -> usize {
        /// Character (not byte) count of a string.
        fn width(s: &str) -> usize {
            s.chars().count()
        }

        let (content, syntax) = match self {
            // Verbatim content: nothing is added when rendering.
            Element::Text(s) | Element::HtmlTag(s) | Element::HtmlEntity(s) => (width(s), 0),

            // One delimiter character on each side: `code`, *text*, $math$, :emoji:
            Element::Code(s) | Element::Italic(s) | Element::InlineMath(s) | Element::EmojiShortcode(s) => {
                (width(s), 2)
            }

            // Two delimiter characters on each side: **text**, ~~text~~, $$math$$, [[wiki]]
            Element::Bold(s) | Element::Strikethrough(s) | Element::DisplayMath(s) | Element::WikiLink(s) => {
                (width(s), 4)
            }

            Element::ShortcutReference { reference } => (width(reference), 2), // [ref]
            Element::FootnoteReference { note } => (width(note), 3),           // [^note]
            Element::EmptyReferenceLink { text } => (width(text), 4),          // [text][]
            Element::EmptyReferenceImage { alt } => (width(alt), 5),           // ![alt][]

            Element::Link { text, url } => (width(text) + width(url), 4), // [text](url)
            Element::ReferenceLink { text, reference } => (width(text) + width(reference), 4), // [text][ref]
            Element::InlineImage { alt, url } => (width(alt) + width(url), 5), // ![alt](url)
            Element::ReferenceImage { alt, reference } => (width(alt) + width(reference), 5), // ![alt][ref]
        };

        content + syntax
    }
}
320
321/// Parse markdown elements from text preserving the raw syntax
322///
323/// Detection order is critical:
324/// 1. Inline links [text](url) - must be detected first to avoid conflicts
325/// 2. Reference links [text][ref] - detected before shortcut references
326/// 3. Empty reference links [text][] - a special case of reference links
327/// 4. Shortcut reference links [ref] - detected last to avoid false positives
328/// 5. Other elements (code, bold, italic) - processed normally
329fn parse_markdown_elements(text: &str) -> Vec<Element> {
330    let mut elements = Vec::new();
331    let mut remaining = text;
332
333    while !remaining.is_empty() {
334        // Find the earliest occurrence of any markdown pattern
335        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
336
337        // Check for images first (they start with ! so should be detected before links)
338        // Inline images - ![alt](url)
339        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
340            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
341        {
342            earliest_match = Some((m.start(), "inline_image", m));
343        }
344
345        // Reference images - ![alt][ref]
346        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
347            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
348        {
349            earliest_match = Some((m.start(), "ref_image", m));
350        }
351
352        // Check for footnote references - [^note]
353        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
354            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
355        {
356            earliest_match = Some((m.start(), "footnote_ref", m));
357        }
358
359        // Check for inline links - [text](url)
360        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
361            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
362        {
363            earliest_match = Some((m.start(), "inline_link", m));
364        }
365
366        // Check for reference links - [text][ref]
367        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
368            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
369        {
370            earliest_match = Some((m.start(), "ref_link", m));
371        }
372
373        // Check for shortcut reference links - [ref]
374        // Only check if we haven't found an earlier pattern that would conflict
375        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
376            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
377        {
378            earliest_match = Some((m.start(), "shortcut_ref", m));
379        }
380
381        // Check for wiki-style links - [[wiki]]
382        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
383            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
384        {
385            earliest_match = Some((m.start(), "wiki_link", m));
386        }
387
388        // Check for display math first (before inline) - $$math$$
389        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
390            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
391        {
392            earliest_match = Some((m.start(), "display_math", m));
393        }
394
395        // Check for inline math - $math$
396        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
397            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
398        {
399            earliest_match = Some((m.start(), "inline_math", m));
400        }
401
402        // Check for strikethrough - ~~text~~
403        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
404            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
405        {
406            earliest_match = Some((m.start(), "strikethrough", m));
407        }
408
409        // Check for emoji shortcodes - :emoji:
410        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
411            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
412        {
413            earliest_match = Some((m.start(), "emoji", m));
414        }
415
416        // Check for HTML entities - &nbsp; etc
417        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
418            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
419        {
420            earliest_match = Some((m.start(), "html_entity", m));
421        }
422
423        // Check for HTML tags - <tag> </tag> <tag/>
424        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
425            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
426        {
427            earliest_match = Some((m.start(), "html_tag", m));
428        }
429
430        // Find earliest non-link special characters
431        let mut next_special = remaining.len();
432        let mut special_type = "";
433
434        if let Some(pos) = remaining.find('`')
435            && pos < next_special
436        {
437            next_special = pos;
438            special_type = "code";
439        }
440        if let Some(pos) = remaining.find("**")
441            && pos < next_special
442        {
443            next_special = pos;
444            special_type = "bold";
445        }
446        if let Some(pos) = remaining.find('*')
447            && pos < next_special
448            && !remaining[pos..].starts_with("**")
449        {
450            next_special = pos;
451            special_type = "italic";
452        }
453
454        // Determine which pattern to process first
455        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
456            pos < next_special
457        } else {
458            false
459        };
460
461        if should_process_markdown_link {
462            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
463
464            // Add any text before the match
465            if pos > 0 {
466                elements.push(Element::Text(remaining[..pos].to_string()));
467            }
468
469            // Process the matched pattern
470            match pattern_type {
471                "inline_image" => {
472                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
473                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
474                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
475                        elements.push(Element::InlineImage {
476                            alt: alt.to_string(),
477                            url: url.to_string(),
478                        });
479                        remaining = &remaining[match_obj.end()..];
480                    } else {
481                        elements.push(Element::Text("!".to_string()));
482                        remaining = &remaining[1..];
483                    }
484                }
485                "ref_image" => {
486                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
487                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
488                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
489
490                        if reference.is_empty() {
491                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
492                        } else {
493                            elements.push(Element::ReferenceImage {
494                                alt: alt.to_string(),
495                                reference: reference.to_string(),
496                            });
497                        }
498                        remaining = &remaining[match_obj.end()..];
499                    } else {
500                        elements.push(Element::Text("!".to_string()));
501                        remaining = &remaining[1..];
502                    }
503                }
504                "footnote_ref" => {
505                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
506                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
507                        elements.push(Element::FootnoteReference { note: note.to_string() });
508                        remaining = &remaining[match_obj.end()..];
509                    } else {
510                        elements.push(Element::Text("[".to_string()));
511                        remaining = &remaining[1..];
512                    }
513                }
514                "inline_link" => {
515                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
516                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
517                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
518                        elements.push(Element::Link {
519                            text: text.to_string(),
520                            url: url.to_string(),
521                        });
522                        remaining = &remaining[match_obj.end()..];
523                    } else {
524                        // Fallback - shouldn't happen
525                        elements.push(Element::Text("[".to_string()));
526                        remaining = &remaining[1..];
527                    }
528                }
529                "ref_link" => {
530                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
531                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
532                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
533
534                        if reference.is_empty() {
535                            // Empty reference link [text][]
536                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
537                        } else {
538                            // Regular reference link [text][ref]
539                            elements.push(Element::ReferenceLink {
540                                text: text.to_string(),
541                                reference: reference.to_string(),
542                            });
543                        }
544                        remaining = &remaining[match_obj.end()..];
545                    } else {
546                        // Fallback - shouldn't happen
547                        elements.push(Element::Text("[".to_string()));
548                        remaining = &remaining[1..];
549                    }
550                }
551                "shortcut_ref" => {
552                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
553                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
554                        elements.push(Element::ShortcutReference {
555                            reference: reference.to_string(),
556                        });
557                        remaining = &remaining[match_obj.end()..];
558                    } else {
559                        // Fallback - shouldn't happen
560                        elements.push(Element::Text("[".to_string()));
561                        remaining = &remaining[1..];
562                    }
563                }
564                "wiki_link" => {
565                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
566                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
567                        elements.push(Element::WikiLink(content.to_string()));
568                        remaining = &remaining[match_obj.end()..];
569                    } else {
570                        elements.push(Element::Text("[[".to_string()));
571                        remaining = &remaining[2..];
572                    }
573                }
574                "display_math" => {
575                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
576                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
577                        elements.push(Element::DisplayMath(math.to_string()));
578                        remaining = &remaining[match_obj.end()..];
579                    } else {
580                        elements.push(Element::Text("$$".to_string()));
581                        remaining = &remaining[2..];
582                    }
583                }
584                "inline_math" => {
585                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
586                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
587                        elements.push(Element::InlineMath(math.to_string()));
588                        remaining = &remaining[match_obj.end()..];
589                    } else {
590                        elements.push(Element::Text("$".to_string()));
591                        remaining = &remaining[1..];
592                    }
593                }
594                "strikethrough" => {
595                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
596                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
597                        elements.push(Element::Strikethrough(text.to_string()));
598                        remaining = &remaining[match_obj.end()..];
599                    } else {
600                        elements.push(Element::Text("~~".to_string()));
601                        remaining = &remaining[2..];
602                    }
603                }
604                "emoji" => {
605                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
606                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
607                        elements.push(Element::EmojiShortcode(emoji.to_string()));
608                        remaining = &remaining[match_obj.end()..];
609                    } else {
610                        elements.push(Element::Text(":".to_string()));
611                        remaining = &remaining[1..];
612                    }
613                }
614                "html_entity" => {
615                    // HTML entities are captured whole
616                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
617                    remaining = &remaining[match_obj.end()..];
618                }
619                "html_tag" => {
620                    // HTML tags are captured whole
621                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
622                    remaining = &remaining[match_obj.end()..];
623                }
624                _ => {
625                    // Unknown pattern, treat as text
626                    elements.push(Element::Text("[".to_string()));
627                    remaining = &remaining[1..];
628                }
629            }
630        } else {
631            // Process non-link special characters
632
633            // Add any text before the special character
634            if next_special > 0 && next_special < remaining.len() {
635                elements.push(Element::Text(remaining[..next_special].to_string()));
636                remaining = &remaining[next_special..];
637            }
638
639            // Process the special element
640            match special_type {
641                "code" => {
642                    // Find end of code
643                    if let Some(code_end) = remaining[1..].find('`') {
644                        let code = &remaining[1..1 + code_end];
645                        elements.push(Element::Code(code.to_string()));
646                        remaining = &remaining[1 + code_end + 1..];
647                    } else {
648                        // No closing backtick, treat as text
649                        elements.push(Element::Text(remaining.to_string()));
650                        break;
651                    }
652                }
653                "bold" => {
654                    // Check for bold text
655                    if let Some(bold_end) = remaining[2..].find("**") {
656                        let bold_text = &remaining[2..2 + bold_end];
657                        elements.push(Element::Bold(bold_text.to_string()));
658                        remaining = &remaining[2 + bold_end + 2..];
659                    } else {
660                        // No closing **, treat as text
661                        elements.push(Element::Text("**".to_string()));
662                        remaining = &remaining[2..];
663                    }
664                }
665                "italic" => {
666                    // Check for italic text
667                    if let Some(italic_end) = remaining[1..].find('*') {
668                        let italic_text = &remaining[1..1 + italic_end];
669                        elements.push(Element::Italic(italic_text.to_string()));
670                        remaining = &remaining[1 + italic_end + 1..];
671                    } else {
672                        // No closing *, treat as text
673                        elements.push(Element::Text("*".to_string()));
674                        remaining = &remaining[1..];
675                    }
676                }
677                _ => {
678                    // No special elements found, add all remaining text
679                    elements.push(Element::Text(remaining.to_string()));
680                    break;
681                }
682            }
683        }
684    }
685
686    elements
687}
688
689/// Reflow elements for sentence-per-line mode
690fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
691    let mut lines = Vec::new();
692    let mut current_line = String::new();
693
694    for element in elements {
695        let element_str = format!("{element}");
696
697        // For text elements, split into sentences
698        if let Element::Text(text) = element {
699            // Simply append text - it already has correct spacing from tokenization
700            let combined = format!("{current_line}{text}");
701            let sentences = split_into_sentences(&combined);
702
703            if sentences.len() > 1 {
704                // We found sentence boundaries
705                for (i, sentence) in sentences.iter().enumerate() {
706                    if i == 0 {
707                        // First sentence might continue from previous elements
708                        lines.push(sentence.to_string());
709                    } else if i == sentences.len() - 1 {
710                        // Last sentence might continue to next elements
711                        current_line = sentence.to_string();
712                    } else {
713                        // Complete sentences in the middle
714                        lines.push(sentence.to_string());
715                    }
716                }
717            } else {
718                // No sentence boundary found, continue accumulating
719                current_line = combined;
720            }
721        } else {
722            // Non-text elements (Code, Bold, Italic, etc.)
723            // Add space before element if needed (unless it's after an opening paren/bracket)
724            if !current_line.is_empty()
725                && !current_line.ends_with(' ')
726                && !current_line.ends_with('(')
727                && !current_line.ends_with('[')
728            {
729                current_line.push(' ');
730            }
731            current_line.push_str(&element_str);
732        }
733    }
734
735    // Add any remaining content
736    if !current_line.is_empty() {
737        lines.push(current_line.trim().to_string());
738    }
739
740    lines
741}
742
743/// Reflow elements into lines that fit within the line length
744fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
745    let mut lines = Vec::new();
746    let mut current_line = String::new();
747    let mut current_length = 0;
748
749    for element in elements {
750        let element_str = format!("{element}");
751        let element_len = element.len();
752
753        // For text elements that might need breaking
754        if let Element::Text(text) = element {
755            // If this is a text element, always process it word by word
756            let words: Vec<&str> = text.split_whitespace().collect();
757
758            for word in words {
759                let word_len = word.chars().count();
760                if current_length > 0 && current_length + 1 + word_len > options.line_length {
761                    // Start a new line
762                    lines.push(current_line.trim().to_string());
763                    current_line = word.to_string();
764                    current_length = word_len;
765                } else {
766                    // Add word to current line
767                    if current_length > 0 {
768                        current_line.push(' ');
769                        current_length += 1;
770                    }
771                    current_line.push_str(word);
772                    current_length += word_len;
773                }
774            }
775        } else {
776            // For non-text elements (code, links, references), treat as atomic units
777            // These should never be broken across lines
778            if current_length > 0 && current_length + 1 + element_len > options.line_length {
779                // Start a new line
780                lines.push(current_line.trim().to_string());
781                current_line = element_str;
782                current_length = element_len;
783            } else {
784                // Add element to current line
785                if current_length > 0 {
786                    current_line.push(' ');
787                    current_length += 1;
788                }
789                current_line.push_str(&element_str);
790                current_length += element_len;
791            }
792        }
793    }
794
795    // Don't forget the last line
796    if !current_line.is_empty() {
797        lines.push(current_line.trim_end().to_string());
798    }
799
800    lines
801}
802
803/// Reflow markdown content preserving structure
804pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
805    let lines: Vec<&str> = content.lines().collect();
806    let mut result = Vec::new();
807    let mut i = 0;
808
809    while i < lines.len() {
810        let line = lines[i];
811        let trimmed = line.trim();
812
813        // Preserve empty lines
814        if trimmed.is_empty() {
815            result.push(String::new());
816            i += 1;
817            continue;
818        }
819
820        // Preserve headings as-is
821        if trimmed.starts_with('#') {
822            result.push(line.to_string());
823            i += 1;
824            continue;
825        }
826
827        // Preserve fenced code blocks
828        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
829            result.push(line.to_string());
830            i += 1;
831            // Copy lines until closing fence
832            while i < lines.len() {
833                result.push(lines[i].to_string());
834                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
835                    i += 1;
836                    break;
837                }
838                i += 1;
839            }
840            continue;
841        }
842
843        // Preserve indented code blocks (4+ spaces or 1+ tab)
844        if line.starts_with("    ") || line.starts_with("\t") {
845            // Collect all consecutive indented lines
846            result.push(line.to_string());
847            i += 1;
848            while i < lines.len() {
849                let next_line = lines[i];
850                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
851                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
852                    result.push(next_line.to_string());
853                    i += 1;
854                } else {
855                    break;
856                }
857            }
858            continue;
859        }
860
861        // Preserve block quotes (but reflow their content)
862        if trimmed.starts_with('>') {
863            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
864            let quote_content = &line[quote_prefix.len()..].trim_start();
865
866            let reflowed = reflow_line(quote_content, options);
867            for reflowed_line in reflowed.iter() {
868                result.push(format!("{quote_prefix} {reflowed_line}"));
869            }
870            i += 1;
871            continue;
872        }
873
874        // Preserve horizontal rules first (before checking for lists)
875        if is_horizontal_rule(trimmed) {
876            result.push(line.to_string());
877            i += 1;
878            continue;
879        }
880
881        // Preserve lists (but not horizontal rules)
882        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
883            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
884            || trimmed.starts_with('+')
885            || is_numbered_list_item(trimmed)
886        {
887            // Find the list marker and preserve indentation
888            let indent = line.len() - line.trim_start().len();
889            let indent_str = " ".repeat(indent);
890
891            // For numbered lists, find the period and the space after it
892            // For bullet lists, find the marker and the space after it
893            let mut marker_end = indent;
894            let mut content_start = indent;
895
896            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
897                // Numbered list: find the period
898                if let Some(period_pos) = line[indent..].find('.') {
899                    marker_end = indent + period_pos + 1; // Include the period
900                    content_start = marker_end;
901                    // Skip any spaces after the period to find content start
902                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
903                        content_start += 1;
904                    }
905                }
906            } else {
907                // Bullet list: marker is single character
908                marker_end = indent + 1; // Just the marker character
909                content_start = marker_end;
910                // Skip any spaces after the marker
911                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
912                    content_start += 1;
913                }
914            }
915
916            let marker = &line[indent..marker_end];
917
918            // Collect all content for this list item (including continuation lines)
919            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
920            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
921            i += 1;
922
923            // Collect continuation lines (indented lines that are part of this list item)
924            while i < lines.len() {
925                let next_line = lines[i];
926                let next_trimmed = next_line.trim();
927
928                // Stop if we hit an empty line or another list item or special block
929                if next_trimmed.is_empty()
930                    || next_trimmed.starts_with('#')
931                    || next_trimmed.starts_with("```")
932                    || next_trimmed.starts_with("~~~")
933                    || next_trimmed.starts_with('>')
934                    || next_trimmed.starts_with('|')
935                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
936                    || is_horizontal_rule(next_trimmed)
937                    || (next_trimmed.starts_with('-')
938                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
939                    || (next_trimmed.starts_with('*')
940                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
941                    || (next_trimmed.starts_with('+')
942                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
943                    || is_numbered_list_item(next_trimmed)
944                {
945                    break;
946                }
947
948                // Check if this line is indented (continuation of list item)
949                let next_indent = next_line.len() - next_line.trim_start().len();
950                if next_indent >= content_start {
951                    // This is a continuation line - add its content
952                    // Preserve hard breaks while trimming excessive whitespace
953                    let trimmed_start = next_line.trim_start();
954                    list_content.push(trim_preserving_hard_break(trimmed_start));
955                    i += 1;
956                } else {
957                    // Not indented enough, not part of this list item
958                    break;
959                }
960            }
961
962            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
963            // Hard breaks should prevent joining with the next line
964            let combined_content = if options.preserve_breaks {
965                list_content[0].clone()
966            } else {
967                // Check if any lines have hard breaks - if so, preserve the structure
968                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
969                if has_hard_breaks {
970                    // Don't join lines with hard breaks - keep them separate with newlines
971                    list_content.join("\n")
972                } else {
973                    // No hard breaks, safe to join with spaces
974                    list_content.join(" ")
975                }
976            };
977
978            // Calculate the proper indentation for continuation lines
979            let trimmed_marker = marker;
980            let continuation_spaces = content_start;
981
982            // Adjust line length to account for list marker and space
983            let prefix_length = indent + trimmed_marker.len() + 1;
984
985            // Create adjusted options with reduced line length
986            let adjusted_options = ReflowOptions {
987                line_length: options.line_length.saturating_sub(prefix_length),
988                ..options.clone()
989            };
990
991            let reflowed = reflow_line(&combined_content, &adjusted_options);
992            for (j, reflowed_line) in reflowed.iter().enumerate() {
993                if j == 0 {
994                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
995                } else {
996                    // Continuation lines aligned with text after marker
997                    let continuation_indent = " ".repeat(continuation_spaces);
998                    result.push(format!("{continuation_indent}{reflowed_line}"));
999                }
1000            }
1001            continue;
1002        }
1003
1004        // Preserve tables
1005        if trimmed.contains('|') {
1006            result.push(line.to_string());
1007            i += 1;
1008            continue;
1009        }
1010
1011        // Preserve reference definitions
1012        if trimmed.starts_with('[') && line.contains("]:") {
1013            result.push(line.to_string());
1014            i += 1;
1015            continue;
1016        }
1017
1018        // Check if this is a single line that doesn't need processing
1019        let mut is_single_line_paragraph = true;
1020        if i + 1 < lines.len() {
1021            let next_line = lines[i + 1];
1022            let next_trimmed = next_line.trim();
1023            // Check if next line starts a new block
1024            if !next_trimmed.is_empty()
1025                && !next_trimmed.starts_with('#')
1026                && !next_trimmed.starts_with("```")
1027                && !next_trimmed.starts_with("~~~")
1028                && !next_trimmed.starts_with('>')
1029                && !next_trimmed.starts_with('|')
1030                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1031                && !is_horizontal_rule(next_trimmed)
1032                && !(next_trimmed.starts_with('-')
1033                    && !is_horizontal_rule(next_trimmed)
1034                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1035                && !(next_trimmed.starts_with('*')
1036                    && !is_horizontal_rule(next_trimmed)
1037                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1038                && !(next_trimmed.starts_with('+')
1039                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1040                && !is_numbered_list_item(next_trimmed)
1041            {
1042                is_single_line_paragraph = false;
1043            }
1044        }
1045
1046        // If it's a single line that fits, just add it as-is
1047        if is_single_line_paragraph && line.chars().count() <= options.line_length {
1048            result.push(line.to_string());
1049            i += 1;
1050            continue;
1051        }
1052
1053        // For regular paragraphs, collect consecutive lines
1054        let mut paragraph_parts = Vec::new();
1055        let mut current_part = vec![line];
1056        i += 1;
1057
1058        // If preserve_breaks is true, treat each line separately
1059        if options.preserve_breaks {
1060            // Don't collect consecutive lines - just reflow this single line
1061            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1062                Some("\\")
1063            } else if line.ends_with("  ") {
1064                Some("  ")
1065            } else {
1066                None
1067            };
1068            let reflowed = reflow_line(line, options);
1069
1070            // Preserve hard breaks (two trailing spaces or backslash)
1071            if let Some(break_marker) = hard_break_type {
1072                if !reflowed.is_empty() {
1073                    let mut reflowed_with_break = reflowed;
1074                    let last_idx = reflowed_with_break.len() - 1;
1075                    if !has_hard_break(&reflowed_with_break[last_idx]) {
1076                        reflowed_with_break[last_idx].push_str(break_marker);
1077                    }
1078                    result.extend(reflowed_with_break);
1079                }
1080            } else {
1081                result.extend(reflowed);
1082            }
1083        } else {
1084            // Original behavior: collect consecutive lines into a paragraph
1085            while i < lines.len() {
1086                let prev_line = if !current_part.is_empty() {
1087                    current_part.last().unwrap()
1088                } else {
1089                    ""
1090                };
1091                let next_line = lines[i];
1092                let next_trimmed = next_line.trim();
1093
1094                // Stop at empty lines or special blocks
1095                if next_trimmed.is_empty()
1096                    || next_trimmed.starts_with('#')
1097                    || next_trimmed.starts_with("```")
1098                    || next_trimmed.starts_with("~~~")
1099                    || next_trimmed.starts_with('>')
1100                    || next_trimmed.starts_with('|')
1101                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1102                    || is_horizontal_rule(next_trimmed)
1103                    || (next_trimmed.starts_with('-')
1104                        && !is_horizontal_rule(next_trimmed)
1105                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1106                    || (next_trimmed.starts_with('*')
1107                        && !is_horizontal_rule(next_trimmed)
1108                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1109                    || (next_trimmed.starts_with('+')
1110                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1111                    || is_numbered_list_item(next_trimmed)
1112                {
1113                    break;
1114                }
1115
1116                // Check if previous line ends with hard break (two spaces or backslash)
1117                if has_hard_break(prev_line) {
1118                    // Start a new part after hard break
1119                    paragraph_parts.push(current_part.join(" "));
1120                    current_part = vec![next_line];
1121                } else {
1122                    current_part.push(next_line);
1123                }
1124                i += 1;
1125            }
1126
1127            // Add the last part
1128            if !current_part.is_empty() {
1129                if current_part.len() == 1 {
1130                    // Single line, don't add trailing space
1131                    paragraph_parts.push(current_part[0].to_string());
1132                } else {
1133                    paragraph_parts.push(current_part.join(" "));
1134                }
1135            }
1136
1137            // Reflow each part separately, preserving hard breaks
1138            for (j, part) in paragraph_parts.iter().enumerate() {
1139                let reflowed = reflow_line(part, options);
1140                result.extend(reflowed);
1141
1142                // Preserve hard break by ensuring last line of part ends with hard break marker
1143                // Use two spaces as the default hard break format for reflows
1144                if j < paragraph_parts.len() - 1 && !result.is_empty() {
1145                    let last_idx = result.len() - 1;
1146                    if !has_hard_break(&result[last_idx]) {
1147                        result[last_idx].push_str("  ");
1148                    }
1149                }
1150            }
1151        }
1152    }
1153
1154    // Preserve trailing newline if the original content had one
1155    let result_text = result.join("\n");
1156    if content.ends_with('\n') && !result_text.ends_with('\n') {
1157        format!("{result_text}\n")
1158    } else {
1159        result_text
1160    }
1161}
1162
/// Information about a reflowed paragraph
///
/// `start_byte..end_byte` is the byte range of the original paragraph inside
/// the document; replacing that range with `reflowed_text` applies the reflow.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Byte offset one past the end of the paragraph in the original content;
    /// the range covers the paragraph's final line terminator when it has one
    pub end_byte: usize,
    /// The reflowed text for this paragraph; ends with a newline exactly when
    /// the replaced range does
    pub reflowed_text: String,
}
1173
1174/// Reflow a single paragraph at the specified line number
1175///
1176/// This function finds the paragraph containing the given line number,
1177/// reflows it according to the specified line length, and returns
1178/// information about the paragraph location and its reflowed text.
1179///
1180/// # Arguments
1181///
1182/// * `content` - The full document content
1183/// * `line_number` - The 1-based line number within the paragraph to reflow
1184/// * `line_length` - The target line length for reflowing
1185///
1186/// # Returns
1187///
1188/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
1189/// or `None` if the line number is out of bounds or the content at that
1190/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
1191pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1192    if line_number == 0 {
1193        return None;
1194    }
1195
1196    let lines: Vec<&str> = content.lines().collect();
1197
1198    // Check if line number is valid (1-based)
1199    if line_number > lines.len() {
1200        return None;
1201    }
1202
1203    let target_idx = line_number - 1; // Convert to 0-based
1204    let target_line = lines[target_idx];
1205    let trimmed = target_line.trim();
1206
1207    // Don't reflow special blocks
1208    if trimmed.is_empty()
1209        || trimmed.starts_with('#')
1210        || trimmed.starts_with("```")
1211        || trimmed.starts_with("~~~")
1212        || target_line.starts_with("    ")
1213        || target_line.starts_with('\t')
1214        || trimmed.starts_with('>')
1215        || trimmed.contains('|') // Tables
1216        || (trimmed.starts_with('[') && target_line.contains("]:")) // Reference definitions
1217        || is_horizontal_rule(trimmed)
1218        || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1219            && !is_horizontal_rule(trimmed)
1220            && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1221        || is_numbered_list_item(trimmed)
1222    {
1223        return None;
1224    }
1225
1226    // Find paragraph start - scan backward until blank line or special block
1227    let mut para_start = target_idx;
1228    while para_start > 0 {
1229        let prev_idx = para_start - 1;
1230        let prev_line = lines[prev_idx];
1231        let prev_trimmed = prev_line.trim();
1232
1233        // Stop at blank line or special blocks
1234        if prev_trimmed.is_empty()
1235            || prev_trimmed.starts_with('#')
1236            || prev_trimmed.starts_with("```")
1237            || prev_trimmed.starts_with("~~~")
1238            || prev_line.starts_with("    ")
1239            || prev_line.starts_with('\t')
1240            || prev_trimmed.starts_with('>')
1241            || prev_trimmed.contains('|')
1242            || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1243            || is_horizontal_rule(prev_trimmed)
1244            || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1245                && !is_horizontal_rule(prev_trimmed)
1246                && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1247            || is_numbered_list_item(prev_trimmed)
1248        {
1249            break;
1250        }
1251
1252        para_start = prev_idx;
1253    }
1254
1255    // Find paragraph end - scan forward until blank line or special block
1256    let mut para_end = target_idx;
1257    while para_end + 1 < lines.len() {
1258        let next_idx = para_end + 1;
1259        let next_line = lines[next_idx];
1260        let next_trimmed = next_line.trim();
1261
1262        // Stop at blank line or special blocks
1263        if next_trimmed.is_empty()
1264            || next_trimmed.starts_with('#')
1265            || next_trimmed.starts_with("```")
1266            || next_trimmed.starts_with("~~~")
1267            || next_line.starts_with("    ")
1268            || next_line.starts_with('\t')
1269            || next_trimmed.starts_with('>')
1270            || next_trimmed.contains('|')
1271            || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1272            || is_horizontal_rule(next_trimmed)
1273            || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1274                && !is_horizontal_rule(next_trimmed)
1275                && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1276            || is_numbered_list_item(next_trimmed)
1277        {
1278            break;
1279        }
1280
1281        para_end = next_idx;
1282    }
1283
1284    // Extract paragraph lines
1285    let paragraph_lines = &lines[para_start..=para_end];
1286
1287    // Calculate byte offsets
1288    let mut start_byte = 0;
1289    for line in lines.iter().take(para_start) {
1290        start_byte += line.len() + 1; // +1 for newline
1291    }
1292
1293    let mut end_byte = start_byte;
1294    for line in paragraph_lines.iter() {
1295        end_byte += line.len() + 1; // +1 for newline
1296    }
1297
1298    // Track whether the byte range includes a trailing newline
1299    // (it doesn't if this is the last line and the file doesn't end with newline)
1300    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1301
1302    // Adjust end_byte if the last line doesn't have a newline
1303    if !includes_trailing_newline {
1304        end_byte -= 1;
1305    }
1306
1307    // Join paragraph lines and reflow
1308    let paragraph_text = paragraph_lines.join("\n");
1309
1310    // Create reflow options
1311    let options = ReflowOptions {
1312        line_length,
1313        break_on_sentences: true,
1314        preserve_breaks: false,
1315        sentence_per_line: false,
1316    };
1317
1318    // Reflow the paragraph using reflow_markdown to handle it properly
1319    let reflowed = reflow_markdown(&paragraph_text, &options);
1320
1321    // Ensure reflowed text matches whether the byte range includes a trailing newline
1322    // This is critical: if the range includes a newline, the replacement must too,
1323    // otherwise the next line will get appended to the reflowed paragraph
1324    let reflowed_text = if includes_trailing_newline {
1325        // Range includes newline - ensure reflowed text has one
1326        if reflowed.ends_with('\n') {
1327            reflowed
1328        } else {
1329            format!("{reflowed}\n")
1330        }
1331    } else {
1332        // Range doesn't include newline - ensure reflowed text doesn't have one
1333        if reflowed.ends_with('\n') {
1334            reflowed.trim_end_matches('\n').to_string()
1335        } else {
1336            reflowed
1337        }
1338    };
1339
1340    Some(ParagraphReflow {
1341        start_byte,
1342        end_byte,
1343        reflowed_text,
1344    })
1345}
1346
1347#[cfg(test)]
1348mod tests {
1349    use super::*;
1350
1351    #[test]
1352    fn test_list_item_trailing_whitespace_removal() {
1353        // Test for issue #76 - hard breaks (2 trailing spaces) should be preserved
1354        // and prevent reflowing
1355        let input = "1. First line with trailing spaces   \n    Second line with trailing spaces  \n    Third line\n";
1356
1357        let options = ReflowOptions {
1358            line_length: 999999,
1359            break_on_sentences: true, // MD013 uses true by default
1360            preserve_breaks: false,
1361            sentence_per_line: false,
1362        };
1363
1364        let result = reflow_markdown(input, &options);
1365
1366        eprintln!("Input: {input:?}");
1367        eprintln!("Result: {result:?}");
1368
1369        // Should not contain 3+ consecutive spaces (which would indicate
1370        // trailing whitespace became mid-line whitespace)
1371        assert!(
1372            !result.contains("   "),
1373            "Result should not contain 3+ consecutive spaces: {result:?}"
1374        );
1375
1376        // Hard breaks should be preserved (exactly 2 trailing spaces)
1377        assert!(result.contains("  \n"), "Hard breaks should be preserved: {result:?}");
1378
1379        // Should NOT be reflowed into a single line because hard breaks are present
1380        // The content should maintain its line structure
1381        assert!(
1382            result.lines().count() >= 2,
1383            "Should have multiple lines (not reflowed due to hard breaks), got: {}",
1384            result.lines().count()
1385        );
1386    }
1387
1388    #[test]
1389    fn test_reflow_simple_text() {
1390        let options = ReflowOptions {
1391            line_length: 20,
1392            ..Default::default()
1393        };
1394
1395        let input = "This is a very long line that needs to be wrapped";
1396        let result = reflow_line(input, &options);
1397
1398        assert_eq!(result.len(), 3);
1399        assert!(result[0].chars().count() <= 20);
1400        assert!(result[1].chars().count() <= 20);
1401        assert!(result[2].chars().count() <= 20);
1402    }
1403
1404    #[test]
1405    fn test_preserve_inline_code() {
1406        let options = ReflowOptions {
1407            line_length: 30,
1408            ..Default::default()
1409        };
1410
1411        let result = reflow_line("This line has `inline code` that should be preserved", &options);
1412        // Verify inline code is not broken
1413        let joined = result.join(" ");
1414        assert!(joined.contains("`inline code`"));
1415    }
1416
1417    #[test]
1418    fn test_preserve_links() {
1419        let options = ReflowOptions {
1420            line_length: 40,
1421            ..Default::default()
1422        };
1423
1424        let text = "Check out [this link](https://example.com/very/long/url) for more info";
1425        let result = reflow_line(text, &options);
1426
1427        // Verify link is preserved intact
1428        let joined = result.join(" ");
1429        assert!(joined.contains("[this link](https://example.com/very/long/url)"));
1430    }
1431
#[test]
fn test_reference_link_patterns_fixed() {
    // Reference-style links must come out of `reflow_line` as unbroken units
    // when reflowing with a 30-column limit.
    let options = ReflowOptions {
        line_length: 30,
        break_on_sentences: true,
        preserve_breaks: false,
        sentence_per_line: false,
    };

    // Each case pairs an input line with the link spans that must be found
    // verbatim in the reflowed output.
    let test_cases = vec![
        // Reference link: [text][ref] - should be preserved intact
        ("Check out [text][ref] for details", vec!["[text][ref]"]),
        // Empty reference: [text][] - should be preserved intact
        ("See [text][] for info", vec!["[text][]"]),
        // Shortcut reference: [homepage] - should be preserved intact
        ("Visit [homepage] today", vec!["[homepage]"]),
        // Multiple reference links in one line
        (
            "Links: [first][ref1] and [second][ref2] here",
            vec!["[first][ref1]", "[second][ref2]"],
        ),
        // Mixed inline and reference links
        (
            "See [inline](url) and [reference][ref] links",
            vec!["[inline](url)", "[reference][ref]"],
        ),
    ];

    for (input, expected_patterns) in test_cases {
        println!("\nTesting: {input}");
        let result = reflow_line(input, &options);
        // Lines are re-joined with a single space so a link split across two
        // output lines shows up as a link with a space inside it.
        let joined = result.join(" ");
        println!("Result:  {joined}");

        // Verify all expected patterns are preserved
        for expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }

        // Verify no broken patterns exist (spaces inside brackets).
        // Both forms must be absent, hence `&&`: joined with `||` the check
        // only failed when "[ " and "] [" were present at the same time, so a
        // lone "[text] [ref]" split slipped through.
        assert!(
            !joined.contains("[ ") && !joined.contains("] ["),
            "Detected broken reference link pattern with spaces inside brackets in '{joined}'"
        );
    }
}
1482
#[test]
fn test_sentence_detection_basic() {
    // Terminal punctuation at the index, then a space, then a capital letter.
    for (text, pos) in [("Hello. World", 5), ("Test! Another", 4), ("Question? Answer", 8)] {
        assert!(is_sentence_boundary(text, pos));
    }

    // Either no terminal punctuation at the index, or the punctuation is not
    // followed by a space.
    for (text, pos) in [("Hello world", 5), ("Test.com", 4), ("3.14 pi", 1)] {
        assert!(!is_sentence_boundary(text, pos));
    }
}
1495
#[test]
fn test_sentence_detection_abbreviations() {
    // The period that closes a common abbreviation must not be reported as a
    // sentence boundary. Each entry is (text, index of that period).
    let abbreviation_periods = [
        ("Mr. Smith", 2),
        ("Dr. Jones", 2),
        ("e.g. example", 3),
        ("i.e. that is", 3),
        ("etc. items", 3),
    ];
    for (text, pos) in abbreviation_periods {
        assert!(!is_sentence_boundary(text, pos));
    }

    // A genuine sentence end later in the same text is still a boundary.
    assert!(is_sentence_boundary("Mr. Smith arrived. Next sentence.", 17));
}
1508
#[test]
fn test_split_into_sentences() {
    // Splits `text` and checks the piece count first, then every piece in order.
    let assert_split = |text: &str, expected: &[&str]| {
        let sentences = split_into_sentences(text);
        assert_eq!(sentences.len(), expected.len());
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(sentences[idx], *want);
        }
    };

    // Three sentences with mixed terminators.
    assert_split(
        "First sentence. Second sentence. Third one!",
        &["First sentence.", "Second sentence.", "Third one!"],
    );

    // The periods after "Mr." and "Dr." must not split the sentence.
    assert_split("Mr. Smith met Dr. Jones.", &["Mr. Smith met Dr. Jones."]);

    // A lone sentence comes back unchanged.
    assert_split("This is a single sentence.", &["This is a single sentence."]);
}
1530
#[test]
fn test_sentence_per_line_reflow() {
    let opts = ReflowOptions {
        sentence_per_line: true,
        line_length: 80,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // (input, expected output lines): plain sentences first, then sentences
    // that end right after Markdown elements.
    let cases: [(&str, &[&str]); 2] = [
        (
            "First sentence. Second sentence. Third sentence.",
            &["First sentence.", "Second sentence.", "Third sentence."],
        ),
        (
            "This has **bold**. And [a link](url).",
            &["This has **bold**.", "And [a link](url)."],
        ),
    ];

    for (input, expected_lines) in cases {
        let lines = reflow_line(input, &opts);
        assert_eq!(lines.len(), expected_lines.len());
        for (idx, want) in expected_lines.iter().enumerate() {
            assert_eq!(lines[idx], *want);
        }
    }
}
1555
#[test]
fn test_sentence_per_line_with_backticks() {
    let opts = ReflowOptions {
        sentence_per_line: true,
        line_length: 80,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // Two sentences, each containing an inline code span.
    let expected_lines = ["This sentence has `code` in it.", "And this has `more code` too."];
    let lines = reflow_line(
        "This sentence has `code` in it. And this has `more code` too.",
        &opts,
    );

    assert_eq!(lines.len(), 2);
    for (idx, want) in expected_lines.iter().enumerate() {
        assert_eq!(lines[idx], *want);
    }
}
1571
#[test]
fn test_sentence_per_line_with_backticks_in_parens() {
    let opts = ReflowOptions {
        sentence_per_line: true,
        line_length: 80,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // The first sentence ends with ")." right after a code span; the dots
    // inside the code spans must not be taken for sentence ends.
    let first_sentence = "Configure in (`.rumdl.toml` or `pyproject.toml`).";
    let second_sentence = "Next sentence.";

    let lines = reflow_line(
        "Configure in (`.rumdl.toml` or `pyproject.toml`). Next sentence.",
        &opts,
    );

    assert_eq!(lines.len(), 2);
    assert_eq!(lines[0], first_sentence);
    assert_eq!(lines[1], second_sentence);
}
1587
#[test]
fn test_sentence_per_line_with_questions_exclamations() {
    let opts = ReflowOptions {
        sentence_per_line: true,
        line_length: 80,
        break_on_sentences: true,
        preserve_breaks: false,
    };

    // One sentence per terminator kind: '?', '!' and '.'.
    let expected_lines = ["Is this a question?", "Yes it is!", "And a statement."];
    let lines = reflow_line("Is this a question? Yes it is! And a statement.", &opts);

    assert_eq!(lines.len(), 3);
    for (idx, want) in expected_lines.iter().enumerate() {
        assert_eq!(lines[idx], *want);
    }
}
1604
#[test]
fn test_reference_link_edge_cases() {
    let opts = ReflowOptions {
        line_length: 40,
        break_on_sentences: true,
        preserve_breaks: false,
        sentence_per_line: false,
    };

    // (input line, spans that must appear verbatim in the reflowed output)
    let edge_cases: [(&str, &[&str]); 5] = [
        // Backslash-escaped brackets are ordinary text
        ("Text with \\[escaped\\] brackets", &["\\[escaped\\]"]),
        // Brackets nested inside the link text of a reference link
        (
            "Link [text with [nested] content][ref]",
            &["[text with [nested] content][ref]"],
        ),
        // A reference link directly followed by an inline link
        (
            "First [ref][link] then [inline](url)",
            &["[ref][link]", "[inline](url)"],
        ),
        // Shortcut references next to bracketed text such as an index
        ("Array [0] and reference [link] here", &["[0]", "[link]"]),
        // Collapsed reference whose text carries emphasis
        (
            "Complex [text with *emphasis*][] reference",
            &["[text with *emphasis*][]"],
        ),
    ];

    for (input, expected_patterns) in edge_cases {
        println!("\nTesting edge case: {input}");
        let joined = reflow_line(input, &opts).join(" ");
        println!("Result: {joined}");

        // Every listed span must survive reflow untouched.
        for &expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1652
#[test]
fn test_reflow_with_emphasis() {
    let opts = ReflowOptions {
        line_length: 25,
        ..ReflowOptions::default()
    };

    // Re-join the wrapped lines so each emphasis span can be searched for as
    // one contiguous piece.
    let joined = reflow_line("This is *emphasized* and **strong** text that needs wrapping", &opts).join(" ");

    for marker in ["*emphasized*", "**strong**"] {
        assert!(joined.contains(marker));
    }
}
1667
#[test]
fn test_image_patterns_preserved() {
    let opts = ReflowOptions {
        line_length: 30,
        ..ReflowOptions::default()
    };

    // (input line, image spans that must appear verbatim after reflow)
    let image_cases: [(&str, &[&str]); 4] = [
        // Inline image
        (
            "Check out ![alt text](image.png) for details",
            &["![alt text](image.png)"],
        ),
        // Reference image
        ("See ![image][ref] for info", &["![image][ref]"]),
        // Empty reference image
        ("Visit ![homepage][] today", &["![homepage][]"]),
        // One inline and one reference image on the same line
        (
            "Images: ![first](a.png) and ![second][ref2]",
            &["![first](a.png)", "![second][ref2]"],
        ),
    ];

    for (input, expected_patterns) in image_cases {
        println!("\nTesting: {input}");
        let joined = reflow_line(input, &opts).join(" ");
        println!("Result:  {joined}");

        for &expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1707
#[test]
fn test_extended_markdown_patterns() {
    let opts = ReflowOptions {
        line_length: 40,
        ..ReflowOptions::default()
    };

    // (input line, spans that must appear verbatim after reflow), one entry
    // per extended syntax family.
    let extended_cases: [(&str, &[&str]); 6] = [
        // Strikethrough
        ("Text with ~~strikethrough~~ preserved", &["~~strikethrough~~"]),
        // Wiki links
        (
            "Check [[wiki link]] and [[page|display]]",
            &["[[wiki link]]", "[[page|display]]"],
        ),
        // Inline and display math
        (
            "Inline $x^2 + y^2$ and display $$\\int f(x) dx$$",
            &["$x^2 + y^2$", "$$\\int f(x) dx$$"],
        ),
        // Emoji shortcodes
        ("Use :smile: and :heart: emojis", &[":smile:", ":heart:"]),
        // HTML tags
        (
            "Text with <span>tag</span> and <br/>",
            &["<span>", "</span>", "<br/>"],
        ),
        // HTML entities
        ("Non-breaking&nbsp;space and em&mdash;dash", &["&nbsp;", "&mdash;"]),
    ];

    for (input, expected_patterns) in extended_cases {
        let joined = reflow_line(input, &opts).join(" ");

        for &pattern in expected_patterns {
            assert!(
                joined.contains(pattern),
                "Expected '{pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1751
#[test]
fn test_complex_mixed_patterns() {
    let opts = ReflowOptions {
        line_length: 50,
        ..ReflowOptions::default()
    };

    // One line that mixes many inline element kinds at once.
    let input = "Line with **bold**, `code`, [link](url), ![image](img), ~~strike~~, $math$, :emoji:, and <tag> all together";
    let joined = reflow_line(input, &opts).join(" ");

    // Every element must still be present as one contiguous span.
    let preserved_spans = [
        "**bold**",
        "`code`",
        "[link](url)",
        "![image](img)",
        "~~strike~~",
        "$math$",
        ":emoji:",
        "<tag>",
    ];
    for span in preserved_spans {
        assert!(joined.contains(span));
    }
}
1774
#[test]
fn test_footnote_patterns_preserved() {
    let opts = ReflowOptions {
        line_length: 40,
        ..ReflowOptions::default()
    };

    // (input line, footnote references that must appear verbatim after reflow)
    let footnote_cases: [(&str, &[&str]); 3] = [
        // Single footnote attached directly to a word
        ("This has a footnote[^1] reference", &["[^1]"]),
        // Two footnotes on one line
        ("Text with [^first] and [^second] notes", &["[^first]", "[^second]"]),
        // Hyphenated, long footnote label
        ("Reference to [^long-footnote-name] here", &["[^long-footnote-name]"]),
    ];

    for (input, expected_patterns) in footnote_cases {
        let joined = reflow_line(input, &opts).join(" ");

        for &expected_pattern in expected_patterns {
            assert!(
                joined.contains(expected_pattern),
                "Expected '{expected_pattern}' to be preserved in '{input}', but got '{joined}'"
            );
        }
    }
}
1803
#[test]
fn test_reflow_markdown_numbered_lists() {
    // Regression test for issue #83: ordered list items keep their markers and
    // wrapped continuation lines are indented under the item text.

    // Exact output expected for a 50-column limit.
    let expected = r#"1. List `manifest` to find the manifest with the
   largest ID. Say it's
   `00000000000000000002.manifest` in this
   example.
2. Short item
3. Another long item that definitely exceeds the
   fifty character limit and needs wrapping"#;

    // Items 1 and 3 are over the limit; item 2 already fits.
    let source = r#"1. List `manifest` to find the manifest with the largest ID. Say it's `00000000000000000002.manifest` in this example.
2. Short item
3. Another long item that definitely exceeds the fifty character limit and needs wrapping"#;

    let opts = ReflowOptions {
        line_length: 50,
        ..ReflowOptions::default()
    };
    let result = reflow_markdown(source, &opts);

    assert_eq!(
        result, expected,
        "Numbered lists should be reflowed with proper markers and indentation.\nExpected:\n{expected}\nGot:\n{result}"
    );
}
1832
#[test]
fn test_reflow_markdown_bullet_lists() {
    // Exact output expected for a 40-column limit: each bullet keeps its own
    // marker character and the wrapped line is indented by two spaces.
    let expected = r#"- First bullet point with a very long
  line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

    // Only the first bullet is over the limit; the list mixes `-`, `*` and `+`.
    let source = r#"- First bullet point with a very long line that needs wrapping
* Second bullet using asterisk
+ Third bullet using plus sign
- Short one"#;

    let opts = ReflowOptions {
        line_length: 40,
        ..ReflowOptions::default()
    };
    let result = reflow_markdown(source, &opts);

    assert_eq!(
        result, expected,
        "Bullet lists should preserve markers and indent continuations with 2 spaces.\nExpected:\n{expected}\nGot:\n{result}"
    );
}
1859}