rumdl_lib/utils/
text_reflow.rs

1//! Text reflow utilities for MD013
2//!
3//! This module implements text wrapping/reflow functionality that preserves
4//! Markdown elements like links, emphasis, code spans, etc.
5
6use crate::utils::is_definition_list_item;
7use crate::utils::regex_cache::{
8    DISPLAY_MATH_REGEX, EMOJI_SHORTCODE_REGEX, FOOTNOTE_REF_REGEX, HTML_ENTITY_REGEX, HTML_TAG_PATTERN,
9    INLINE_IMAGE_FANCY_REGEX, INLINE_LINK_FANCY_REGEX, INLINE_MATH_REGEX, REF_IMAGE_REGEX, REF_LINK_REGEX,
10    SHORTCUT_REF_REGEX, STRIKETHROUGH_FANCY_REGEX, WIKI_LINK_REGEX,
11};
/// Options for reflowing text
///
/// Use [`ReflowOptions::default`] for the stock configuration and override
/// individual fields with struct-update syntax.
#[derive(Debug, Clone)]
pub struct ReflowOptions {
    /// Target line length (compared against the `char` count of a line by `reflow_line`)
    pub line_length: usize,
    /// Whether to break on sentence boundaries when possible
    pub break_on_sentences: bool,
    /// Whether to preserve existing line breaks in paragraphs
    pub preserve_breaks: bool,
    /// Whether to enforce one sentence per line
    pub sentence_per_line: bool,
}

impl Default for ReflowOptions {
    /// Defaults: 80-character lines, sentence-aware breaking enabled,
    /// existing breaks not preserved, sentence-per-line mode disabled.
    fn default() -> Self {
        Self {
            line_length: 80,
            break_on_sentences: true,
            preserve_breaks: false,
            sentence_per_line: false,
        }
    }
}
35
/// Check if text ends with a common abbreviation followed by a period
///
/// Abbreviations only count when followed by a period, not ! or ?.
/// The comparison is an exact, ASCII-case-insensitive match on the last
/// whitespace-delimited word, so words that merely end in abbreviation-like
/// letters (e.g., "paradigms" ending in "ms") are not treated as abbreviations.
/// All trailing periods are stripped before the lookup.
///
/// Examples:
///   - "Dr." -> true (abbreviation)
///   - "Dr?" -> false (question, not abbreviation)
///   - "paradigms." -> false (not in abbreviation list)
///   - "paradigms?" -> false (question mark, not abbreviation)
///
/// See: Issue #150
fn text_ends_with_abbreviation(text: &str) -> bool {
    // Common abbreviations, stored without the trailing period.
    // This list matches the abbreviations from sentences-per-line.
    const ABBREVIATIONS: &[&str] = &[
        "ie", "i.e", "eg", "e.g", "etc", "ex", "vs", "Mr", "Mrs", "Dr", "Ms", "Prof", "Sr", "Jr",
    ];

    // Abbreviations require a period; anything else can never match
    if !text.ends_with('.') {
        return false;
    }

    text.trim_end_matches('.')
        .split_whitespace()
        .next_back()
        .is_some_and(|word| ABBREVIATIONS.iter().any(|abbr| word.eq_ignore_ascii_case(abbr)))
}

/// Shared sentence-boundary test working on a pre-computed character table.
///
/// `chars` must be `text.char_indices().collect()`, i.e. each entry pairs a
/// character with its byte offset in `text`. `pos` is an index into `chars`
/// (a character index, not a byte index). Keeping both views avoids slicing
/// `text` with a character index, which is wrong for multi-byte text.
fn sentence_boundary_at(text: &str, chars: &[(usize, char)], pos: usize) -> bool {
    let Some(&(byte_pos, c)) = chars.get(pos) else {
        return false;
    };

    // Check for sentence-ending punctuation
    if !matches!(c, '.' | '!' | '?') {
        return false;
    }

    // Must be followed by at least one space (this also rejects end of text)
    if !matches!(chars.get(pos + 1), Some(&(_, ' '))) {
        return false;
    }

    // Skip all whitespace after the punctuation to find the start of the next sentence;
    // running off the end of the text means there is no following sentence.
    let Some(&(_, next)) = chars[pos + 2..].iter().find(|entry| !entry.1.is_whitespace()) else {
        return false;
    };

    // Next character after space(s) must be uppercase (new sentence indicator)
    if !next.is_uppercase() {
        return false;
    }

    // Look back to check for common abbreviations (only applies to periods)
    if pos > 0 && c == '.' {
        // Slice by byte offset so the prefix ends exactly after this period
        let through_period = &text[..byte_pos + c.len_utf8()];
        if text_ends_with_abbreviation(through_period) {
            return false;
        }

        // A period sandwiched between numeric characters is not a sentence end
        if chars[pos - 1].1.is_numeric() && next.is_numeric() {
            return false;
        }
    }
    true
}

/// Detect if a character position is a sentence boundary
/// Based on the approach from github.com/JoshuaKGoldberg/sentences-per-line
///
/// `pos` is a character index into `text` (not a byte offset). The position is
/// a boundary when it holds `.`, `!` or `?`, is followed by a space, the next
/// non-whitespace character is uppercase, and (for periods) the text up to the
/// period does not end in a known abbreviation. Out-of-range positions and the
/// final character of the text are never boundaries.
fn is_sentence_boundary(text: &str, pos: usize) -> bool {
    let chars: Vec<(usize, char)> = text.char_indices().collect();
    sentence_boundary_at(text, &chars, pos)
}

/// Split text into sentences
///
/// Each returned sentence is trimmed of surrounding whitespace and keeps its
/// terminating punctuation. Text after the last boundary is returned as a
/// final sentence unless it is blank. Blank input yields an empty vector.
pub fn split_into_sentences(text: &str) -> Vec<String> {
    // Build the character table once instead of once per examined position
    let chars: Vec<(usize, char)> = text.char_indices().collect();
    let mut sentences = Vec::new();
    // Byte offset where the sentence currently being accumulated starts
    let mut start = 0;

    for (pos, &(byte_pos, c)) in chars.iter().enumerate() {
        if sentence_boundary_at(text, &chars, pos) {
            let end = byte_pos + c.len_utf8();
            sentences.push(text[start..end].trim().to_string());
            // The separating whitespace is dropped by the trim of the next sentence
            start = end;
        }
    }

    // Add any remaining text as the last sentence
    let tail = text[start..].trim();
    if !tail.is_empty() {
        sentences.push(tail.to_string());
    }
    sentences
}
158
/// Report whether `line` is a horizontal rule such as `---`, `___` or `* * *`.
///
/// The line must begin with `-`, `_` or `*`, contain only that marker and
/// plain spaces, and include the marker at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    // The very first character decides which marker the rule is made of
    let Some(marker) = line.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '_' | '*') {
        return false;
    }

    // Single pass: count markers, tolerate spaces, reject everything else
    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' {
            return false;
        }
    }

    marker_count >= 3
}
187
/// Check if a line is a numbered list item (e.g., "1. ", "10. ")
///
/// The line must start with one or more ASCII digits (`0`-`9`), followed by a
/// period, followed by either a space or the end of the line. Only ASCII
/// digits are accepted: CommonMark ordered list markers are made of arabic
/// digits, so other Unicode numerics such as `²` or `½` do not start a list.
fn is_numbered_list_item(line: &str) -> bool {
    // Length of the leading digit run; ASCII digits are single bytes, so this
    // count is also a valid byte offset into `line`.
    let digit_len = line.bytes().take_while(u8::is_ascii_digit).count();
    if digit_len == 0 {
        return false;
    }

    let mut rest = line[digit_len..].chars();
    // After the digits: a period, then a space or end of line
    rest.next() == Some('.') && rest.next().is_none_or(|c| c == ' ')
}
210
/// Report whether a line finishes with a Markdown hard line break.
///
/// Two spellings are recognised:
/// 1. Two (or more) trailing spaces
/// 2. A trailing backslash
///
/// A single trailing `\r` (from a CRLF line ending) is ignored before checking.
fn has_hard_break(line: &str) -> bool {
    let content = match line.strip_suffix('\r') {
        Some(without_cr) => without_cr,
        None => line,
    };
    // Space and backslash are ASCII, so inspecting the trailing bytes is exact
    matches!(content.as_bytes(), [.., b' ', b' '] | [.., b'\\'])
}
220
/// Strip trailing whitespace from a line while keeping a hard-break marker.
///
/// A single trailing `\r` (CRLF input) is dropped first. After that:
/// - a line ending in a backslash is returned untouched;
/// - a line ending in two or more spaces is trimmed and given back exactly
///   two trailing spaces, unless nothing but whitespace remains, in which
///   case the result is empty;
/// - any other line simply has its trailing whitespace removed.
fn trim_preserving_hard_break(s: &str) -> String {
    let line = s.strip_suffix('\r').unwrap_or(s);

    // Backslash hard break: keep the line exactly as written
    if line.ends_with('\\') {
        return line.to_owned();
    }

    let content = line.trim_end();
    let keeps_space_break = line.ends_with("  ") && !content.is_empty();
    if keeps_space_break {
        // Normalise any longer run of trailing whitespace to the two-space marker
        let mut rebuilt = String::with_capacity(content.len() + 2);
        rebuilt.push_str(content);
        rebuilt.push_str("  ");
        rebuilt
    } else {
        content.to_owned()
    }
}
251
252pub fn reflow_line(line: &str, options: &ReflowOptions) -> Vec<String> {
253    // For sentence-per-line mode, always process regardless of length
254    if options.sentence_per_line {
255        let elements = parse_markdown_elements(line);
256        return reflow_elements_sentence_per_line(&elements);
257    }
258
259    // Quick check: if line is already short enough, return as-is
260    if line.chars().count() <= options.line_length {
261        return vec![line.to_string()];
262    }
263
264    // Parse the markdown to identify elements
265    let elements = parse_markdown_elements(line);
266
267    // Reflow the elements into lines
268    reflow_elements(&elements, options)
269}
270
/// One unit of inline Markdown content.
///
/// Every variant except `Text` stores only its inner payload; the surrounding
/// delimiters are re-added by the `Display` implementation and accounted for
/// by `len`.
#[derive(Debug, Clone)]
enum Element {
    /// Plain text that can be wrapped
    Text(String),
    /// Inline link: `[text](url)`
    Link { text: String, url: String },
    /// Full reference link: `[text][ref]`
    ReferenceLink { text: String, reference: String },
    /// Collapsed reference link: `[text][]`
    EmptyReferenceLink { text: String },
    /// Shortcut reference link: `[ref]`
    ShortcutReference { reference: String },
    /// Inline image: `![alt](url)`
    InlineImage { alt: String, url: String },
    /// Full reference image: `![alt][ref]`
    ReferenceImage { alt: String, reference: String },
    /// Collapsed reference image: `![alt][]`
    EmptyReferenceImage { alt: String },
    /// Footnote reference: `[^note]`
    FootnoteReference { note: String },
    /// Strikethrough: `~~text~~`
    Strikethrough(String),
    /// Wiki-style link: `[[wiki]]` or `[[wiki|text]]`
    WikiLink(String),
    /// Inline math: `$math$`
    InlineMath(String),
    /// Display math: `$$math$$`
    DisplayMath(String),
    /// Emoji shortcode: `:emoji:`
    EmojiShortcode(String),
    /// HTML tag, stored with its angle brackets: `<tag>`, `</tag>`, `<tag/>`
    HtmlTag(String),
    /// HTML entity, stored whole: `&nbsp;` or `&#123;`
    HtmlEntity(String),
    /// Inline code: `` `code` ``
    Code(String),
    /// Bold: `**text**`
    Bold(String),
    /// Italic: `*text*`
    Italic(String),
}

impl std::fmt::Display for Element {
    /// Render the element back to Markdown source, restoring its delimiters.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // These payloads are already complete source text
            Element::Text(raw) | Element::HtmlTag(raw) | Element::HtmlEntity(raw) => f.write_str(raw),

            // Links
            Element::Link { text, url } => write!(f, "[{text}]({url})"),
            Element::ReferenceLink { text, reference } => write!(f, "[{text}][{reference}]"),
            Element::EmptyReferenceLink { text } => write!(f, "[{text}][]"),
            Element::ShortcutReference { reference } => write!(f, "[{reference}]"),
            Element::FootnoteReference { note } => write!(f, "[^{note}]"),
            Element::WikiLink(target) => write!(f, "[[{target}]]"),

            // Images
            Element::InlineImage { alt, url } => write!(f, "![{alt}]({url})"),
            Element::ReferenceImage { alt, reference } => write!(f, "![{alt}][{reference}]"),
            Element::EmptyReferenceImage { alt } => write!(f, "![{alt}][]"),

            // Symmetric delimiters
            Element::Strikethrough(inner) => write!(f, "~~{inner}~~"),
            Element::InlineMath(inner) => write!(f, "${inner}$"),
            Element::DisplayMath(inner) => write!(f, "$${inner}$$"),
            Element::EmojiShortcode(inner) => write!(f, ":{inner}:"),
            Element::Code(inner) => write!(f, "`{inner}`"),
            Element::Bold(inner) => write!(f, "**{inner}**"),
            Element::Italic(inner) => write!(f, "*{inner}*"),
        }
    }
}

impl Element {
    /// Length of the rendered element in `char`s (payload plus delimiters).
    fn len(&self) -> usize {
        let width = |s: &str| s.chars().count();

        match self {
            // Stored verbatim, no delimiters to add
            Element::Text(raw) | Element::HtmlTag(raw) | Element::HtmlEntity(raw) => width(raw),

            // Two payloads
            Element::Link { text, url } => width(text) + width(url) + 4, // [text](url)
            Element::ReferenceLink { text, reference } => width(text) + width(reference) + 4, // [text][ref]
            Element::InlineImage { alt, url } => width(alt) + width(url) + 5, // ![alt](url)
            Element::ReferenceImage { alt, reference } => width(alt) + width(reference) + 5, // ![alt][ref]

            // One payload, bracket-style delimiters
            Element::EmptyReferenceLink { text } => width(text) + 4, // [text][]
            Element::ShortcutReference { reference } => width(reference) + 2, // [ref]
            Element::EmptyReferenceImage { alt } => width(alt) + 5,  // ![alt][]
            Element::FootnoteReference { note } => width(note) + 3,  // [^note]
            Element::WikiLink(target) => width(target) + 4,          // [[wiki]]

            // One payload, symmetric delimiters
            Element::Strikethrough(inner) => width(inner) + 4,  // ~~text~~
            Element::DisplayMath(inner) => width(inner) + 4,    // $$math$$
            Element::Bold(inner) => width(inner) + 4,           // **text**
            Element::InlineMath(inner) => width(inner) + 2,     // $math$
            Element::EmojiShortcode(inner) => width(inner) + 2, // :emoji:
            Element::Code(inner) => width(inner) + 2,           // `code`
            Element::Italic(inner) => width(inner) + 2,         // *text*
        }
    }
}
365
366/// Parse markdown elements from text preserving the raw syntax
367///
368/// Detection order is critical:
369/// 1. Inline links [text](url) - must be detected first to avoid conflicts
370/// 2. Reference links [text][ref] - detected before shortcut references
371/// 3. Empty reference links [text][] - a special case of reference links
372/// 4. Shortcut reference links [ref] - detected last to avoid false positives
373/// 5. Other elements (code, bold, italic) - processed normally
374fn parse_markdown_elements(text: &str) -> Vec<Element> {
375    let mut elements = Vec::new();
376    let mut remaining = text;
377
378    while !remaining.is_empty() {
379        // Find the earliest occurrence of any markdown pattern
380        let mut earliest_match: Option<(usize, &str, fancy_regex::Match)> = None;
381
382        // Check for images first (they start with ! so should be detected before links)
383        // Inline images - ![alt](url)
384        if let Ok(Some(m)) = INLINE_IMAGE_FANCY_REGEX.find(remaining)
385            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
386        {
387            earliest_match = Some((m.start(), "inline_image", m));
388        }
389
390        // Reference images - ![alt][ref]
391        if let Ok(Some(m)) = REF_IMAGE_REGEX.find(remaining)
392            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
393        {
394            earliest_match = Some((m.start(), "ref_image", m));
395        }
396
397        // Check for footnote references - [^note]
398        if let Ok(Some(m)) = FOOTNOTE_REF_REGEX.find(remaining)
399            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
400        {
401            earliest_match = Some((m.start(), "footnote_ref", m));
402        }
403
404        // Check for inline links - [text](url)
405        if let Ok(Some(m)) = INLINE_LINK_FANCY_REGEX.find(remaining)
406            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
407        {
408            earliest_match = Some((m.start(), "inline_link", m));
409        }
410
411        // Check for reference links - [text][ref]
412        if let Ok(Some(m)) = REF_LINK_REGEX.find(remaining)
413            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
414        {
415            earliest_match = Some((m.start(), "ref_link", m));
416        }
417
418        // Check for shortcut reference links - [ref]
419        // Only check if we haven't found an earlier pattern that would conflict
420        if let Ok(Some(m)) = SHORTCUT_REF_REGEX.find(remaining)
421            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
422        {
423            earliest_match = Some((m.start(), "shortcut_ref", m));
424        }
425
426        // Check for wiki-style links - [[wiki]]
427        if let Ok(Some(m)) = WIKI_LINK_REGEX.find(remaining)
428            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
429        {
430            earliest_match = Some((m.start(), "wiki_link", m));
431        }
432
433        // Check for display math first (before inline) - $$math$$
434        if let Ok(Some(m)) = DISPLAY_MATH_REGEX.find(remaining)
435            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
436        {
437            earliest_match = Some((m.start(), "display_math", m));
438        }
439
440        // Check for inline math - $math$
441        if let Ok(Some(m)) = INLINE_MATH_REGEX.find(remaining)
442            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
443        {
444            earliest_match = Some((m.start(), "inline_math", m));
445        }
446
447        // Check for strikethrough - ~~text~~
448        if let Ok(Some(m)) = STRIKETHROUGH_FANCY_REGEX.find(remaining)
449            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
450        {
451            earliest_match = Some((m.start(), "strikethrough", m));
452        }
453
454        // Check for emoji shortcodes - :emoji:
455        if let Ok(Some(m)) = EMOJI_SHORTCODE_REGEX.find(remaining)
456            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
457        {
458            earliest_match = Some((m.start(), "emoji", m));
459        }
460
461        // Check for HTML entities - &nbsp; etc
462        if let Ok(Some(m)) = HTML_ENTITY_REGEX.find(remaining)
463            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
464        {
465            earliest_match = Some((m.start(), "html_entity", m));
466        }
467
468        // Check for HTML tags - <tag> </tag> <tag/>
469        // But exclude autolinks like <https://...> or <mailto:...>
470        if let Ok(Some(m)) = HTML_TAG_PATTERN.find(remaining)
471            && earliest_match.as_ref().is_none_or(|(start, _, _)| m.start() < *start)
472        {
473            // Check if this is an autolink (starts with protocol or mailto:)
474            let matched_text = &remaining[m.start()..m.end()];
475            let is_autolink = matched_text.starts_with("<http://")
476                || matched_text.starts_with("<https://")
477                || matched_text.starts_with("<mailto:")
478                || matched_text.starts_with("<ftp://")
479                || matched_text.starts_with("<ftps://");
480
481            if !is_autolink {
482                earliest_match = Some((m.start(), "html_tag", m));
483            }
484        }
485
486        // Find earliest non-link special characters
487        let mut next_special = remaining.len();
488        let mut special_type = "";
489
490        if let Some(pos) = remaining.find('`')
491            && pos < next_special
492        {
493            next_special = pos;
494            special_type = "code";
495        }
496        if let Some(pos) = remaining.find("**")
497            && pos < next_special
498        {
499            next_special = pos;
500            special_type = "bold";
501        }
502        if let Some(pos) = remaining.find('*')
503            && pos < next_special
504            && !remaining[pos..].starts_with("**")
505        {
506            next_special = pos;
507            special_type = "italic";
508        }
509
510        // Determine which pattern to process first
511        let should_process_markdown_link = if let Some((pos, _, _)) = earliest_match {
512            pos < next_special
513        } else {
514            false
515        };
516
517        if should_process_markdown_link {
518            let (pos, pattern_type, match_obj) = earliest_match.unwrap();
519
520            // Add any text before the match
521            if pos > 0 {
522                elements.push(Element::Text(remaining[..pos].to_string()));
523            }
524
525            // Process the matched pattern
526            match pattern_type {
527                "inline_image" => {
528                    if let Ok(Some(caps)) = INLINE_IMAGE_FANCY_REGEX.captures(remaining) {
529                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
530                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
531                        elements.push(Element::InlineImage {
532                            alt: alt.to_string(),
533                            url: url.to_string(),
534                        });
535                        remaining = &remaining[match_obj.end()..];
536                    } else {
537                        elements.push(Element::Text("!".to_string()));
538                        remaining = &remaining[1..];
539                    }
540                }
541                "ref_image" => {
542                    if let Ok(Some(caps)) = REF_IMAGE_REGEX.captures(remaining) {
543                        let alt = caps.get(1).map(|m| m.as_str()).unwrap_or("");
544                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
545
546                        if reference.is_empty() {
547                            elements.push(Element::EmptyReferenceImage { alt: alt.to_string() });
548                        } else {
549                            elements.push(Element::ReferenceImage {
550                                alt: alt.to_string(),
551                                reference: reference.to_string(),
552                            });
553                        }
554                        remaining = &remaining[match_obj.end()..];
555                    } else {
556                        elements.push(Element::Text("!".to_string()));
557                        remaining = &remaining[1..];
558                    }
559                }
560                "footnote_ref" => {
561                    if let Ok(Some(caps)) = FOOTNOTE_REF_REGEX.captures(remaining) {
562                        let note = caps.get(1).map(|m| m.as_str()).unwrap_or("");
563                        elements.push(Element::FootnoteReference { note: note.to_string() });
564                        remaining = &remaining[match_obj.end()..];
565                    } else {
566                        elements.push(Element::Text("[".to_string()));
567                        remaining = &remaining[1..];
568                    }
569                }
570                "inline_link" => {
571                    if let Ok(Some(caps)) = INLINE_LINK_FANCY_REGEX.captures(remaining) {
572                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
573                        let url = caps.get(2).map(|m| m.as_str()).unwrap_or("");
574                        elements.push(Element::Link {
575                            text: text.to_string(),
576                            url: url.to_string(),
577                        });
578                        remaining = &remaining[match_obj.end()..];
579                    } else {
580                        // Fallback - shouldn't happen
581                        elements.push(Element::Text("[".to_string()));
582                        remaining = &remaining[1..];
583                    }
584                }
585                "ref_link" => {
586                    if let Ok(Some(caps)) = REF_LINK_REGEX.captures(remaining) {
587                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
588                        let reference = caps.get(2).map(|m| m.as_str()).unwrap_or("");
589
590                        if reference.is_empty() {
591                            // Empty reference link [text][]
592                            elements.push(Element::EmptyReferenceLink { text: text.to_string() });
593                        } else {
594                            // Regular reference link [text][ref]
595                            elements.push(Element::ReferenceLink {
596                                text: text.to_string(),
597                                reference: reference.to_string(),
598                            });
599                        }
600                        remaining = &remaining[match_obj.end()..];
601                    } else {
602                        // Fallback - shouldn't happen
603                        elements.push(Element::Text("[".to_string()));
604                        remaining = &remaining[1..];
605                    }
606                }
607                "shortcut_ref" => {
608                    if let Ok(Some(caps)) = SHORTCUT_REF_REGEX.captures(remaining) {
609                        let reference = caps.get(1).map(|m| m.as_str()).unwrap_or("");
610                        elements.push(Element::ShortcutReference {
611                            reference: reference.to_string(),
612                        });
613                        remaining = &remaining[match_obj.end()..];
614                    } else {
615                        // Fallback - shouldn't happen
616                        elements.push(Element::Text("[".to_string()));
617                        remaining = &remaining[1..];
618                    }
619                }
620                "wiki_link" => {
621                    if let Ok(Some(caps)) = WIKI_LINK_REGEX.captures(remaining) {
622                        let content = caps.get(1).map(|m| m.as_str()).unwrap_or("");
623                        elements.push(Element::WikiLink(content.to_string()));
624                        remaining = &remaining[match_obj.end()..];
625                    } else {
626                        elements.push(Element::Text("[[".to_string()));
627                        remaining = &remaining[2..];
628                    }
629                }
630                "display_math" => {
631                    if let Ok(Some(caps)) = DISPLAY_MATH_REGEX.captures(remaining) {
632                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
633                        elements.push(Element::DisplayMath(math.to_string()));
634                        remaining = &remaining[match_obj.end()..];
635                    } else {
636                        elements.push(Element::Text("$$".to_string()));
637                        remaining = &remaining[2..];
638                    }
639                }
640                "inline_math" => {
641                    if let Ok(Some(caps)) = INLINE_MATH_REGEX.captures(remaining) {
642                        let math = caps.get(1).map(|m| m.as_str()).unwrap_or("");
643                        elements.push(Element::InlineMath(math.to_string()));
644                        remaining = &remaining[match_obj.end()..];
645                    } else {
646                        elements.push(Element::Text("$".to_string()));
647                        remaining = &remaining[1..];
648                    }
649                }
650                "strikethrough" => {
651                    if let Ok(Some(caps)) = STRIKETHROUGH_FANCY_REGEX.captures(remaining) {
652                        let text = caps.get(1).map(|m| m.as_str()).unwrap_or("");
653                        elements.push(Element::Strikethrough(text.to_string()));
654                        remaining = &remaining[match_obj.end()..];
655                    } else {
656                        elements.push(Element::Text("~~".to_string()));
657                        remaining = &remaining[2..];
658                    }
659                }
660                "emoji" => {
661                    if let Ok(Some(caps)) = EMOJI_SHORTCODE_REGEX.captures(remaining) {
662                        let emoji = caps.get(1).map(|m| m.as_str()).unwrap_or("");
663                        elements.push(Element::EmojiShortcode(emoji.to_string()));
664                        remaining = &remaining[match_obj.end()..];
665                    } else {
666                        elements.push(Element::Text(":".to_string()));
667                        remaining = &remaining[1..];
668                    }
669                }
670                "html_entity" => {
671                    // HTML entities are captured whole
672                    elements.push(Element::HtmlEntity(remaining[..match_obj.end()].to_string()));
673                    remaining = &remaining[match_obj.end()..];
674                }
675                "html_tag" => {
676                    // HTML tags are captured whole
677                    elements.push(Element::HtmlTag(remaining[..match_obj.end()].to_string()));
678                    remaining = &remaining[match_obj.end()..];
679                }
680                _ => {
681                    // Unknown pattern, treat as text
682                    elements.push(Element::Text("[".to_string()));
683                    remaining = &remaining[1..];
684                }
685            }
686        } else {
687            // Process non-link special characters
688
689            // Add any text before the special character
690            if next_special > 0 && next_special < remaining.len() {
691                elements.push(Element::Text(remaining[..next_special].to_string()));
692                remaining = &remaining[next_special..];
693            }
694
695            // Process the special element
696            match special_type {
697                "code" => {
698                    // Find end of code
699                    if let Some(code_end) = remaining[1..].find('`') {
700                        let code = &remaining[1..1 + code_end];
701                        elements.push(Element::Code(code.to_string()));
702                        remaining = &remaining[1 + code_end + 1..];
703                    } else {
704                        // No closing backtick, treat as text
705                        elements.push(Element::Text(remaining.to_string()));
706                        break;
707                    }
708                }
709                "bold" => {
710                    // Check for bold text
711                    if let Some(bold_end) = remaining[2..].find("**") {
712                        let bold_text = &remaining[2..2 + bold_end];
713                        elements.push(Element::Bold(bold_text.to_string()));
714                        remaining = &remaining[2 + bold_end + 2..];
715                    } else {
716                        // No closing **, treat as text
717                        elements.push(Element::Text("**".to_string()));
718                        remaining = &remaining[2..];
719                    }
720                }
721                "italic" => {
722                    // Check for italic text
723                    if let Some(italic_end) = remaining[1..].find('*') {
724                        let italic_text = &remaining[1..1 + italic_end];
725                        elements.push(Element::Italic(italic_text.to_string()));
726                        remaining = &remaining[1 + italic_end + 1..];
727                    } else {
728                        // No closing *, treat as text
729                        elements.push(Element::Text("*".to_string()));
730                        remaining = &remaining[1..];
731                    }
732                }
733                _ => {
734                    // No special elements found, add all remaining text
735                    elements.push(Element::Text(remaining.to_string()));
736                    break;
737                }
738            }
739        }
740    }
741
742    elements
743}
744
745/// Reflow elements for sentence-per-line mode
746fn reflow_elements_sentence_per_line(elements: &[Element]) -> Vec<String> {
747    let mut lines = Vec::new();
748    let mut current_line = String::new();
749
750    for element in elements.iter() {
751        let element_str = format!("{element}");
752
753        // For text elements, split into sentences
754        if let Element::Text(text) = element {
755            // Simply append text - it already has correct spacing from tokenization
756            let combined = format!("{current_line}{text}");
757            let sentences = split_into_sentences(&combined);
758
759            if sentences.len() > 1 {
760                // We found sentence boundaries
761                for (i, sentence) in sentences.iter().enumerate() {
762                    if i == 0 {
763                        // First sentence might continue from previous elements
764                        // But check if it ends with an abbreviation
765                        let trimmed = sentence.trim();
766
767                        if text_ends_with_abbreviation(trimmed) {
768                            // Don't emit yet - this sentence ends with abbreviation, continue accumulating
769                            current_line = sentence.to_string();
770                        } else {
771                            // Normal case - emit the first sentence
772                            lines.push(sentence.to_string());
773                            current_line.clear();
774                        }
775                    } else if i == sentences.len() - 1 {
776                        // Last sentence: check if it's complete or incomplete
777                        let trimmed = sentence.trim();
778                        let ends_with_sentence_punct =
779                            trimmed.ends_with('.') || trimmed.ends_with('!') || trimmed.ends_with('?');
780
781                        if ends_with_sentence_punct && !text_ends_with_abbreviation(trimmed) {
782                            // Complete sentence - emit it immediately
783                            lines.push(sentence.to_string());
784                            current_line.clear();
785                        } else {
786                            // Incomplete sentence - save for next iteration
787                            current_line = sentence.to_string();
788                        }
789                    } else {
790                        // Complete sentences in the middle
791                        lines.push(sentence.to_string());
792                    }
793                }
794            } else {
795                // No sentence boundary found, continue accumulating
796                current_line = combined;
797            }
798        } else {
799            // Non-text elements (Code, Bold, Italic, etc.)
800            // Add space before element if needed (unless it's after an opening paren/bracket)
801            if !current_line.is_empty()
802                && !current_line.ends_with(' ')
803                && !current_line.ends_with('(')
804                && !current_line.ends_with('[')
805            {
806                current_line.push(' ');
807            }
808            current_line.push_str(&element_str);
809        }
810    }
811
812    // Add any remaining content
813    if !current_line.is_empty() {
814        lines.push(current_line.trim().to_string());
815    }
816    lines
817}
818
819/// Reflow elements into lines that fit within the line length
820fn reflow_elements(elements: &[Element], options: &ReflowOptions) -> Vec<String> {
821    let mut lines = Vec::new();
822    let mut current_line = String::new();
823    let mut current_length = 0;
824
825    for element in elements {
826        let element_str = format!("{element}");
827        let element_len = element.len();
828
829        // For text elements that might need breaking
830        if let Element::Text(text) = element {
831            // If this is a text element, always process it word by word
832            let words: Vec<&str> = text.split_whitespace().collect();
833
834            for word in words {
835                let word_len = word.chars().count();
836                if current_length > 0 && current_length + 1 + word_len > options.line_length {
837                    // Start a new line
838                    lines.push(current_line.trim().to_string());
839                    current_line = word.to_string();
840                    current_length = word_len;
841                } else {
842                    // Add word to current line
843                    if current_length > 0 {
844                        current_line.push(' ');
845                        current_length += 1;
846                    }
847                    current_line.push_str(word);
848                    current_length += word_len;
849                }
850            }
851        } else {
852            // For non-text elements (code, links, references), treat as atomic units
853            // These should never be broken across lines
854            if current_length > 0 && current_length + 1 + element_len > options.line_length {
855                // Start a new line
856                lines.push(current_line.trim().to_string());
857                current_line = element_str;
858                current_length = element_len;
859            } else {
860                // Add element to current line
861                if current_length > 0 {
862                    current_line.push(' ');
863                    current_length += 1;
864                }
865                current_line.push_str(&element_str);
866                current_length += element_len;
867            }
868        }
869    }
870
871    // Don't forget the last line
872    if !current_line.is_empty() {
873        lines.push(current_line.trim_end().to_string());
874    }
875
876    lines
877}
878
879/// Reflow markdown content preserving structure
880pub fn reflow_markdown(content: &str, options: &ReflowOptions) -> String {
881    let lines: Vec<&str> = content.lines().collect();
882    let mut result = Vec::new();
883    let mut i = 0;
884
885    while i < lines.len() {
886        let line = lines[i];
887        let trimmed = line.trim();
888
889        // Preserve empty lines
890        if trimmed.is_empty() {
891            result.push(String::new());
892            i += 1;
893            continue;
894        }
895
896        // Preserve headings as-is
897        if trimmed.starts_with('#') {
898            result.push(line.to_string());
899            i += 1;
900            continue;
901        }
902
903        // Preserve fenced code blocks
904        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
905            result.push(line.to_string());
906            i += 1;
907            // Copy lines until closing fence
908            while i < lines.len() {
909                result.push(lines[i].to_string());
910                if lines[i].trim().starts_with("```") || lines[i].trim().starts_with("~~~") {
911                    i += 1;
912                    break;
913                }
914                i += 1;
915            }
916            continue;
917        }
918
919        // Preserve indented code blocks (4+ spaces or 1+ tab)
920        if line.starts_with("    ") || line.starts_with("\t") {
921            // Collect all consecutive indented lines
922            result.push(line.to_string());
923            i += 1;
924            while i < lines.len() {
925                let next_line = lines[i];
926                // Continue if next line is also indented or empty (empty lines in code blocks are ok)
927                if next_line.starts_with("    ") || next_line.starts_with("\t") || next_line.trim().is_empty() {
928                    result.push(next_line.to_string());
929                    i += 1;
930                } else {
931                    break;
932                }
933            }
934            continue;
935        }
936
937        // Preserve block quotes (but reflow their content)
938        if trimmed.starts_with('>') {
939            let quote_prefix = line[0..line.find('>').unwrap() + 1].to_string();
940            let quote_content = &line[quote_prefix.len()..].trim_start();
941
942            let reflowed = reflow_line(quote_content, options);
943            for reflowed_line in reflowed.iter() {
944                result.push(format!("{quote_prefix} {reflowed_line}"));
945            }
946            i += 1;
947            continue;
948        }
949
950        // Preserve horizontal rules first (before checking for lists)
951        if is_horizontal_rule(trimmed) {
952            result.push(line.to_string());
953            i += 1;
954            continue;
955        }
956
957        // Preserve lists (but not horizontal rules)
958        if (trimmed.starts_with('-') && !is_horizontal_rule(trimmed))
959            || (trimmed.starts_with('*') && !is_horizontal_rule(trimmed))
960            || trimmed.starts_with('+')
961            || is_numbered_list_item(trimmed)
962        {
963            // Find the list marker and preserve indentation
964            let indent = line.len() - line.trim_start().len();
965            let indent_str = " ".repeat(indent);
966
967            // For numbered lists, find the period and the space after it
968            // For bullet lists, find the marker and the space after it
969            let mut marker_end = indent;
970            let mut content_start = indent;
971
972            if trimmed.chars().next().is_some_and(|c| c.is_numeric()) {
973                // Numbered list: find the period
974                if let Some(period_pos) = line[indent..].find('.') {
975                    marker_end = indent + period_pos + 1; // Include the period
976                    content_start = marker_end;
977                    // Skip any spaces after the period to find content start
978                    while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
979                        content_start += 1;
980                    }
981                }
982            } else {
983                // Bullet list: marker is single character
984                marker_end = indent + 1; // Just the marker character
985                content_start = marker_end;
986                // Skip any spaces after the marker
987                while content_start < line.len() && line.chars().nth(content_start) == Some(' ') {
988                    content_start += 1;
989                }
990            }
991
992            let marker = &line[indent..marker_end];
993
994            // Collect all content for this list item (including continuation lines)
995            // Preserve hard breaks (2 trailing spaces) while trimming excessive whitespace
996            let mut list_content = vec![trim_preserving_hard_break(&line[content_start..])];
997            i += 1;
998
999            // Collect continuation lines (indented lines that are part of this list item)
1000            while i < lines.len() {
1001                let next_line = lines[i];
1002                let next_trimmed = next_line.trim();
1003
1004                // Stop if we hit an empty line or another list item or special block
1005                if next_trimmed.is_empty()
1006                    || next_trimmed.starts_with('#')
1007                    || next_trimmed.starts_with("```")
1008                    || next_trimmed.starts_with("~~~")
1009                    || next_trimmed.starts_with('>')
1010                    || next_trimmed.starts_with('|')
1011                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1012                    || is_horizontal_rule(next_trimmed)
1013                    || (next_trimmed.starts_with('-')
1014                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1015                    || (next_trimmed.starts_with('*')
1016                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1017                    || (next_trimmed.starts_with('+')
1018                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1019                    || is_numbered_list_item(next_trimmed)
1020                    || is_definition_list_item(next_trimmed)
1021                {
1022                    break;
1023                }
1024
1025                // Check if this line is indented (continuation of list item)
1026                let next_indent = next_line.len() - next_line.trim_start().len();
1027                if next_indent >= content_start {
1028                    // This is a continuation line - add its content
1029                    // Preserve hard breaks while trimming excessive whitespace
1030                    let trimmed_start = next_line.trim_start();
1031                    list_content.push(trim_preserving_hard_break(trimmed_start));
1032                    i += 1;
1033                } else {
1034                    // Not indented enough, not part of this list item
1035                    break;
1036                }
1037            }
1038
1039            // Join content, but respect hard breaks (lines ending with 2 spaces or backslash)
1040            // Hard breaks should prevent joining with the next line
1041            let combined_content = if options.preserve_breaks {
1042                list_content[0].clone()
1043            } else {
1044                // Check if any lines have hard breaks - if so, preserve the structure
1045                let has_hard_breaks = list_content.iter().any(|line| has_hard_break(line));
1046                if has_hard_breaks {
1047                    // Don't join lines with hard breaks - keep them separate with newlines
1048                    list_content.join("\n")
1049                } else {
1050                    // No hard breaks, safe to join with spaces
1051                    list_content.join(" ")
1052                }
1053            };
1054
1055            // Calculate the proper indentation for continuation lines
1056            let trimmed_marker = marker;
1057            let continuation_spaces = content_start;
1058
1059            // Adjust line length to account for list marker and space
1060            let prefix_length = indent + trimmed_marker.len() + 1;
1061
1062            // Create adjusted options with reduced line length
1063            let adjusted_options = ReflowOptions {
1064                line_length: options.line_length.saturating_sub(prefix_length),
1065                ..options.clone()
1066            };
1067
1068            let reflowed = reflow_line(&combined_content, &adjusted_options);
1069            for (j, reflowed_line) in reflowed.iter().enumerate() {
1070                if j == 0 {
1071                    result.push(format!("{indent_str}{trimmed_marker} {reflowed_line}"));
1072                } else {
1073                    // Continuation lines aligned with text after marker
1074                    let continuation_indent = " ".repeat(continuation_spaces);
1075                    result.push(format!("{continuation_indent}{reflowed_line}"));
1076                }
1077            }
1078            continue;
1079        }
1080
1081        // Preserve tables
1082        if trimmed.contains('|') {
1083            result.push(line.to_string());
1084            i += 1;
1085            continue;
1086        }
1087
1088        // Preserve reference definitions
1089        if trimmed.starts_with('[') && line.contains("]:") {
1090            result.push(line.to_string());
1091            i += 1;
1092            continue;
1093        }
1094
1095        // Preserve definition list items (extended markdown)
1096        if is_definition_list_item(trimmed) {
1097            result.push(line.to_string());
1098            i += 1;
1099            continue;
1100        }
1101
1102        // Check if this is a single line that doesn't need processing
1103        let mut is_single_line_paragraph = true;
1104        if i + 1 < lines.len() {
1105            let next_line = lines[i + 1];
1106            let next_trimmed = next_line.trim();
1107            // Check if next line starts a new block
1108            if !next_trimmed.is_empty()
1109                && !next_trimmed.starts_with('#')
1110                && !next_trimmed.starts_with("```")
1111                && !next_trimmed.starts_with("~~~")
1112                && !next_trimmed.starts_with('>')
1113                && !next_trimmed.starts_with('|')
1114                && !(next_trimmed.starts_with('[') && next_line.contains("]:"))
1115                && !is_horizontal_rule(next_trimmed)
1116                && !(next_trimmed.starts_with('-')
1117                    && !is_horizontal_rule(next_trimmed)
1118                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1119                && !(next_trimmed.starts_with('*')
1120                    && !is_horizontal_rule(next_trimmed)
1121                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1122                && !(next_trimmed.starts_with('+')
1123                    && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1124                && !is_numbered_list_item(next_trimmed)
1125            {
1126                is_single_line_paragraph = false;
1127            }
1128        }
1129
1130        // If it's a single line that fits, just add it as-is
1131        if is_single_line_paragraph && line.chars().count() <= options.line_length {
1132            result.push(line.to_string());
1133            i += 1;
1134            continue;
1135        }
1136
1137        // For regular paragraphs, collect consecutive lines
1138        let mut paragraph_parts = Vec::new();
1139        let mut current_part = vec![line];
1140        i += 1;
1141
1142        // If preserve_breaks is true, treat each line separately
1143        if options.preserve_breaks {
1144            // Don't collect consecutive lines - just reflow this single line
1145            let hard_break_type = if line.strip_suffix('\r').unwrap_or(line).ends_with('\\') {
1146                Some("\\")
1147            } else if line.ends_with("  ") {
1148                Some("  ")
1149            } else {
1150                None
1151            };
1152            let reflowed = reflow_line(line, options);
1153
1154            // Preserve hard breaks (two trailing spaces or backslash)
1155            if let Some(break_marker) = hard_break_type {
1156                if !reflowed.is_empty() {
1157                    let mut reflowed_with_break = reflowed;
1158                    let last_idx = reflowed_with_break.len() - 1;
1159                    if !has_hard_break(&reflowed_with_break[last_idx]) {
1160                        reflowed_with_break[last_idx].push_str(break_marker);
1161                    }
1162                    result.extend(reflowed_with_break);
1163                }
1164            } else {
1165                result.extend(reflowed);
1166            }
1167        } else {
1168            // Original behavior: collect consecutive lines into a paragraph
1169            while i < lines.len() {
1170                let prev_line = if !current_part.is_empty() {
1171                    current_part.last().unwrap()
1172                } else {
1173                    ""
1174                };
1175                let next_line = lines[i];
1176                let next_trimmed = next_line.trim();
1177
1178                // Stop at empty lines or special blocks
1179                if next_trimmed.is_empty()
1180                    || next_trimmed.starts_with('#')
1181                    || next_trimmed.starts_with("```")
1182                    || next_trimmed.starts_with("~~~")
1183                    || next_trimmed.starts_with('>')
1184                    || next_trimmed.starts_with('|')
1185                    || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1186                    || is_horizontal_rule(next_trimmed)
1187                    || (next_trimmed.starts_with('-')
1188                        && !is_horizontal_rule(next_trimmed)
1189                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1190                    || (next_trimmed.starts_with('*')
1191                        && !is_horizontal_rule(next_trimmed)
1192                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1193                    || (next_trimmed.starts_with('+')
1194                        && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1195                    || is_numbered_list_item(next_trimmed)
1196                    || is_definition_list_item(next_trimmed)
1197                {
1198                    break;
1199                }
1200
1201                // Check if previous line ends with hard break (two spaces or backslash)
1202                if has_hard_break(prev_line) {
1203                    // Start a new part after hard break
1204                    paragraph_parts.push(current_part.join(" "));
1205                    current_part = vec![next_line];
1206                } else {
1207                    current_part.push(next_line);
1208                }
1209                i += 1;
1210            }
1211
1212            // Add the last part
1213            if !current_part.is_empty() {
1214                if current_part.len() == 1 {
1215                    // Single line, don't add trailing space
1216                    paragraph_parts.push(current_part[0].to_string());
1217                } else {
1218                    paragraph_parts.push(current_part.join(" "));
1219                }
1220            }
1221
1222            // Reflow each part separately, preserving hard breaks
1223            for (j, part) in paragraph_parts.iter().enumerate() {
1224                let reflowed = reflow_line(part, options);
1225                result.extend(reflowed);
1226
1227                // Preserve hard break by ensuring last line of part ends with hard break marker
1228                // Use two spaces as the default hard break format for reflows
1229                if j < paragraph_parts.len() - 1 && !result.is_empty() {
1230                    let last_idx = result.len() - 1;
1231                    if !has_hard_break(&result[last_idx]) {
1232                        result[last_idx].push_str("  ");
1233                    }
1234                }
1235            }
1236        }
1237    }
1238
1239    // Preserve trailing newline if the original content had one
1240    let result_text = result.join("\n");
1241    if content.ends_with('\n') && !result_text.ends_with('\n') {
1242        format!("{result_text}\n")
1243    } else {
1244        result_text
1245    }
1246}
1247
/// Information about a reflowed paragraph
///
/// `start_byte..end_byte` is the half-open byte range of the paragraph in the original
/// content; replacing that range with `reflowed_text` applies the reflow.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParagraphReflow {
    /// Starting byte offset of the paragraph in the original content
    pub start_byte: usize,
    /// Ending byte offset (exclusive) of the paragraph in the original content.
    /// The line terminator of the paragraph's last line is included when it has one.
    pub end_byte: usize,
    /// The reflowed text for this paragraph
    pub reflowed_text: String,
}
1258
1259/// Reflow a single paragraph at the specified line number
1260///
1261/// This function finds the paragraph containing the given line number,
1262/// reflows it according to the specified line length, and returns
1263/// information about the paragraph location and its reflowed text.
1264///
1265/// # Arguments
1266///
1267/// * `content` - The full document content
1268/// * `line_number` - The 1-based line number within the paragraph to reflow
1269/// * `line_length` - The target line length for reflowing
1270///
1271/// # Returns
1272///
1273/// Returns `Some(ParagraphReflow)` if a paragraph was found and reflowed,
1274/// or `None` if the line number is out of bounds or the content at that
1275/// line shouldn't be reflowed (e.g., code blocks, headings, etc.)
1276pub fn reflow_paragraph_at_line(content: &str, line_number: usize, line_length: usize) -> Option<ParagraphReflow> {
1277    if line_number == 0 {
1278        return None;
1279    }
1280
1281    let lines: Vec<&str> = content.lines().collect();
1282
1283    // Check if line number is valid (1-based)
1284    if line_number > lines.len() {
1285        return None;
1286    }
1287
1288    let target_idx = line_number - 1; // Convert to 0-based
1289    let target_line = lines[target_idx];
1290    let trimmed = target_line.trim();
1291
1292    // Don't reflow special blocks
1293    if trimmed.is_empty()
1294        || trimmed.starts_with('#')
1295        || trimmed.starts_with("```")
1296        || trimmed.starts_with("~~~")
1297        || target_line.starts_with("    ")
1298        || target_line.starts_with('\t')
1299        || trimmed.starts_with('>')
1300        || trimmed.contains('|') // Tables
1301        || (trimmed.starts_with('[') && target_line.contains("]:")) // Reference definitions
1302        || is_horizontal_rule(trimmed)
1303        || ((trimmed.starts_with('-') || trimmed.starts_with('*') || trimmed.starts_with('+'))
1304            && !is_horizontal_rule(trimmed)
1305            && (trimmed.len() == 1 || trimmed.chars().nth(1) == Some(' ')))
1306        || is_numbered_list_item(trimmed)
1307        || is_definition_list_item(trimmed)
1308    {
1309        return None;
1310    }
1311
1312    // Find paragraph start - scan backward until blank line or special block
1313    let mut para_start = target_idx;
1314    while para_start > 0 {
1315        let prev_idx = para_start - 1;
1316        let prev_line = lines[prev_idx];
1317        let prev_trimmed = prev_line.trim();
1318
1319        // Stop at blank line or special blocks
1320        if prev_trimmed.is_empty()
1321            || prev_trimmed.starts_with('#')
1322            || prev_trimmed.starts_with("```")
1323            || prev_trimmed.starts_with("~~~")
1324            || prev_line.starts_with("    ")
1325            || prev_line.starts_with('\t')
1326            || prev_trimmed.starts_with('>')
1327            || prev_trimmed.contains('|')
1328            || (prev_trimmed.starts_with('[') && prev_line.contains("]:"))
1329            || is_horizontal_rule(prev_trimmed)
1330            || ((prev_trimmed.starts_with('-') || prev_trimmed.starts_with('*') || prev_trimmed.starts_with('+'))
1331                && !is_horizontal_rule(prev_trimmed)
1332                && (prev_trimmed.len() == 1 || prev_trimmed.chars().nth(1) == Some(' ')))
1333            || is_numbered_list_item(prev_trimmed)
1334            || is_definition_list_item(prev_trimmed)
1335        {
1336            break;
1337        }
1338
1339        para_start = prev_idx;
1340    }
1341
1342    // Find paragraph end - scan forward until blank line or special block
1343    let mut para_end = target_idx;
1344    while para_end + 1 < lines.len() {
1345        let next_idx = para_end + 1;
1346        let next_line = lines[next_idx];
1347        let next_trimmed = next_line.trim();
1348
1349        // Stop at blank line or special blocks
1350        if next_trimmed.is_empty()
1351            || next_trimmed.starts_with('#')
1352            || next_trimmed.starts_with("```")
1353            || next_trimmed.starts_with("~~~")
1354            || next_line.starts_with("    ")
1355            || next_line.starts_with('\t')
1356            || next_trimmed.starts_with('>')
1357            || next_trimmed.contains('|')
1358            || (next_trimmed.starts_with('[') && next_line.contains("]:"))
1359            || is_horizontal_rule(next_trimmed)
1360            || ((next_trimmed.starts_with('-') || next_trimmed.starts_with('*') || next_trimmed.starts_with('+'))
1361                && !is_horizontal_rule(next_trimmed)
1362                && (next_trimmed.len() == 1 || next_trimmed.chars().nth(1) == Some(' ')))
1363            || is_numbered_list_item(next_trimmed)
1364            || is_definition_list_item(next_trimmed)
1365        {
1366            break;
1367        }
1368
1369        para_end = next_idx;
1370    }
1371
1372    // Extract paragraph lines
1373    let paragraph_lines = &lines[para_start..=para_end];
1374
1375    // Calculate byte offsets
1376    let mut start_byte = 0;
1377    for line in lines.iter().take(para_start) {
1378        start_byte += line.len() + 1; // +1 for newline
1379    }
1380
1381    let mut end_byte = start_byte;
1382    for line in paragraph_lines.iter() {
1383        end_byte += line.len() + 1; // +1 for newline
1384    }
1385
1386    // Track whether the byte range includes a trailing newline
1387    // (it doesn't if this is the last line and the file doesn't end with newline)
1388    let includes_trailing_newline = para_end != lines.len() - 1 || content.ends_with('\n');
1389
1390    // Adjust end_byte if the last line doesn't have a newline
1391    if !includes_trailing_newline {
1392        end_byte -= 1;
1393    }
1394
1395    // Join paragraph lines and reflow
1396    let paragraph_text = paragraph_lines.join("\n");
1397
1398    // Create reflow options
1399    let options = ReflowOptions {
1400        line_length,
1401        break_on_sentences: true,
1402        preserve_breaks: false,
1403        sentence_per_line: false,
1404    };
1405
1406    // Reflow the paragraph using reflow_markdown to handle it properly
1407    let reflowed = reflow_markdown(&paragraph_text, &options);
1408
1409    // Ensure reflowed text matches whether the byte range includes a trailing newline
1410    // This is critical: if the range includes a newline, the replacement must too,
1411    // otherwise the next line will get appended to the reflowed paragraph
1412    let reflowed_text = if includes_trailing_newline {
1413        // Range includes newline - ensure reflowed text has one
1414        if reflowed.ends_with('\n') {
1415            reflowed
1416        } else {
1417            format!("{reflowed}\n")
1418        }
1419    } else {
1420        // Range doesn't include newline - ensure reflowed text doesn't have one
1421        if reflowed.ends_with('\n') {
1422            reflowed.trim_end_matches('\n').to_string()
1423        } else {
1424            reflowed
1425        }
1426    };
1427
1428    Some(ParagraphReflow {
1429        start_byte,
1430        end_byte,
1431        reflowed_text,
1432    })
1433}
1434
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the private helper `text_ends_with_abbreviation()` directly.
    ///
    /// The test lives next to the implementation because the helper is private.
    /// All other tests (public API, integration tests) are in tests/utils/text_reflow_test.rs
    #[test]
    fn test_helper_function_text_ends_with_abbreviation() {
        // Inputs that end with a known abbreviation followed by a period.
        let abbreviations = [
            "Dr.", "word Dr.", "e.g.", "i.e.", "etc.", "Mr.", "Mrs.", "Ms.", "Prof.",
        ];
        for sample in abbreviations.iter() {
            assert!(
                text_ends_with_abbreviation(sample),
                "expected {sample:?} to be detected as an abbreviation"
            );
        }

        // Inputs that must not be detected: ordinary words that merely end in
        // abbreviation-like letters, abbreviations followed by `?` or `!` instead of a
        // period, text without punctuation, and the empty string.
        let non_abbreviations = [
            "paradigms.",
            "programs.",
            "items.",
            "systems.",
            "Dr?",
            "Mr!",
            "paradigms?",
            "word",
            "",
        ];
        for sample in non_abbreviations.iter() {
            assert!(
                !text_ends_with_abbreviation(sample),
                "expected {sample:?} not to be detected as an abbreviation"
            );
        }
    }
}
rumdl_lib/utils/text_reflow.rs

rumdl_lib/utils/
text_reflow.rs